From be5985b3dbce5ba2af3c8b0f2b7df235c93907e6 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 26 Mar 2022 18:51:50 +0300 Subject: [PATCH 001/803] cpufreq: qcom-hw: drop affinity hint before freeing the IRQ Drop affinity hint before freeing the throttling IRQ to fix the following trace: [ 185.114773] ------------[ cut here ]------------ [ 185.119517] WARNING: CPU: 7 PID: 43 at kernel/irq/manage.c:1887 free_irq+0x3a4/0x3dc [ 185.127474] Modules linked in: [ 185.130618] CPU: 7 PID: 43 Comm: cpuhp/7 Tainted: G S W 5.17.0-rc6-00386-g67382a5b705d-dirty #690 [ 185.147125] pstate: 604000c5 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 185.154269] pc : free_irq+0x3a4/0x3dc [ 185.158031] lr : free_irq+0x33c/0x3dc [ 185.161792] sp : ffff80000841bc90 [ 185.165195] x29: ffff80000841bc90 x28: ffffa6edc5c3d000 x27: ffff6d93729e5908 [ 185.172515] x26: 0000000000000000 x25: ffff6d910109fc00 x24: ffff6d91011490e0 [ 185.179838] x23: ffff6d9101149218 x22: 0000000000000080 x21: 0000000000000000 [ 185.187163] x20: ffff6d9101149000 x19: ffff6d910ab61500 x18: ffffffffffffffff [ 185.194487] x17: 2e35202020202020 x16: 2020202020202020 x15: ffff80008841b9a7 [ 185.201805] x14: 00000000000003c9 x13: 0000000000000001 x12: 0000000000000040 [ 185.209135] x11: ffff6d91005aab58 x10: ffff6d91005aab5a x9 : ffffc6a5ad1c5408 [ 185.216455] x8 : ffff6d91005adb88 x7 : 0000000000000000 x6 : ffffc6a5ab5a91f4 [ 185.223776] x5 : 0000000000000000 x4 : ffff6d91011490a8 x3 : ffffc6a5ad266108 [ 185.231098] x2 : 0000000013033204 x1 : ffff6d9101149000 x0 : ffff6d910a9cc000 [ 185.238421] Call trace: [ 185.240932] free_irq+0x3a4/0x3dc [ 185.244334] qcom_cpufreq_hw_cpu_exit+0x78/0xcc [ 185.248985] cpufreq_offline.isra.0+0x228/0x270 [ 185.253639] cpuhp_cpufreq_offline+0x10/0x20 [ 185.258027] cpuhp_invoke_callback+0x16c/0x2b0 [ 185.262592] cpuhp_thread_fun+0x190/0x250 [ 185.266710] smpboot_thread_fn+0x12c/0x230 [ 185.270914] kthread+0xfc/0x100 [ 185.274145] ret_from_fork+0x10/0x20 [ 185.277820] irq event stamp: 212 [ 185.281136] hardirqs last enabled at (211): [] _raw_spin_unlock_irqrestore+0x8c/0xa0 [ 185.290775] hardirqs last disabled at (212): [] __schedule+0x710/0xa10 [ 185.299081] softirqs last enabled at (0): [] copy_process+0x7d0/0x1a14 [ 185.307475] softirqs last disabled at (0): [<0000000000000000>] 0x0 Fixes: 3ed6dfbd3bb98 ("cpufreq: qcom-hw: Set CPU affinity of dcvsh interrupts") Tested-by: Vladimir Zapolskiy Reviewed-by: Vladimir Zapolskiy Reviewed-by: Bjorn Andersson Signed-off-by: Dmitry Baryshkov Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index f9d593ff4718..3cacd38bbdd7 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -427,6 +427,7 @@ static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data) mutex_unlock(&data->throttle_lock); cancel_delayed_work_sync(&data->throttle_work); + irq_set_affinity_hint(data->throttle_irq, NULL); free_irq(data->throttle_irq, data); } From 5e4f009da6be563984ba4db4ef4f32529e9aeb90 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 26 Mar 2022 18:51:51 +0300 Subject: [PATCH 002/803] cpufreq: qcom-hw: fix the race between LMH worker and cpuhp The driver would disable the worker when cpu is being put offline, but it happens closer to the end of cpufreq_offline(). The function qcom_lmh_dcvs_poll() can be running in parallel with this, when policy->cpus already has been updated. Read policy->related_cpus instead. [ 37.122433] ------------[ cut here ]------------ [ 37.127225] WARNING: CPU: 0 PID: 187 at drivers/base/arch_topology.c:180 topology_update_thermal_pressure+0xec/0x100 [ 37.138098] Modules linked in: [ 37.141279] CPU: 0 PID: 187 Comm: kworker/0:3 Tainted: G S 5.17.0-rc6-00389-g37c83d0b8710-dirty #713 [ 37.158306] Workqueue: events qcom_lmh_dcvs_poll [ 37.163095] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 37.170278] pc : topology_update_thermal_pressure+0xec/0x100 [ 37.176131] lr : topology_update_thermal_pressure+0x20/0x100 [ 37.181977] sp : ffff800009b6bce0 [ 37.185402] x29: ffff800009b6bce0 x28: ffffd87abe92b000 x27: ffff04bd7292e205 [ 37.192792] x26: ffffd87abe930af8 x25: ffffd87abe94e4c8 x24: 0000000000000000 [ 37.200180] x23: ffff04bb01177018 x22: ffff04bb011770c0 x21: ffff04bb01177000 [ 37.207567] x20: ffff04bb0a419000 x19: 00000000000c4e00 x18: 0000000000000000 [ 37.214954] x17: 000000040044ffff x16: 004000b2b5503510 x15: 0000006aaa1326d2 [ 37.222333] x14: 0000000000000232 x13: 0000000000000001 x12: 0000000000000040 [ 37.229718] x11: ffff04bb00400000 x10: 968f57bd39f701c8 x9 : ffff04bb0acc8674 [ 37.237095] x8 : fefefefefefefeff x7 : 0000000000000018 x6 : ffffd87abd90092c [ 37.244478] x5 : 0000000000000016 x4 : 0000000000000000 x3 : 0000000000000100 [ 37.251852] x2 : ffff04bb0a419020 x1 : 0000000000000100 x0 : 0000000000000100 [ 37.259235] Call trace: [ 37.261771] topology_update_thermal_pressure+0xec/0x100 [ 37.267266] qcom_lmh_dcvs_poll+0xbc/0x154 [ 37.271505] process_one_work+0x288/0x69c [ 37.275654] worker_thread+0x74/0x470 [ 37.279450] kthread+0xfc/0x100 [ 37.282712] ret_from_fork+0x10/0x20 [ 37.286417] irq event stamp: 74 [ 37.289664] hardirqs last enabled at (73): [] _raw_spin_unlock_irq+0x44/0x80 [ 37.298632] hardirqs last disabled at (74): [] __schedule+0x710/0xa10 [ 37.306885] softirqs last enabled at (58): [] _stext+0x410/0x588 [ 37.314778] softirqs last disabled at (51): [] __irq_exit_rcu+0x158/0x174 [ 37.323386] ---[ end trace 0000000000000000 ]--- Fixes: 275157b367f4 ("cpufreq: qcom-cpufreq-hw: Add dcvs interrupt support") Signed-off-by: Dmitry Baryshkov Reviewed-by: Bjorn Andersson Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 3cacd38bbdd7..534eb1a17c9b 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -290,7 +290,7 @@ static unsigned int qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) { struct cpufreq_policy *policy = data->policy; - int cpu = cpumask_first(policy->cpus); + int cpu = cpumask_first(policy->related_cpus); struct device *dev = get_cpu_device(cpu); unsigned long freq_hz, throttled_freq; struct dev_pm_opp *opp; From 6240aaad75e1a623872a830d13393d7aabf1052c Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 26 Mar 2022 18:51:52 +0300 Subject: [PATCH 003/803] cpufreq: qcom-hw: fix the opp entries refcounting The qcom_lmh_dcvs_notify() will get the dev_pm_opp instance for throttling, but will not put it, ending up with leaking a reference count and the following backtrace when putting the CPU offline. Correctly put the reference count of the returned opp instance. [ 84.418025] ------------[ cut here ]------------ [ 84.422770] WARNING: CPU: 7 PID: 43 at drivers/opp/core.c:1396 _opp_table_kref_release+0x188/0x190 [ 84.431966] Modules linked in: [ 84.435106] CPU: 7 PID: 43 Comm: cpuhp/7 Tainted: G S 5.17.0-rc6-00388-g7cf3c0d89c44-dirty #721 [ 84.451631] pstate: 82400005 (Nzcv daif +PAN -UAO +TCO -DIT -SSBS BTYPE=--) [ 84.458781] pc : _opp_table_kref_release+0x188/0x190 [ 84.463878] lr : _opp_table_kref_release+0x78/0x190 [ 84.468885] sp : ffff80000841bc70 [ 84.472294] x29: ffff80000841bc70 x28: ffff6664afe3d000 x27: ffff1db6729e5908 [ 84.479621] x26: 0000000000000000 x25: 0000000000000000 x24: ffff1db6729e58e0 [ 84.486946] x23: ffff8000080a5000 x22: ffff1db40aad80e0 x21: ffff1db4002fec80 [ 84.494277] x20: ffff1db40aad8000 x19: ffffb751c3186300 x18: ffffffffffffffff [ 84.501603] x17: 5300326563697665 x16: 645f676e696c6f6f x15: 00001186c1df5448 [ 84.508928] x14: 00000000000002e9 x13: 0000000000000000 x12: 0000000000000000 [ 84.516256] x11: ffffb751c3186368 x10: ffffb751c39a2a70 x9 : 0000000000000000 [ 84.523585] x8 : ffff1db4008edf00 x7 : ffffb751c328c000 x6 : 0000000000000001 [ 84.530916] x5 : 0000000000040000 x4 : 0000000000000001 x3 : ffff1db4008edf00 [ 84.538247] x2 : 0000000000000000 x1 : ffff1db400aa6100 x0 : ffff1db40aad80d0 [ 84.545579] Call trace: [ 84.548101] _opp_table_kref_release+0x188/0x190 [ 84.552842] dev_pm_opp_remove_all_dynamic+0x8c/0xc0 [ 84.557949] qcom_cpufreq_hw_cpu_exit+0x30/0xdc [ 84.562608] cpufreq_offline.isra.0+0x1b4/0x1d8 [ 84.567270] cpuhp_cpufreq_offline+0x10/0x6c [ 84.571663] cpuhp_invoke_callback+0x16c/0x2b0 [ 84.576231] cpuhp_thread_fun+0x190/0x250 [ 84.580353] smpboot_thread_fn+0x12c/0x230 [ 84.584568] kthread+0xfc/0x100 [ 84.587810] ret_from_fork+0x10/0x20 [ 84.591490] irq event stamp: 3482 [ 84.594901] hardirqs last enabled at (3481): [] call_rcu+0x39c/0x50c [ 84.603119] hardirqs last disabled at (3482): [] el1_dbg+0x24/0x8c [ 84.611074] softirqs last enabled at (310): [] _stext+0x410/0x588 [ 84.619028] softirqs last disabled at (305): [] __irq_exit_rcu+0x158/0x174 [ 84.627691] ---[ end trace 0000000000000000 ]--- Fixes: 275157b367f4 ("cpufreq: qcom-cpufreq-hw: Add dcvs interrupt support") Reported-by: kernel test robot Tested-by: Vladimir Zapolskiy Reviewed-by: Vladimir Zapolskiy Reviewed-by: Bjorn Andersson Signed-off-by: Dmitry Baryshkov Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 534eb1a17c9b..9bbadcea48aa 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -305,12 +305,18 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) opp = dev_pm_opp_find_freq_floor(dev, &freq_hz); if (IS_ERR(opp) && PTR_ERR(opp) == -ERANGE) - dev_pm_opp_find_freq_ceil(dev, &freq_hz); + opp = dev_pm_opp_find_freq_ceil(dev, &freq_hz); - throttled_freq = freq_hz / HZ_PER_KHZ; + if (IS_ERR(opp)) { + dev_warn(dev, "Can't find the OPP for throttling: %pe!\n", opp); + } else { + throttled_freq = freq_hz / HZ_PER_KHZ; - /* Update thermal pressure (the boost frequencies are accepted) */ - arch_update_thermal_pressure(policy->related_cpus, throttled_freq); + /* Update thermal pressure (the boost frequencies are accepted) */ + arch_update_thermal_pressure(policy->related_cpus, throttled_freq); + + dev_pm_opp_put(opp); + } /* * In the unlikely case policy is unregistered do not enable From a1eb080a04477a55c66d70fb3401b059d6dcc3a9 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 26 Mar 2022 18:51:53 +0300 Subject: [PATCH 004/803] cpufreq: qcom-hw: provide online/offline operations Provide lightweight online and offline operations. This saves us from parsing and tearing down the OPP tables each time the CPU is put online or offline. Tested-by: Vladimir Zapolskiy Reviewed-by: Vladimir Zapolskiy Reviewed-by: Bjorn Andersson Signed-off-by: Dmitry Baryshkov Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 9bbadcea48aa..efa264fed1a0 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -423,10 +423,26 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index) return 0; } -static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data) +static int qcom_cpufreq_hw_cpu_online(struct cpufreq_policy *policy) { + struct qcom_cpufreq_data *data = policy->driver_data; + struct platform_device *pdev = cpufreq_get_driver_data(); + int ret; + + ret = irq_set_affinity_hint(data->throttle_irq, policy->cpus); + if (ret) + dev_err(&pdev->dev, "Failed to set CPU affinity of %s[%d]\n", + data->irq_name, data->throttle_irq); + + return ret; +} + +static int qcom_cpufreq_hw_cpu_offline(struct cpufreq_policy *policy) +{ + struct qcom_cpufreq_data *data = policy->driver_data; + if (data->throttle_irq <= 0) - return; + return 0; mutex_lock(&data->throttle_lock); data->cancel_throttle = true; @@ -434,6 +450,12 @@ static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data) cancel_delayed_work_sync(&data->throttle_work); irq_set_affinity_hint(data->throttle_irq, NULL); + + return 0; +} + +static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data) +{ free_irq(data->throttle_irq, data); } @@ -590,6 +612,8 @@ static struct cpufreq_driver cpufreq_qcom_hw_driver = { .get = qcom_cpufreq_hw_get, .init = qcom_cpufreq_hw_cpu_init, .exit = qcom_cpufreq_hw_cpu_exit, + .online = qcom_cpufreq_hw_cpu_online, + .offline = qcom_cpufreq_hw_cpu_offline, .register_em = cpufreq_register_em_with_opp, .fast_switch = qcom_cpufreq_hw_fast_switch, .name = "qcom-cpufreq-hw", From f84ccad5f5660f86a642a3d7e2bfdc4e7a8a2d49 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Fri, 1 Apr 2022 10:14:24 +0300 Subject: [PATCH 005/803] cpufreq: qcom-cpufreq-hw: Fix throttle frequency value on EPSS platforms On QCOM platforms with EPSS flavour of cpufreq IP a throttled frequency is obtained from another register REG_DOMAIN_STATE, thus the helper function qcom_lmh_get_throttle_freq() should be modified accordingly, as for now it returns gibberish since .reg_current_vote is unset for EPSS hardware. To exclude a hardcoded magic number 19200 it is replaced by "xo" clock rate in KHz. Fixes: 275157b367f4 ("cpufreq: qcom-cpufreq-hw: Add dcvs interrupt support") Reviewed-by: Bjorn Andersson Signed-off-by: Vladimir Zapolskiy Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index efa264fed1a0..0ec18e1589dc 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -28,6 +28,7 @@ struct qcom_cpufreq_soc_data { u32 reg_enable; + u32 reg_domain_state; u32 reg_dcvs_ctrl; u32 reg_freq_lut; u32 reg_volt_lut; @@ -280,11 +281,16 @@ static void qcom_get_related_cpus(int index, struct cpumask *m) } } -static unsigned int qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) +static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) { - unsigned int val = readl_relaxed(data->base + data->soc_data->reg_current_vote); + unsigned int lval; - return (val & 0x3FF) * 19200; + if (data->soc_data->reg_current_vote) + lval = readl_relaxed(data->base + data->soc_data->reg_current_vote) & 0x3ff; + else + lval = readl_relaxed(data->base + data->soc_data->reg_domain_state) & 0xff; + + return lval * xo_rate; } static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) @@ -294,14 +300,12 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) struct device *dev = get_cpu_device(cpu); unsigned long freq_hz, throttled_freq; struct dev_pm_opp *opp; - unsigned int freq; /* * Get the h/w throttled frequency, normalize it using the * registered opp table and use it to calculate thermal pressure. */ - freq = qcom_lmh_get_throttle_freq(data); - freq_hz = freq * HZ_PER_KHZ; + freq_hz = qcom_lmh_get_throttle_freq(data); opp = dev_pm_opp_find_freq_floor(dev, &freq_hz); if (IS_ERR(opp) && PTR_ERR(opp) == -ERANGE) @@ -371,6 +375,7 @@ static const struct qcom_cpufreq_soc_data qcom_soc_data = { static const struct qcom_cpufreq_soc_data epss_soc_data = { .reg_enable = 0x0, + .reg_domain_state = 0x20, .reg_dcvs_ctrl = 0xb0, .reg_freq_lut = 0x100, .reg_volt_lut = 0x200, From af11f31715b50ce77e50fa393bc530df0f33960b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 22 Mar 2022 21:33:38 +0100 Subject: [PATCH 006/803] video: fbdev: of: display_timing: Remove a redundant zeroing of memory of_parse_display_timing() already call memset(0) on its 2nd argument, so there is no need to clear it explicitly before calling this function. Use kmalloc() instead of kzalloc() to save a few cycles. Signed-off-by: Christophe JAILLET Signed-off-by: Helge Deller --- drivers/video/of_display_timing.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/of_display_timing.c b/drivers/video/of_display_timing.c index f93b6abbe258..bebd371c6b93 100644 --- a/drivers/video/of_display_timing.c +++ b/drivers/video/of_display_timing.c @@ -199,7 +199,7 @@ struct display_timings *of_get_display_timings(const struct device_node *np) struct display_timing *dt; int r; - dt = kzalloc(sizeof(*dt), GFP_KERNEL); + dt = kmalloc(sizeof(*dt), GFP_KERNEL); if (!dt) { pr_err("%pOF: could not allocate display_timing struct\n", np); From aaf7dbe07385e0b8deb7237eca2a79926bbc7091 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 22 Mar 2022 23:04:38 +0300 Subject: [PATCH 007/803] video: fbdev: udlfb: properly check endpoint type syzbot reported warning in usb_submit_urb, which is caused by wrong endpoint type. This driver uses out bulk endpoint for communication, so let's check if this endpoint is present and bail out early if not. Fail log: usb 1-1: BOGUS urb xfer, pipe 3 != type 1 WARNING: CPU: 0 PID: 4822 at drivers/usb/core/urb.c:493 usb_submit_urb+0xd27/0x1540 drivers/usb/core/urb.c:493 Modules linked in: CPU: 0 PID: 4822 Comm: kworker/0:3 Tainted: G W 5.13.0-syzkaller #0 ... Workqueue: usb_hub_wq hub_event RIP: 0010:usb_submit_urb+0xd27/0x1540 drivers/usb/core/urb.c:493 ... Call Trace: dlfb_submit_urb+0x89/0x160 drivers/video/fbdev/udlfb.c:1969 dlfb_set_video_mode+0x21f0/0x2950 drivers/video/fbdev/udlfb.c:315 dlfb_ops_set_par+0x2a3/0x840 drivers/video/fbdev/udlfb.c:1110 dlfb_usb_probe.cold+0x113e/0x1f4a drivers/video/fbdev/udlfb.c:1732 usb_probe_interface+0x315/0x7f0 drivers/usb/core/driver.c:396 Fixes: 88e58b1a42f8 ("Staging: add udlfb driver") Reported-and-tested-by: syzbot+53ce4a4246d0fe0fee34@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: Helge Deller --- drivers/video/fbdev/udlfb.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c index b6ec0b8e2b72..d280733f283b 100644 --- a/drivers/video/fbdev/udlfb.c +++ b/drivers/video/fbdev/udlfb.c @@ -1650,8 +1650,9 @@ static int dlfb_usb_probe(struct usb_interface *intf, const struct device_attribute *attr; struct dlfb_data *dlfb; struct fb_info *info; - int retval = -ENOMEM; + int retval; struct usb_device *usbdev = interface_to_usbdev(intf); + struct usb_endpoint_descriptor *out; /* usb initialization */ dlfb = kzalloc(sizeof(*dlfb), GFP_KERNEL); @@ -1665,6 +1666,12 @@ static int dlfb_usb_probe(struct usb_interface *intf, dlfb->udev = usb_get_dev(usbdev); usb_set_intfdata(intf, dlfb); + retval = usb_find_common_endpoints(intf->cur_altsetting, NULL, &out, NULL, NULL); + if (retval) { + dev_err(&intf->dev, "Device should have at lease 1 bulk endpoint!\n"); + goto error; + } + dev_dbg(&intf->dev, "console enable=%d\n", console); dev_dbg(&intf->dev, "fb_defio enable=%d\n", fb_defio); dev_dbg(&intf->dev, "shadow enable=%d\n", shadow); @@ -1674,6 +1681,7 @@ static int dlfb_usb_probe(struct usb_interface *intf, if (!dlfb_parse_vendor_descriptor(dlfb, intf)) { dev_err(&intf->dev, "firmware not recognized, incompatible device?\n"); + retval = -ENODEV; goto error; } @@ -1687,8 +1695,10 @@ static int dlfb_usb_probe(struct usb_interface *intf, /* allocates framebuffer driver structure, not framebuffer memory */ info = framebuffer_alloc(0, &dlfb->udev->dev); - if (!info) + if (!info) { + retval = -ENOMEM; goto error; + } dlfb->info = info; info->par = dlfb; From b23e868d35d572d459e9be4b994a8c709f1a1606 Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Tue, 29 Mar 2022 02:14:32 -0700 Subject: [PATCH 008/803] video: fbdev: pxafb: use if else instead use if and else instead of consequent if(A) and if (!A) Signed-off-by: Wang Qing Signed-off-by: Helge Deller --- drivers/video/fbdev/pxafb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c index f1551e00eb12..8ad91c251fe6 100644 --- a/drivers/video/fbdev/pxafb.c +++ b/drivers/video/fbdev/pxafb.c @@ -2256,10 +2256,10 @@ static int pxafb_probe(struct platform_device *dev) goto failed; for (i = 0; i < inf->num_modes; i++) inf->modes[i] = pdata->modes[i]; + } else { + inf = of_pxafb_of_mach_info(&dev->dev); } - if (!pdata) - inf = of_pxafb_of_mach_info(&dev->dev); if (IS_ERR_OR_NULL(inf)) goto failed; From d1d608ce78b3fc330938faaa1f70a91cf20c03a9 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Fri, 1 Apr 2022 11:41:16 +0800 Subject: [PATCH 009/803] video: fbdev: sis: fix potential NULL dereference in sisfb_post_sis300() Do no access bios[] if it's NULL. Signed-off-by: Haowen Bai Signed-off-by: Helge Deller --- drivers/video/fbdev/sis/sis_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c index 742f62986b80..f28fd69d5eb7 100644 --- a/drivers/video/fbdev/sis/sis_main.c +++ b/drivers/video/fbdev/sis/sis_main.c @@ -4463,7 +4463,7 @@ static void sisfb_post_sis300(struct pci_dev *pdev) SiS_SetReg(SISCR, 0x37, 0x02); SiS_SetReg(SISPART2, 0x00, 0x1c); v4 = 0x00; v5 = 0x00; v6 = 0x10; - if(ivideo->SiS_Pr.UseROM) { + if (ivideo->SiS_Pr.UseROM && bios) { v4 = bios[0xf5]; v5 = bios[0xf6]; v6 = bios[0xf7]; From f56b919fa4f1b27c589e71f7d90e9785f9196bf1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 16 Feb 2022 09:39:22 +0100 Subject: [PATCH 010/803] linux/fb.h: Spelling s/palette/palette/ Fix a misspelling of "palette" in a comment. Signed-off-by: Geert Uytterhoeven Reviewed-by: Pekka Paalanen Signed-off-by: Helge Deller --- include/uapi/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/fb.h b/include/uapi/linux/fb.h index 4c14e8be7267..3a49913d006c 100644 --- a/include/uapi/linux/fb.h +++ b/include/uapi/linux/fb.h @@ -182,7 +182,7 @@ struct fb_fix_screeninfo { * * For pseudocolor: offset and length should be the same for all color * components. Offset specifies the position of the least significant bit - * of the pallette index in a pixel value. Length indicates the number + * of the palette index in a pixel value. Length indicates the number * of available palette entries (i.e. # of entries = 1 << length). */ struct fb_bitfield { From 5c6d8b23cef8feb0039377e355f67aa7441c1115 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Fri, 1 Apr 2022 16:41:57 +0800 Subject: [PATCH 011/803] video: fbdev: pm2fb: Fix a kernel-doc formatting issue This function had kernel-doc that not used a hash to separate the function name from the one line description. Signed-off-by: Haowen Bai Signed-off-by: Helge Deller --- drivers/video/fbdev/pm2fb.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/video/fbdev/pm2fb.c b/drivers/video/fbdev/pm2fb.c index c68725eebee3..d3be2c64f1c0 100644 --- a/drivers/video/fbdev/pm2fb.c +++ b/drivers/video/fbdev/pm2fb.c @@ -1504,9 +1504,7 @@ static const struct fb_ops pm2fb_ops = { /** - * Device initialisation - * - * Initialise and allocate resource for PCI device. + * pm2fb_probe - Initialise and allocate resource for PCI device. * * @pdev: PCI device. * @id: PCI device ID. @@ -1711,9 +1709,7 @@ static int pm2fb_probe(struct pci_dev *pdev, const struct pci_device_id *id) } /** - * Device removal. - * - * Release all device resources. + * pm2fb_remove - Release all device resources. * * @pdev: PCI device to clean up. */ From 2a8f0934e92242e90be6ef20c5f9f77eef1e333f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 2 Apr 2022 12:22:56 +0200 Subject: [PATCH 012/803] video: fbdev: aty/matrox/...: Prepare cleanup of powerpc's asm/prom.h powerpc's asm/prom.h brings some headers that it doesn't need itself. In order to clean it up, first add missing headers in users of asm/prom.h Signed-off-by: Christophe Leroy Signed-off-by: Helge Deller --- drivers/video/fbdev/aty/aty128fb.c | 1 - drivers/video/fbdev/aty/atyfb_base.c | 1 - drivers/video/fbdev/aty/radeon_pm.c | 1 - drivers/video/fbdev/aty/radeonfb.h | 2 +- drivers/video/fbdev/controlfb.c | 3 --- drivers/video/fbdev/matrox/matroxfb_base.h | 1 - drivers/video/fbdev/mb862xx/mb862xxfbdrv.c | 2 ++ drivers/video/fbdev/platinumfb.c | 2 +- drivers/video/fbdev/valkyriefb.c | 3 +-- 9 files changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/video/fbdev/aty/aty128fb.c b/drivers/video/fbdev/aty/aty128fb.c index 6ff16d3132e5..b26c81233b6b 100644 --- a/drivers/video/fbdev/aty/aty128fb.c +++ b/drivers/video/fbdev/aty/aty128fb.c @@ -68,7 +68,6 @@ #ifdef CONFIG_PPC_PMAC #include #include -#include #include "../macmodes.h" #endif diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index 1aef3d6ebd88..a3e6faed7745 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -79,7 +79,6 @@ #ifdef __powerpc__ #include -#include #include "../macmodes.h" #endif #ifdef __sparc__ diff --git a/drivers/video/fbdev/aty/radeon_pm.c b/drivers/video/fbdev/aty/radeon_pm.c index b5fbd5329652..97a5972f5b1f 100644 --- a/drivers/video/fbdev/aty/radeon_pm.c +++ b/drivers/video/fbdev/aty/radeon_pm.c @@ -22,7 +22,6 @@ #ifdef CONFIG_PPC_PMAC #include -#include #include #endif diff --git a/drivers/video/fbdev/aty/radeonfb.h b/drivers/video/fbdev/aty/radeonfb.h index 93f403cbb415..91d81b576231 100644 --- a/drivers/video/fbdev/aty/radeonfb.h +++ b/drivers/video/fbdev/aty/radeonfb.h @@ -21,7 +21,7 @@ #include -#if defined(CONFIG_PPC) || defined(CONFIG_SPARC) +#ifdef CONFIG_SPARC #include #endif diff --git a/drivers/video/fbdev/controlfb.c b/drivers/video/fbdev/controlfb.c index bd59e7b11ed5..aba46118b208 100644 --- a/drivers/video/fbdev/controlfb.c +++ b/drivers/video/fbdev/controlfb.c @@ -47,9 +47,6 @@ #include #include #include -#ifdef CONFIG_PPC_PMAC -#include -#endif #ifdef CONFIG_BOOTX_TEXT #include #endif diff --git a/drivers/video/fbdev/matrox/matroxfb_base.h b/drivers/video/fbdev/matrox/matroxfb_base.h index 759dee996af1..958be6805f87 100644 --- a/drivers/video/fbdev/matrox/matroxfb_base.h +++ b/drivers/video/fbdev/matrox/matroxfb_base.h @@ -47,7 +47,6 @@ #include #if defined(CONFIG_PPC_PMAC) -#include #include "../macmodes.h" #endif diff --git a/drivers/video/fbdev/mb862xx/mb862xxfbdrv.c b/drivers/video/fbdev/mb862xx/mb862xxfbdrv.c index 63721337a377..a7508f5be343 100644 --- a/drivers/video/fbdev/mb862xx/mb862xxfbdrv.c +++ b/drivers/video/fbdev/mb862xx/mb862xxfbdrv.c @@ -18,6 +18,8 @@ #include #include #if defined(CONFIG_OF) +#include +#include #include #endif #include "mb862xxfb.h" diff --git a/drivers/video/fbdev/platinumfb.c b/drivers/video/fbdev/platinumfb.c index ce413a9df06e..5b9e26ea6449 100644 --- a/drivers/video/fbdev/platinumfb.c +++ b/drivers/video/fbdev/platinumfb.c @@ -30,9 +30,9 @@ #include #include #include +#include #include #include -#include #include "macmodes.h" #include "platinumfb.h" diff --git a/drivers/video/fbdev/valkyriefb.c b/drivers/video/fbdev/valkyriefb.c index 8425afe37d7c..a6c9d4f26669 100644 --- a/drivers/video/fbdev/valkyriefb.c +++ b/drivers/video/fbdev/valkyriefb.c @@ -54,10 +54,9 @@ #include #include #include +#include #ifdef CONFIG_MAC #include -#else -#include #endif #include "macmodes.h" From 7e4920bf59cb085e148796e937a8e8212fd2bae0 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Sat, 2 Apr 2022 13:54:44 +0200 Subject: [PATCH 013/803] video: fbdev: omap: Make it CCF clk API compatible OMAP1 LCDC drivers now omit clk_prepare/unprepare() steps, not supported by OMAP1 custom implementation of clock API. However, non-CCF stubs of those functions exist for use on such platforms until converted to CCF. Update the drivers to be compatible with CCF implementation of clock API. Signed-off-by: Janusz Krzysztofik Signed-off-by: Helge Deller --- drivers/video/fbdev/omap/hwa742.c | 6 +++--- drivers/video/fbdev/omap/lcdc.c | 6 +++--- drivers/video/fbdev/omap/sossi.c | 5 +++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/video/fbdev/omap/hwa742.c b/drivers/video/fbdev/omap/hwa742.c index b191bef22d98..9d9fe5c3a7a1 100644 --- a/drivers/video/fbdev/omap/hwa742.c +++ b/drivers/video/fbdev/omap/hwa742.c @@ -964,7 +964,7 @@ static int hwa742_init(struct omapfb_device *fbdev, int ext_mode, if ((r = calc_extif_timings(ext_clk, &extif_mem_div)) < 0) goto err3; hwa742.extif->set_timings(&hwa742.reg_timings); - clk_enable(hwa742.sys_ck); + clk_prepare_enable(hwa742.sys_ck); calc_hwa742_clk_rates(ext_clk, &sys_clk, &pix_clk); if ((r = calc_extif_timings(sys_clk, &extif_mem_div)) < 0) @@ -1023,7 +1023,7 @@ static int hwa742_init(struct omapfb_device *fbdev, int ext_mode, return 0; err4: - clk_disable(hwa742.sys_ck); + clk_disable_unprepare(hwa742.sys_ck); err3: hwa742.extif->cleanup(); err2: @@ -1037,7 +1037,7 @@ static void hwa742_cleanup(void) hwa742_set_update_mode(OMAPFB_UPDATE_DISABLED); hwa742.extif->cleanup(); hwa742.int_ctrl->cleanup(); - clk_disable(hwa742.sys_ck); + clk_disable_unprepare(hwa742.sys_ck); } struct lcd_ctrl hwa742_ctrl = { diff --git a/drivers/video/fbdev/omap/lcdc.c b/drivers/video/fbdev/omap/lcdc.c index 7317c9aad677..97d20dc0d1d0 100644 --- a/drivers/video/fbdev/omap/lcdc.c +++ b/drivers/video/fbdev/omap/lcdc.c @@ -711,7 +711,7 @@ static int omap_lcdc_init(struct omapfb_device *fbdev, int ext_mode, dev_err(fbdev->dev, "failed to adjust LCD rate\n"); goto fail1; } - clk_enable(lcdc.lcd_ck); + clk_prepare_enable(lcdc.lcd_ck); r = request_irq(OMAP_LCDC_IRQ, lcdc_irq_handler, 0, MODULE_NAME, fbdev); if (r) { @@ -746,7 +746,7 @@ fail4: fail3: free_irq(OMAP_LCDC_IRQ, lcdc.fbdev); fail2: - clk_disable(lcdc.lcd_ck); + clk_disable_unprepare(lcdc.lcd_ck); fail1: clk_put(lcdc.lcd_ck); fail0: @@ -760,7 +760,7 @@ static void omap_lcdc_cleanup(void) free_fbmem(); omap_free_lcd_dma(); free_irq(OMAP_LCDC_IRQ, lcdc.fbdev); - clk_disable(lcdc.lcd_ck); + clk_disable_unprepare(lcdc.lcd_ck); clk_put(lcdc.lcd_ck); } diff --git a/drivers/video/fbdev/omap/sossi.c b/drivers/video/fbdev/omap/sossi.c index 80ac67f27f0d..b9cb8b386627 100644 --- a/drivers/video/fbdev/omap/sossi.c +++ b/drivers/video/fbdev/omap/sossi.c @@ -598,7 +598,7 @@ static int sossi_init(struct omapfb_device *fbdev) l &= ~CONF_SOSSI_RESET_R; omap_writel(l, MOD_CONF_CTRL_1); - clk_enable(sossi.fck); + clk_prepare_enable(sossi.fck); l = omap_readl(ARM_IDLECT2); l &= ~(1 << 8); /* DMACK_REQ */ omap_writel(l, ARM_IDLECT2); @@ -649,7 +649,7 @@ static int sossi_init(struct omapfb_device *fbdev) return 0; err: - clk_disable(sossi.fck); + clk_disable_unprepare(sossi.fck); clk_put(sossi.fck); return r; } @@ -657,6 +657,7 @@ err: static void sossi_cleanup(void) { omap_lcdc_free_dma_callback(); + clk_unprepare(sossi.fck); clk_put(sossi.fck); iounmap(sossi.base); } From 6c4d636bc00dc17c63ffb2a73a0da850240e26e3 Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Thu, 10 Feb 2022 10:06:37 +0000 Subject: [PATCH 014/803] arm64: dts: meson: remove CPU opps below 1GHz for G12B boards Amlogic G12B devices experience CPU stalls and random board wedges when the system idles and CPU cores clock down to lower opp points. Recent vendor kernels include a change to remove 100-250MHz and other distro sources also remove the 500/667MHz points. Unless all 100-667Mhz opps are removed or the CPU governor forced to performance stalls are still observed, so let's remove them to improve stability and uptime. Fixes: b96d4e92709b ("arm64: dts: meson-g12b: support a311d and s922x cpu operating points") Signed-off-by: Christian Hewitt Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20220210100638.19130-2-christianshewitt@gmail.com --- .../boot/dts/amlogic/meson-g12b-a311d.dtsi | 40 ------------------- .../boot/dts/amlogic/meson-g12b-s922x.dtsi | 40 ------------------- 2 files changed, 80 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi index d61f43052a34..8e9ad1e51d66 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi @@ -11,26 +11,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <731000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <731000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <731000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <667000000>; - opp-microvolt = <731000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <761000>; @@ -71,26 +51,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <731000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <731000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <731000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <667000000>; - opp-microvolt = <731000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <731000>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi index 1e5d0ee5d541..44c23c984034 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi @@ -11,26 +11,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <731000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <731000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <731000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <667000000>; - opp-microvolt = <731000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <731000>; @@ -76,26 +56,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <751000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <751000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <751000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <667000000>; - opp-microvolt = <751000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <771000>; From fd86d85401c2049f652293877c0f7e6e5afc3bbc Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Thu, 10 Feb 2022 10:06:38 +0000 Subject: [PATCH 015/803] arm64: dts: meson: remove CPU opps below 1GHz for SM1 boards Amlogic SM1 devices experience CPU stalls and random board wedges when the system idles and CPU cores clock down to lower opp points. Recent vendor kernels include a change to remove 100-250MHz and other distro sources also remove the 500/667MHz points. Unless all 100-667Mhz opps are removed or the CPU governor forced to performance stalls are still observed, so let's remove them to improve stability and uptime. Fixes: 3d9e76483049 ("arm64: dts: meson-sm1-sei610: enable DVFS") Signed-off-by: Christian Hewitt Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20220210100638.19130-3-christianshewitt@gmail.com --- arch/arm64/boot/dts/amlogic/meson-sm1.dtsi | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi b/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi index 3c07a89bfd27..80737731af3f 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi @@ -95,26 +95,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <730000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <730000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <730000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <666666666>; - opp-microvolt = <750000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <770000>; From 460bfa65b0de72f4d8a808bc7cfb1cb591a95b18 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Mar 2022 15:23:54 +0300 Subject: [PATCH 016/803] iio: dac: ad3552r: fix signedness bug in ad3552r_reset() The "val" variable is used to store either negative error codes from ad3552r_read_reg_wrapper() or positive u16 values on success. It needs to be signed for the error handling to work correctly. Fixes: 8f2b54824b28 ("drivers:iio:dac: Add AD3552R driver support") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20220316122354.GA16825@kili Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad3552r.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/dac/ad3552r.c b/drivers/iio/dac/ad3552r.c index 97f13c0b9631..e0a93b27e0e8 100644 --- a/drivers/iio/dac/ad3552r.c +++ b/drivers/iio/dac/ad3552r.c @@ -656,7 +656,7 @@ static int ad3552r_reset(struct ad3552r_desc *dac) { struct reg_addr_pool addr; int ret; - u16 val; + int val; dac->gpio_reset = devm_gpiod_get_optional(&dac->spi->dev, "reset", GPIOD_OUT_LOW); From f50232193e61cf89a73130b5e843fef30763c428 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 28 Feb 2022 18:52:23 -0800 Subject: [PATCH 017/803] iio: scd4x: check return of scd4x_write_and_fetch Clang static analysis reports this problem scd4x.c:474:10: warning: The left operand of '==' is a garbage value if (val == 0xff) { ~~~ ^ val is only set from a successful call to scd4x_write_and_fetch() So check it's return. Fixes: 49d22b695cbb ("drivers: iio: chemical: Add support for Sensirion SCD4x CO2 sensor") Signed-off-by: Tom Rix Link: https://lore.kernel.org/r/20220301025223.223223-1-trix@redhat.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/scd4x.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iio/chemical/scd4x.c b/drivers/iio/chemical/scd4x.c index 20d4e7584e92..37143b5526ee 100644 --- a/drivers/iio/chemical/scd4x.c +++ b/drivers/iio/chemical/scd4x.c @@ -471,12 +471,15 @@ static ssize_t calibration_forced_value_store(struct device *dev, ret = scd4x_write_and_fetch(state, CMD_FRC, arg, &val, sizeof(val)); mutex_unlock(&state->lock); + if (ret) + return ret; + if (val == 0xff) { dev_err(dev, "forced calibration has failed"); return -EINVAL; } - return ret ?: len; + return len; } static IIO_DEVICE_ATTR_RW(calibration_auto_enable, 0); From d926054d5565d3cfa2c7c3f7a48e79bcc10453ed Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sun, 27 Mar 2022 08:40:05 -0700 Subject: [PATCH 018/803] iio:imu:bmi160: disable regulator in error path Regulator should be disabled in error path as mentioned in _regulator_put(). Also disable accel if gyro cannot be enabled. [ 16.233604] WARNING: CPU: 0 PID: 2177 at drivers/regulator/core.c:2257 _regulator_put [ 16.240453] Call Trace: [ 16.240572] [ 16.240676] regulator_put+0x26/0x40 [ 16.240853] regulator_bulk_free+0x26/0x50 [ 16.241050] release_nodes+0x3f/0x70 [ 16.241225] devres_release_group+0x147/0x1c0 [ 16.241441] ? bmi160_core_probe+0x175/0x3a0 [bmi160_core] Fixes: 5dea3fb066f0 ("iio: imu: bmi160: added regulator support") Reviewed-by: Andy Shevchenko Signed-off-by: Tong Zhang Link: https://lore.kernel.org/r/20220327154005.806049-1-ztong0001@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/bmi160/bmi160_core.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/iio/imu/bmi160/bmi160_core.c b/drivers/iio/imu/bmi160/bmi160_core.c index 824b5124a5f5..01336105792e 100644 --- a/drivers/iio/imu/bmi160/bmi160_core.c +++ b/drivers/iio/imu/bmi160/bmi160_core.c @@ -730,7 +730,7 @@ static int bmi160_chip_init(struct bmi160_data *data, bool use_spi) ret = regmap_write(data->regmap, BMI160_REG_CMD, BMI160_CMD_SOFTRESET); if (ret) - return ret; + goto disable_regulator; usleep_range(BMI160_SOFTRESET_USLEEP, BMI160_SOFTRESET_USLEEP + 1); @@ -741,29 +741,37 @@ static int bmi160_chip_init(struct bmi160_data *data, bool use_spi) if (use_spi) { ret = regmap_read(data->regmap, BMI160_REG_DUMMY, &val); if (ret) - return ret; + goto disable_regulator; } ret = regmap_read(data->regmap, BMI160_REG_CHIP_ID, &val); if (ret) { dev_err(dev, "Error reading chip id\n"); - return ret; + goto disable_regulator; } if (val != BMI160_CHIP_ID_VAL) { dev_err(dev, "Wrong chip id, got %x expected %x\n", val, BMI160_CHIP_ID_VAL); - return -ENODEV; + ret = -ENODEV; + goto disable_regulator; } ret = bmi160_set_mode(data, BMI160_ACCEL, true); if (ret) - return ret; + goto disable_regulator; ret = bmi160_set_mode(data, BMI160_GYRO, true); if (ret) - return ret; + goto disable_accel; return 0; + +disable_accel: + bmi160_set_mode(data, BMI160_ACCEL, false); + +disable_regulator: + regulator_bulk_disable(ARRAY_SIZE(data->supplies), data->supplies); + return ret; } static int bmi160_data_rdy_trigger_set_state(struct iio_trigger *trig, From a2a43fd9d84aec15f8c3dc434d50cd59d8a116b2 Mon Sep 17 00:00:00 2001 From: Jose Cazarin Date: Fri, 25 Mar 2022 01:43:40 +0200 Subject: [PATCH 019/803] iio: dac: dac5571: Fix chip id detection for OF devices When matching an OF device, the match mechanism tries all components of the compatible property. This can result with a device matched with a compatible string that isn't the first in the compatible list. For instance, with a compatible property set to compatible = "ti,dac081c081", "ti,dac5571"; the driver will match the second compatible string, as the first one isn't listed in the of_device_id table. The device will however be named "dac081c081" by the I2C core. This causes an issue when identifying the chip. The probe function receives a i2c_device_id that comes from the module's I2C device ID table. There is no entry in that table for "dac081c081", which results in a NULL pointer passed to the probe function. To fix this, add chip_id information in the data field of the OF device ID table, and retrieve it with device_get_match_data() for OF devices. Signed-off-by: Jose Cazarin Reviewed-by: Laurent Pinchart Signed-off-by: Laurent Pinchart Link: https://lore.kernel.org/r/20220324234340.32402-1-laurent.pinchart@ideasonboard.com Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ti-dac5571.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/iio/dac/ti-dac5571.c b/drivers/iio/dac/ti-dac5571.c index 4a3b8d875518..0b775f943db3 100644 --- a/drivers/iio/dac/ti-dac5571.c +++ b/drivers/iio/dac/ti-dac5571.c @@ -19,6 +19,7 @@ #include #include #include +#include #include enum chip_id { @@ -311,6 +312,7 @@ static int dac5571_probe(struct i2c_client *client, const struct dac5571_spec *spec; struct dac5571_data *data; struct iio_dev *indio_dev; + enum chip_id chip_id; int ret, i; indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); @@ -326,7 +328,13 @@ static int dac5571_probe(struct i2c_client *client, indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = dac5571_channels; - spec = &dac5571_spec[id->driver_data]; + if (dev_fwnode(dev)) + chip_id = (uintptr_t)device_get_match_data(dev); + else + chip_id = id->driver_data; + + spec = &dac5571_spec[chip_id]; + indio_dev->num_channels = spec->num_channels; data->spec = spec; @@ -385,15 +393,15 @@ static int dac5571_remove(struct i2c_client *i2c) } static const struct of_device_id dac5571_of_id[] = { - {.compatible = "ti,dac5571"}, - {.compatible = "ti,dac6571"}, - {.compatible = "ti,dac7571"}, - {.compatible = "ti,dac5574"}, - {.compatible = "ti,dac6574"}, - {.compatible = "ti,dac7574"}, - {.compatible = "ti,dac5573"}, - {.compatible = "ti,dac6573"}, - {.compatible = "ti,dac7573"}, + {.compatible = "ti,dac5571", .data = (void *)single_8bit}, + {.compatible = "ti,dac6571", .data = (void *)single_10bit}, + {.compatible = "ti,dac7571", .data = (void *)single_12bit}, + {.compatible = "ti,dac5574", .data = (void *)quad_8bit}, + {.compatible = "ti,dac6574", .data = (void *)quad_10bit}, + {.compatible = "ti,dac7574", .data = (void *)quad_12bit}, + {.compatible = "ti,dac5573", .data = (void *)quad_8bit}, + {.compatible = "ti,dac6573", .data = (void *)quad_10bit}, + {.compatible = "ti,dac7573", .data = (void *)quad_12bit}, {} }; MODULE_DEVICE_TABLE(of, dac5571_of_id); From b55b38f7cc12da3b9ef36e7a3b7f8f96737df4d5 Mon Sep 17 00:00:00 2001 From: Zizhuang Deng Date: Thu, 10 Mar 2022 20:54:50 +0800 Subject: [PATCH 020/803] iio: dac: ad5592r: Fix the missing return value. The third call to `fwnode_property_read_u32` did not record the return value, resulting in `channel_offstate` possibly being assigned the wrong value. Fixes: 56ca9db862bf ("iio: dac: Add support for the AD5592R/AD5593R ADCs/DACs") Signed-off-by: Zizhuang Deng Link: https://lore.kernel.org/r/20220310125450.4164164-1-sunsetdzz@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5592r-base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/dac/ad5592r-base.c b/drivers/iio/dac/ad5592r-base.c index a424b7220b61..4434c1b2a322 100644 --- a/drivers/iio/dac/ad5592r-base.c +++ b/drivers/iio/dac/ad5592r-base.c @@ -522,7 +522,7 @@ static int ad5592r_alloc_channels(struct iio_dev *iio_dev) if (!ret) st->channel_modes[reg] = tmp; - fwnode_property_read_u32(child, "adi,off-state", &tmp); + ret = fwnode_property_read_u32(child, "adi,off-state", &tmp); if (!ret) st->channel_offstate[reg] = tmp; } From d85cce86a86746354fffb688dd134609c8277adc Mon Sep 17 00:00:00 2001 From: Wang ShaoBo Date: Sun, 20 Mar 2022 13:54:57 +0800 Subject: [PATCH 021/803] iio:filter:admv8818: select REGMAP_SPI for ADMV8818 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit admv8818 driver needs __devm_regmap_init_spi() which is defined when CONFIG_REGMAP_SPI is set and struct regmap_config when CONFIG_REGMAP is set, so automatically select CONFIG_REGMAP_SPI which also sets CONFIG_REGMAP. Fixes: f34fe888ad05 ("iio:filter:admv8818: add support for ADMV8818") Signed-off-by: Wang ShaoBo Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20220320055457.254983-1-bobo.shaobowang@huawei.com Signed-off-by: Jonathan Cameron --- drivers/iio/filter/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/filter/Kconfig b/drivers/iio/filter/Kconfig index 3ae35817ad82..a85b345ea14e 100644 --- a/drivers/iio/filter/Kconfig +++ b/drivers/iio/filter/Kconfig @@ -8,6 +8,7 @@ menu "Filters" config ADMV8818 tristate "Analog Devices ADMV8818 High-Pass and Low-Pass Filter" depends on SPI && COMMON_CLK && 64BIT + select REGMAP_SPI help Say yes here to build support for Analog Devices ADMV8818 2 GHz to 18 GHz, Digitally Tunable, High-Pass and Low-Pass Filter. From 03779df928a6b34e18b28c17c94627fe014304b3 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sat, 26 Feb 2022 17:56:04 +0000 Subject: [PATCH 022/803] iio: adc: ad7280a: Fix wrong variable used when setting thresholds. Name of variable change missed in refactoring patch. Fixes: 112bf4aa4afb ("staging:iio:adc:ad7280a: Switch to standard event control") Reported-by: Colin Ian King Signed-off-by: Jonathan Cameron Cc: Marcelo Schmitt Reviewed-by: Marcelo Schmitt Link: https://lore.kernel.org/r/20220226175604.662422-1-jic23@kernel.org --- drivers/iio/adc/ad7280a.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/iio/adc/ad7280a.c b/drivers/iio/adc/ad7280a.c index ef9d27759961..ec9acbf12b9a 100644 --- a/drivers/iio/adc/ad7280a.c +++ b/drivers/iio/adc/ad7280a.c @@ -745,7 +745,7 @@ static int ad7280a_write_thresh(struct iio_dev *indio_dev, case IIO_EV_DIR_RISING: addr = AD7280A_CELL_OVERVOLTAGE_REG; ret = ad7280_write(st, AD7280A_DEVADDR_MASTER, addr, - 1, val); + 1, value); if (ret) break; st->cell_threshhigh = value; @@ -753,7 +753,7 @@ static int ad7280a_write_thresh(struct iio_dev *indio_dev, case IIO_EV_DIR_FALLING: addr = AD7280A_CELL_UNDERVOLTAGE_REG; ret = ad7280_write(st, AD7280A_DEVADDR_MASTER, addr, - 1, val); + 1, value); if (ret) break; st->cell_threshlow = value; @@ -770,18 +770,18 @@ static int ad7280a_write_thresh(struct iio_dev *indio_dev, case IIO_EV_DIR_RISING: addr = AD7280A_AUX_ADC_OVERVOLTAGE_REG; ret = ad7280_write(st, AD7280A_DEVADDR_MASTER, addr, - 1, val); + 1, value); if (ret) break; - st->aux_threshhigh = val; + st->aux_threshhigh = value; break; case IIO_EV_DIR_FALLING: addr = AD7280A_AUX_ADC_UNDERVOLTAGE_REG; ret = ad7280_write(st, AD7280A_DEVADDR_MASTER, addr, - 1, val); + 1, value); if (ret) break; - st->aux_threshlow = val; + st->aux_threshlow = value; break; default: ret = -EINVAL; From 74a53a959028e5f28e3c0e9445a876e5c8da147c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 31 Mar 2022 14:04:25 -0700 Subject: [PATCH 023/803] iio:proximity:sx_common: Fix device property parsing on DT systems After commit 7a3605bef878 ("iio: sx9310: Support ACPI property") we started using the 'indio_dev->dev' to extract device properties for various register settings in sx9310_get_default_reg(). This broke DT based systems because dev_fwnode() used in the device_property*() APIs can't find an 'of_node'. That's because the 'indio_dev->dev.of_node' pointer isn't set until iio_device_register() is called. Set the pointer earlier, next to where the ACPI companion is set, so that the device property APIs work on DT systems. Cc: Gwendal Grignou Fixes: 7a3605bef878 ("iio: sx9310: Support ACPI property") Signed-off-by: Stephen Boyd Reviewed-by: Gwendal Grignou Link: https://lore.kernel.org/r/20220331210425.3908278-1-swboyd@chromium.org Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/sx_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/proximity/sx_common.c b/drivers/iio/proximity/sx_common.c index a7c07316a0a9..8ad814d96b7e 100644 --- a/drivers/iio/proximity/sx_common.c +++ b/drivers/iio/proximity/sx_common.c @@ -521,6 +521,7 @@ int sx_common_probe(struct i2c_client *client, return dev_err_probe(dev, ret, "error reading WHOAMI\n"); ACPI_COMPANION_SET(&indio_dev->dev, ACPI_COMPANION(dev)); + indio_dev->dev.of_node = client->dev.of_node; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = data->chip_info->iio_channels; From 108e4d4de2b58011eafd14581b6ea7469f1fc467 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 24 Mar 2022 15:29:28 -0700 Subject: [PATCH 024/803] iio:proximity:sx9324: Fix hardware gain read/write There are four possible gain values according to 'sx9324_gain_vals[]': 1, 2, 4, and 8 The values are off by one when writing and reading the register. The bits should be set according to this equation: ilog2() + 1 so that a gain of 8 is 0x4 in the register field and a gain of 4 is 0x3 in the register field, etc. Note that a gain of 0 is reserved per the datasheet. The default gain (SX9324_REG_PROX_CTRL0_GAIN_1) is also wrong. It should be 0x1 << 3, i.e. 0x8, not 0x80 which is setting the reserved bit 7. Fix this all up to properly handle the hardware gain and return errors for invalid settings. Fixes: 4c18a890dff8 ("iio:proximity:sx9324: Add SX9324 support") Signed-off-by: Stephen Boyd Reviewed-by: Gwendal Grignou Link: https://lore.kernel.org/r/20220324222928.874522-1-swboyd@chromium.org Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/sx9324.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/iio/proximity/sx9324.c b/drivers/iio/proximity/sx9324.c index 0d9bbbb50cb4..6e90917e3e36 100644 --- a/drivers/iio/proximity/sx9324.c +++ b/drivers/iio/proximity/sx9324.c @@ -76,7 +76,10 @@ #define SX9324_REG_PROX_CTRL0 0x30 #define SX9324_REG_PROX_CTRL0_GAIN_MASK GENMASK(5, 3) -#define SX9324_REG_PROX_CTRL0_GAIN_1 0x80 +#define SX9324_REG_PROX_CTRL0_GAIN_SHIFT 3 +#define SX9324_REG_PROX_CTRL0_GAIN_RSVD 0x0 +#define SX9324_REG_PROX_CTRL0_GAIN_1 0x1 +#define SX9324_REG_PROX_CTRL0_GAIN_8 0x4 #define SX9324_REG_PROX_CTRL0_RAWFILT_MASK GENMASK(2, 0) #define SX9324_REG_PROX_CTRL0_RAWFILT_1P50 0x01 #define SX9324_REG_PROX_CTRL1 0x31 @@ -379,7 +382,14 @@ static int sx9324_read_gain(struct sx_common_data *data, if (ret) return ret; - *val = 1 << FIELD_GET(SX9324_REG_PROX_CTRL0_GAIN_MASK, regval); + regval = FIELD_GET(SX9324_REG_PROX_CTRL0_GAIN_MASK, regval); + if (regval) + regval--; + else if (regval == SX9324_REG_PROX_CTRL0_GAIN_RSVD || + regval > SX9324_REG_PROX_CTRL0_GAIN_8) + return -EINVAL; + + *val = 1 << regval; return IIO_VAL_INT; } @@ -725,8 +735,12 @@ static int sx9324_write_gain(struct sx_common_data *data, unsigned int gain, reg; int ret; - gain = ilog2(val); reg = SX9324_REG_PROX_CTRL0 + chan->channel / 2; + + gain = ilog2(val) + 1; + if (val <= 0 || gain > SX9324_REG_PROX_CTRL0_GAIN_8) + return -EINVAL; + gain = FIELD_PREP(SX9324_REG_PROX_CTRL0_GAIN_MASK, gain); mutex_lock(&data->mutex); @@ -784,9 +798,11 @@ static const struct sx_common_reg_default sx9324_default_regs[] = { { SX9324_REG_AFE_CTRL8, SX9324_REG_AFE_CTRL8_RESFILTN_4KOHM }, { SX9324_REG_AFE_CTRL9, SX9324_REG_AFE_CTRL9_AGAIN_1 }, - { SX9324_REG_PROX_CTRL0, SX9324_REG_PROX_CTRL0_GAIN_1 | + { SX9324_REG_PROX_CTRL0, + SX9324_REG_PROX_CTRL0_GAIN_1 << SX9324_REG_PROX_CTRL0_GAIN_SHIFT | SX9324_REG_PROX_CTRL0_RAWFILT_1P50 }, - { SX9324_REG_PROX_CTRL1, SX9324_REG_PROX_CTRL0_GAIN_1 | + { SX9324_REG_PROX_CTRL1, + SX9324_REG_PROX_CTRL0_GAIN_1 << SX9324_REG_PROX_CTRL0_GAIN_SHIFT | SX9324_REG_PROX_CTRL0_RAWFILT_1P50 }, { SX9324_REG_PROX_CTRL2, SX9324_REG_PROX_CTRL2_AVGNEG_THRESH_16K }, { SX9324_REG_PROX_CTRL3, SX9324_REG_PROX_CTRL3_AVGDEB_2SAMPLES | From 9fe4e0d3cbfe90152137963cc024ecb63db6e8e6 Mon Sep 17 00:00:00 2001 From: Chuanhong Guo Date: Sun, 3 Apr 2022 00:03:13 +0800 Subject: [PATCH 025/803] mtd: rawnand: fix ecc parameters for mt7622 According to the datasheet, mt7622 only has 5 ECC capabilities instead of 7, and the decoding error register is arranged as follows: +------+---------+---------+---------+---------+ | Bits | 19:15 | 14:10 | 9:5 | 4:0 | +------+---------+---------+---------+---------+ | Name | ERRNUM3 | ERRNUM2 | ERRNUM1 | ERRNUM0 | +------+---------+---------+---------+---------+ This means err_mask should be 0x1f instead of 0x3f and the number of bits shifted in mtk_ecc_get_stats should be 5 instead of 8. This commit introduces err_shift for the difference in this register and fix other existing parameters. Public MT7622 reference manual can be found on [0] and the info this commit is based on is from page 656 and page 660. [0]: https://wiki.banana-pi.org/Banana_Pi_BPI-R64#Documents Fixes: 98dea8d71931 ("mtd: nand: mtk: Support MT7622 NAND flash controller.") Signed-off-by: Chuanhong Guo Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220402160315.919094-1-gch981213@gmail.com --- drivers/mtd/nand/raw/mtk_ecc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/raw/mtk_ecc.c b/drivers/mtd/nand/raw/mtk_ecc.c index e7df3dac705e..49ab3448b9b1 100644 --- a/drivers/mtd/nand/raw/mtk_ecc.c +++ b/drivers/mtd/nand/raw/mtk_ecc.c @@ -43,6 +43,7 @@ struct mtk_ecc_caps { u32 err_mask; + u32 err_shift; const u8 *ecc_strength; const u32 *ecc_regs; u8 num_ecc_strength; @@ -76,7 +77,7 @@ static const u8 ecc_strength_mt2712[] = { }; static const u8 ecc_strength_mt7622[] = { - 4, 6, 8, 10, 12, 14, 16 + 4, 6, 8, 10, 12 }; enum mtk_ecc_regs { @@ -221,7 +222,7 @@ void mtk_ecc_get_stats(struct mtk_ecc *ecc, struct mtk_ecc_stats *stats, for (i = 0; i < sectors; i++) { offset = (i >> 2) << 2; err = readl(ecc->regs + ECC_DECENUM0 + offset); - err = err >> ((i % 4) * 8); + err = err >> ((i % 4) * ecc->caps->err_shift); err &= ecc->caps->err_mask; if (err == ecc->caps->err_mask) { /* uncorrectable errors */ @@ -449,6 +450,7 @@ EXPORT_SYMBOL(mtk_ecc_get_parity_bits); static const struct mtk_ecc_caps mtk_ecc_caps_mt2701 = { .err_mask = 0x3f, + .err_shift = 8, .ecc_strength = ecc_strength_mt2701, .ecc_regs = mt2701_ecc_regs, .num_ecc_strength = 20, @@ -459,6 +461,7 @@ static const struct mtk_ecc_caps mtk_ecc_caps_mt2701 = { static const struct mtk_ecc_caps mtk_ecc_caps_mt2712 = { .err_mask = 0x7f, + .err_shift = 8, .ecc_strength = ecc_strength_mt2712, .ecc_regs = mt2712_ecc_regs, .num_ecc_strength = 23, @@ -468,10 +471,11 @@ static const struct mtk_ecc_caps mtk_ecc_caps_mt2712 = { }; static const struct mtk_ecc_caps mtk_ecc_caps_mt7622 = { - .err_mask = 0x3f, + .err_mask = 0x1f, + .err_shift = 5, .ecc_strength = ecc_strength_mt7622, .ecc_regs = mt7622_ecc_regs, - .num_ecc_strength = 7, + .num_ecc_strength = 5, .ecc_mode_shift = 4, .parity_bits = 13, .pg_irq_sel = 0, From d6732317a5525a95a7eb2d12b46e3e42d321c6b6 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 25 Mar 2022 15:24:34 -0700 Subject: [PATCH 026/803] hwmon: (xdpe12284) Fix build warning seen if CONFIG_SENSORS_XDPE122_REGULATOR is disabled 0-day reports: drivers/hwmon/pmbus/xdpe12284.c:127:36: warning: unused variable 'xdpe122_reg_desc' This is seen if CONFIG_SENSORS_XDPE122_REGULATOR is not enabled. Mark xdpe122_reg_desc as __maybe_unused to fix the problem. Fixes: f53bfe4d6984 ("hwmon: (xdpe12284) Add regulator support") Reported-by: kernel test robot Cc: Marcello Sylvester Bauer Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/xdpe12284.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/xdpe12284.c b/drivers/hwmon/pmbus/xdpe12284.c index 18fffc5d749b..32bc7736d609 100644 --- a/drivers/hwmon/pmbus/xdpe12284.c +++ b/drivers/hwmon/pmbus/xdpe12284.c @@ -124,7 +124,7 @@ static int xdpe122_identify(struct i2c_client *client, return 0; } -static const struct regulator_desc xdpe122_reg_desc[] = { +static const struct regulator_desc __maybe_unused xdpe122_reg_desc[] = { PMBUS_REGULATOR("vout", 0), PMBUS_REGULATOR("vout", 1), }; From 4fd45cc8568e6086272d3036f2c29d61e9b776a1 Mon Sep 17 00:00:00 2001 From: Denis Pauk Date: Sun, 3 Apr 2022 22:34:54 +0300 Subject: [PATCH 027/803] hwmon: (asus_wmi_sensors) Fix CROSSHAIR VI HERO name CROSSHAIR VI HERO motherboard is incorrectly named as ROG CROSSHAIR VI HERO. Signed-off-by: Denis Pauk Link: https://lore.kernel.org/r/20220403193455.1363-1-pauk.denis@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/asus_wmi_sensors.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/asus_wmi_sensors.c b/drivers/hwmon/asus_wmi_sensors.c index 8fdcb62ae52d..9e935e34c998 100644 --- a/drivers/hwmon/asus_wmi_sensors.c +++ b/drivers/hwmon/asus_wmi_sensors.c @@ -71,7 +71,7 @@ static const struct dmi_system_id asus_wmi_dmi_table[] = { DMI_EXACT_MATCH_ASUS_BOARD_NAME("PRIME X399-A"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("PRIME X470-PRO"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI EXTREME"), - DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI HERO"), + DMI_EXACT_MATCH_ASUS_BOARD_NAME("CROSSHAIR VI HERO"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI HERO (WI-FI AC)"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO (WI-FI)"), From abb860ac7e3f022a233f34b12d035d49abfc114d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 31 Mar 2022 21:45:26 +0200 Subject: [PATCH 028/803] pinctrl: samsung: staticize fsd_pin_ctrl struct fsd_pin_ctrl is not used outside of the file, so it can be made static. This fixes sparse warning: drivers/pinctrl/samsung/pinctrl-exynos-arm64.c:773:31: sparse: symbol 'fsd_pin_ctrl' was not declared. Should it be static? Reported-by: kernel test robot Fixes: 0d1b662c374c ("pinctrl: samsung: add FSD SoC specific data") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Alim Akhtar Link: https://lore.kernel.org/r/20220331194526.52444-1-krzysztof.kozlowski@linaro.org --- drivers/pinctrl/samsung/pinctrl-exynos-arm64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c index d291819c2f77..cb965cf93705 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c @@ -770,7 +770,7 @@ static const struct samsung_pin_bank_data fsd_pin_banks2[] __initconst = { EXYNOS850_PIN_BANK_EINTN(3, 0x00, "gpq0"), }; -const struct samsung_pin_ctrl fsd_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl fsd_pin_ctrl[] __initconst = { { /* pin-controller instance 0 FSYS0 data */ .pin_banks = fsd_pin_banks0, From b5e22886839ae466fcf03295150094516c0fd8eb Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Wed, 16 Mar 2022 21:50:47 +0800 Subject: [PATCH 029/803] tee: optee: add missing mutext_destroy in optee_ffa_probe The error handling code of optee_ffa_probe misses the mutex_destroy of ffa.mutex when mutext_init succeeds. Fix this by adding mutex_destory of ffa.mutex at the error handling part Fixes: aceeafefff73 ("optee: use driver internal tee_context for some rpc") Signed-off-by: Dongliang Mu Signed-off-by: Jens Wiklander --- drivers/tee/optee/ffa_abi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c index a5eb4ef46971..c9b3b2cfb2b2 100644 --- a/drivers/tee/optee/ffa_abi.c +++ b/drivers/tee/optee/ffa_abi.c @@ -865,6 +865,7 @@ err_rhashtable_free: rhashtable_free_and_destroy(&optee->ffa.global_ids, rh_free_fn, NULL); optee_supp_uninit(&optee->supp); mutex_destroy(&optee->call_queue.mutex); + mutex_destroy(&optee->ffa.mutex); err_unreg_supp_teedev: tee_device_unregister(optee->supp_teedev); err_unreg_teedev: From 0be0b70df6611205ac392d0e21f7e077f3230ee6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 5 Apr 2022 20:02:51 +0300 Subject: [PATCH 030/803] pinctrl: alderlake: Fix register offsets for ADL-N variant It appears that almost traditionally the N variants have deviations in the register offsets in comparison to S one. This is the case for Intel Alder Lake as well. Fix register offsets for ADL-N variant. Fixes: 114b610b9048 ("pinctrl: alderlake: Add Intel Alder Lake-N pin controller support") Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg --- drivers/pinctrl/intel/pinctrl-alderlake.c | 60 +++++++++++++++-------- 1 file changed, 40 insertions(+), 20 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-alderlake.c b/drivers/pinctrl/intel/pinctrl-alderlake.c index 32ba50efbceb..62dbd1e67513 100644 --- a/drivers/pinctrl/intel/pinctrl-alderlake.c +++ b/drivers/pinctrl/intel/pinctrl-alderlake.c @@ -14,11 +14,17 @@ #include "pinctrl-intel.h" -#define ADL_PAD_OWN 0x0a0 -#define ADL_PADCFGLOCK 0x110 -#define ADL_HOSTSW_OWN 0x150 -#define ADL_GPI_IS 0x200 -#define ADL_GPI_IE 0x220 +#define ADL_N_PAD_OWN 0x020 +#define ADL_N_PADCFGLOCK 0x080 +#define ADL_N_HOSTSW_OWN 0x0b0 +#define ADL_N_GPI_IS 0x100 +#define ADL_N_GPI_IE 0x120 + +#define ADL_S_PAD_OWN 0x0a0 +#define ADL_S_PADCFGLOCK 0x110 +#define ADL_S_HOSTSW_OWN 0x150 +#define ADL_S_GPI_IS 0x200 +#define ADL_S_GPI_IE 0x220 #define ADL_GPP(r, s, e, g) \ { \ @@ -28,14 +34,28 @@ .gpio_base = (g), \ } -#define ADL_COMMUNITY(b, s, e, g) \ +#define ADL_N_COMMUNITY(b, s, e, g) \ { \ .barno = (b), \ - .padown_offset = ADL_PAD_OWN, \ - .padcfglock_offset = ADL_PADCFGLOCK, \ - .hostown_offset = ADL_HOSTSW_OWN, \ - .is_offset = ADL_GPI_IS, \ - .ie_offset = ADL_GPI_IE, \ + .padown_offset = ADL_N_PAD_OWN, \ + .padcfglock_offset = ADL_N_PADCFGLOCK, \ + .hostown_offset = ADL_N_HOSTSW_OWN, \ + .is_offset = ADL_N_GPI_IS, \ + .ie_offset = ADL_N_GPI_IE, \ + .pin_base = (s), \ + .npins = ((e) - (s) + 1), \ + .gpps = (g), \ + .ngpps = ARRAY_SIZE(g), \ + } + +#define ADL_S_COMMUNITY(b, s, e, g) \ + { \ + .barno = (b), \ + .padown_offset = ADL_S_PAD_OWN, \ + .padcfglock_offset = ADL_S_PADCFGLOCK, \ + .hostown_offset = ADL_S_HOSTSW_OWN, \ + .is_offset = ADL_S_GPI_IS, \ + .ie_offset = ADL_S_GPI_IE, \ .pin_base = (s), \ .npins = ((e) - (s) + 1), \ .gpps = (g), \ @@ -342,10 +362,10 @@ static const struct intel_padgroup adln_community5_gpps[] = { }; static const struct intel_community adln_communities[] = { - ADL_COMMUNITY(0, 0, 66, adln_community0_gpps), - ADL_COMMUNITY(1, 67, 168, adln_community1_gpps), - ADL_COMMUNITY(2, 169, 248, adln_community4_gpps), - ADL_COMMUNITY(3, 249, 256, adln_community5_gpps), + ADL_N_COMMUNITY(0, 0, 66, adln_community0_gpps), + ADL_N_COMMUNITY(1, 67, 168, adln_community1_gpps), + ADL_N_COMMUNITY(2, 169, 248, adln_community4_gpps), + ADL_N_COMMUNITY(3, 249, 256, adln_community5_gpps), }; static const struct intel_pinctrl_soc_data adln_soc_data = { @@ -713,11 +733,11 @@ static const struct intel_padgroup adls_community5_gpps[] = { }; static const struct intel_community adls_communities[] = { - ADL_COMMUNITY(0, 0, 94, adls_community0_gpps), - ADL_COMMUNITY(1, 95, 150, adls_community1_gpps), - ADL_COMMUNITY(2, 151, 199, adls_community3_gpps), - ADL_COMMUNITY(3, 200, 269, adls_community4_gpps), - ADL_COMMUNITY(4, 270, 303, adls_community5_gpps), + ADL_S_COMMUNITY(0, 0, 94, adls_community0_gpps), + ADL_S_COMMUNITY(1, 95, 150, adls_community1_gpps), + ADL_S_COMMUNITY(2, 151, 199, adls_community3_gpps), + ADL_S_COMMUNITY(3, 200, 269, adls_community4_gpps), + ADL_S_COMMUNITY(4, 270, 303, adls_community5_gpps), }; static const struct intel_pinctrl_soc_data adls_soc_data = { From 9b6d368b082e1922ae55a669769bc98fba9e4833 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Wed, 23 Feb 2022 11:51:45 +0800 Subject: [PATCH 031/803] bus: imx-weim: fix NULL but dereferenced coccicheck error Fix following coccicheck warning: ./drivers/bus/imx-weim.c:355:18-21: ERROR: pdev is NULL but dereferenced. Signed-off-by: Wan Jiabing Acked-by: Ivan Bornyakov Signed-off-by: Shawn Guo --- drivers/bus/imx-weim.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c index 60fbd42041dd..2ea0a51f79f6 100644 --- a/drivers/bus/imx-weim.c +++ b/drivers/bus/imx-weim.c @@ -352,8 +352,7 @@ static int of_weim_notify(struct notifier_block *nb, unsigned long action, pdev = of_find_device_by_node(rd->dn); if (!pdev) { - dev_err(&pdev->dev, - "Could not find platform device for '%pOF'\n", + pr_err("Could not find platform device for '%pOF'\n", rd->dn); ret = notifier_from_errno(-EINVAL); From dc900431337f5f861e3cc47ec5be5a69db40ee34 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 28 Feb 2022 11:16:17 +0100 Subject: [PATCH 032/803] arm64: dts: imx8mm-venice: fix spi2 pin configuration Due to what looks like a copy-paste error, the ECSPI2_MISO pad is not muxed for SPI mode and causes reads from a slave-device connected to the SPI header to always return zero. Configure the ECSPI2_MISO pad for SPI mode on the gw71xx, gw72xx and gw73xx families of boards that got this wrong. Fixes: 6f30b27c5ef5 ("arm64: dts: imx8mm: Add Gateworks i.MX 8M Mini Development Kits") Cc: stable@vger.kernel.org # 5.12 Cc: Tim Harvey Signed-off-by: Johan Hovold Acked-by: Tim Harvey Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi | 2 +- arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi | 2 +- arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi index 73addc0b8e57..6acea1c28779 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi @@ -215,7 +215,7 @@ fsl,pins = < MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6 MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0xd6 - MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6 + MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0xd6 MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0xd6 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi index 1e7badb2a82e..353c3dc19d2a 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi @@ -309,7 +309,7 @@ fsl,pins = < MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6 MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0xd6 - MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6 + MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0xd6 MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0xd6 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi index 426483ec1f88..1db2e254af3a 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi @@ -358,7 +358,7 @@ fsl,pins = < MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6 MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0xd6 - MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6 + MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0xd6 MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0xd6 >; }; From 3739157768d746e581697c4cbd7ceb3a28040c06 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 20 Feb 2022 23:46:22 +0300 Subject: [PATCH 033/803] ARM: tegra_defconfig: Update CONFIG_TEGRA_VDE option The CONFIG_TEGRA_VDE has been deprecated and replaced with the new V4L options after de-staging of the tegra-vde driver. Update the config entry. Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding --- arch/arm/configs/tegra_defconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 289d022acc4b..c209722399d7 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -286,7 +286,8 @@ CONFIG_SERIO_NVEC_PS2=y CONFIG_NVEC_POWER=y CONFIG_NVEC_PAZ00=y CONFIG_STAGING_MEDIA=y -CONFIG_TEGRA_VDE=y +CONFIG_V4L_MEM2MEM_DRIVERS=y +CONFIG_VIDEO_TEGRA_VDE=y CONFIG_CHROME_PLATFORMS=y CONFIG_CROS_EC=y CONFIG_CROS_EC_I2C=m From 39ad93d280506f4953a9d0c545cfffa581889326 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 20 Feb 2022 23:46:23 +0300 Subject: [PATCH 034/803] ARM: config: multi v7: Enable NVIDIA Tegra video decoder driver Enable NVIDIA Tegra V4L2 video decoder driver. Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding --- arch/arm/configs/multi_v7_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 6e0c8c19b35c..d6a6811f0539 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -673,6 +673,7 @@ CONFIG_VIDEO_STI_DELTA=m CONFIG_VIDEO_RENESAS_FDP1=m CONFIG_VIDEO_RENESAS_JPU=m CONFIG_VIDEO_RENESAS_VSP1=m +CONFIG_VIDEO_TEGRA_VDE=m CONFIG_V4L_TEST_DRIVERS=y CONFIG_VIDEO_VIVID=m CONFIG_VIDEO_ADV7180=m From c887bdc4fb254a871e6180e18203152c548419f1 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Mon, 28 Mar 2022 15:39:31 +0800 Subject: [PATCH 035/803] clk: sunxi-ng: fix not NULL terminated coccicheck error Fix the following coccicheck error: ./drivers/clk/sunxi-ng/ccu-sun6i-rtc.c:348:1-2: sun6i_rtc_ccu_match is not NULL terminated at line 348 Fixes: d91612d7f01a ("clk: sunxi-ng: Add support for the sun6i RTC clocks") Signed-off-by: Wan Jiabing Reviewed-by: Jernej Skrabec Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20220328073931.36544-1-wanjiabing@vivo.com --- drivers/clk/sunxi-ng/ccu-sun6i-rtc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c b/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c index 8a10bade7e0d..ffb72d9a9c36 100644 --- a/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c +++ b/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c @@ -346,6 +346,7 @@ static const struct of_device_id sun6i_rtc_ccu_match[] = { .compatible = "allwinner,sun50i-r329-rtc", .data = &sun50i_r329_rtc_ccu_data, }, + {}, }; int sun6i_rtc_ccu_probe(struct device *dev, void __iomem *reg) From 836ffc47fa245e58cae51ac40c5ef71be8f4d480 Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Thu, 7 Apr 2022 09:01:22 +0000 Subject: [PATCH 036/803] video: fbdev: imxfb: Fix missing of_node_put in imxfb_probe of_parse_phandle returns node pointer with refcount incremented, use of_node_put() on it when done. Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Signed-off-by: Helge Deller --- drivers/video/fbdev/imxfb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c index 68288756ffff..a2f644c97f28 100644 --- a/drivers/video/fbdev/imxfb.c +++ b/drivers/video/fbdev/imxfb.c @@ -925,10 +925,12 @@ static int imxfb_probe(struct platform_device *pdev) sizeof(struct imx_fb_videomode), GFP_KERNEL); if (!fbi->mode) { ret = -ENOMEM; + of_node_put(display_np); goto failed_of_parse; } ret = imxfb_of_read_mode(&pdev->dev, display_np, fbi->mode); + of_node_put(display_np); if (ret) goto failed_of_parse; } From 8de8b71b787f38983d414d2dba169a3bfefa668a Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 6 Apr 2022 17:58:04 +0200 Subject: [PATCH 037/803] xsk: Fix l2fwd for copy mode + busy poll combo While checking AF_XDP copy mode combined with busy poll, strange results were observed. rxdrop and txonly scenarios worked fine, but l2fwd broke immediately. After a deeper look, it turned out that for l2fwd, Tx side was exiting early due to xsk_no_wakeup() returning true and in the end xsk_generic_xmit() was never called. Note that AF_XDP Tx in copy mode is syscall steered, so the current behavior is broken. Txonly scenario only worked due to the fact that sk_mark_napi_id_once_xdp() was never called - since Rx side is not in the picture for this case and mentioned function is called in xsk_rcv_check(), sk::sk_napi_id was never set, which in turn meant that xsk_no_wakeup() was returning false (see the sk->sk_napi_id >= MIN_NAPI_ID check in there). To fix this, prefer busy poll in xsk_sendmsg() only when zero copy is enabled on a given AF_XDP socket. By doing so, busy poll in copy mode would not exit early on Tx side and eventually xsk_generic_xmit() will be called. Fixes: a0731952d9cd ("xsk: Add busy-poll support for {recv,send}msg()") Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220406155804.434493-1-maciej.fijalkowski@intel.com --- net/xdp/xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 2c34caee0fd1..7d3a00cb24ec 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -639,7 +639,7 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len if (sk_can_busy_loop(sk)) sk_busy_loop(sk, 1); /* only support non-blocking sockets */ - if (xsk_no_wakeup(sk)) + if (xs->zc && xsk_no_wakeup(sk)) return 0; pool = xs->pool; From 9af9c58a099b57b818b15eca1e50cef1d222406e Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Fri, 8 Apr 2022 15:09:01 +0800 Subject: [PATCH 038/803] arm64: dts: remove cpu compatible "arm,armv8" for s4 Amlogic s4 device is already applied, but cpu compatible 'arm,armv8' is only valid for software models, so we remove it. Fixes: ac4dfd0d1d35 ("arm64: dts: add support for S4 based Amlogic AQ222") Signed-off-by: Xianwei Zhao Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20220408070901.26446-1-xianwei.zhao@amlogic.com --- arch/arm64/boot/dts/amlogic/meson-s4.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-s4.dtsi b/arch/arm64/boot/dts/amlogic/meson-s4.dtsi index bf9ae1e1016b..480afa2cc61f 100644 --- a/arch/arm64/boot/dts/amlogic/meson-s4.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-s4.dtsi @@ -13,28 +13,28 @@ cpu0: cpu@0 { device_type = "cpu"; - compatible = "arm,cortex-a35","arm,armv8"; + compatible = "arm,cortex-a35"; reg = <0x0 0x0>; enable-method = "psci"; }; cpu1: cpu@1 { device_type = "cpu"; - compatible = "arm,cortex-a35","arm,armv8"; + compatible = "arm,cortex-a35"; reg = <0x0 0x1>; enable-method = "psci"; }; cpu2: cpu@2 { device_type = "cpu"; - compatible = "arm,cortex-a35","arm,armv8"; + compatible = "arm,cortex-a35"; reg = <0x0 0x2>; enable-method = "psci"; }; cpu3: cpu@3 { device_type = "cpu"; - compatible = "arm,cortex-a35","arm,armv8"; + compatible = "arm,cortex-a35"; reg = <0x0 0x3>; enable-method = "psci"; }; From 7b2666ce445c700b8dcee994da44ddcf050a0842 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Thu, 7 Apr 2022 12:13:12 +0200 Subject: [PATCH 039/803] hwmon: (adt7470) Fix warning on module removal When removing the adt7470 module, a warning might be printed: do not call blocking ops when !TASK_RUNNING; state=1 set at [] adt7470_update_thread+0x7b/0x130 [adt7470] This happens because adt7470_update_thread() can leave the kthread in TASK_INTERRUPTIBLE state when the kthread is being stopped before the call of set_current_state(). Since kthread_exit() might sleep in exit_signals(), the warning is printed. Fix that by using schedule_timeout_interruptible() and removing the call of set_current_state(). This causes TASK_INTERRUPTIBLE to be set after kthread_should_stop() which might cause the kthread to exit. Reported-by: Zheyu Ma Fixes: 93cacfd41f82 (hwmon: (adt7470) Allow faster removal) Signed-off-by: Armin Wolf Tested-by: Zheyu Ma Link: https://lore.kernel.org/r/20220407101312.13331-1-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- drivers/hwmon/adt7470.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c index fb6d14d213a1..c67cd037a93f 100644 --- a/drivers/hwmon/adt7470.c +++ b/drivers/hwmon/adt7470.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -294,11 +295,10 @@ static int adt7470_update_thread(void *p) adt7470_read_temperatures(data); mutex_unlock(&data->lock); - set_current_state(TASK_INTERRUPTIBLE); if (kthread_should_stop()) break; - schedule_timeout(msecs_to_jiffies(data->auto_update_interval)); + schedule_timeout_interruptible(msecs_to_jiffies(data->auto_update_interval)); } return 0; From dd2737fab4a6ce9ba4eb84842bedbd87d55241a6 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Fri, 4 Mar 2022 16:04:43 +0800 Subject: [PATCH 040/803] arm64: dts: imx8qm: Correct SCU clock controller's compatible property The fsl,scu.txt dt-binding documentation explicitly mentions that the compatible string should be either "fsl,imx8qm-clock" or "fsl,imx8qxp-clock", followed by "fsl,scu-clk". Also, i.MX8qm SCU clocks and i.MX8qxp SCU clocks are really not the same, so we have to set the compatible property according to SoC name. Let's correct the i.MX8qm clock controller's compatible property from "fsl,imx8qxp-clk", "fsl,scu-clk" to "fsl,imx8qm-clk", "fsl,scu-clk" . Fixes: f2180be18a63 ("arm64: dts: imx: add imx8qm common dts file") Cc: Rob Herring Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Signed-off-by: Liu Ying Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8qm.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8qm.dtsi b/arch/arm64/boot/dts/freescale/imx8qm.dtsi index be8c76a0554c..4f767012f1f5 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qm.dtsi @@ -196,7 +196,7 @@ }; clk: clock-controller { - compatible = "fsl,imx8qxp-clk", "fsl,scu-clk"; + compatible = "fsl,imx8qm-clk", "fsl,scu-clk"; #clock-cells = <2>; }; From e91ac20889d1a26d077cc511365cd7ff4346a6f3 Mon Sep 17 00:00:00 2001 From: Weitao Wang Date: Fri, 8 Apr 2022 16:48:21 +0300 Subject: [PATCH 041/803] USB: Fix xhci event ring dequeue pointer ERDP update issue In some situations software handles TRB events slower than adding TRBs. If the number of TRB events to be processed in a given interrupt is exactly the same as the event ring size 256, then the local variable "event_ring_deq" that holds the initial dequeue position is equal to software_dequeue after handling all 256 interrupts. It will cause driver to not update ERDP to hardware, Software dequeue pointer is out of sync with ERDP on interrupt exit. On the next interrupt, the event ring may full but driver will not update ERDP as software_dequeue is equal to ERDP. [ 536.377115] xhci_hcd 0000:00:12.0: ERROR unknown event type 37 [ 566.933173] sd 8:0:0:0: [sdb] tag#27 uas_eh_abort_handler 0 uas-tag 7 inflight: CMD OUT [ 566.933181] sd 8:0:0:0: [sdb] tag#27 CDB: Write(10) 2a 00 17 71 e6 78 00 00 08 00 [ 572.041186] xhci_hcd On some situataions,the0000:00:12.0: xHCI host not responding to stop endpoint command. [ 572.057193] xhci_hcd 0000:00:12.0: Host halt failed, -110 [ 572.057196] xhci_hcd 0000:00:12.0: xHCI host controller not responding, assume dead [ 572.057236] sd 8:0:0:0: [sdb] tag#26 uas_eh_abort_handler 0 uas-tag 6 inflight: CMD [ 572.057240] sd 8:0:0:0: [sdb] tag#26 CDB: Write(10) 2a 00 38 eb cc d8 00 00 08 00 [ 572.057244] sd 8:0:0:0: [sdb] tag#25 uas_eh_abort_handler 0 uas-tag 5 inflight: CMD Hardware ERDP is updated mid event handling if there are more than 128 events in an interrupt (half of ring size). Fix this by updating the software local variable at the same time as hardware ERDP. [commit message rewording -Mathias] Fixes: dc0ffbea5729 ("usb: host: xhci: update event ring dequeue pointer on purpose") Reviewed-by: Peter Chen Signed-off-by: Weitao Wang Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220408134823.2527272-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index d0b6806275e0..f9707997969d 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3141,6 +3141,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) if (event_loop++ < TRBS_PER_SEGMENT / 2) continue; xhci_update_erst_dequeue(xhci, event_ring_deq); + event_ring_deq = xhci->event_ring->dequeue; /* ring is half-full, force isoc trbs to interrupt more often */ if (xhci->isoc_bei_interval > AVOID_BEI_INTERVAL_MIN) From dc92944a014cd6a6f6c94299aaa36164dd2c238a Mon Sep 17 00:00:00 2001 From: Henry Lin Date: Fri, 8 Apr 2022 16:48:22 +0300 Subject: [PATCH 042/803] xhci: stop polling roothubs after shutdown While rebooting, XHCI controller and its bus device will be shut down in order by .shutdown callback. Stopping roothubs polling in xhci_shutdown() can prevent XHCI driver from accessing port status after its bus device shutdown. Take PCIe XHCI controller as example, if XHCI driver doesn't stop roothubs polling, XHCI driver may access PCIe BAR register for port status after parent PCIe root port driver is shutdown and cause PCIe bus error. [check shared hcd exist before stopping its roothub polling -Mathias] Cc: stable@vger.kernel.org Signed-off-by: Henry Lin Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220408134823.2527272-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 642610c78f58..25b87e99b4dd 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -781,6 +781,17 @@ void xhci_shutdown(struct usb_hcd *hcd) if (xhci->quirks & XHCI_SPURIOUS_REBOOT) usb_disable_xhci_ports(to_pci_dev(hcd->self.sysdev)); + /* Don't poll the roothubs after shutdown. */ + xhci_dbg(xhci, "%s: stopping usb%d port polling.\n", + __func__, hcd->self.busnum); + clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); + del_timer_sync(&hcd->rh_timer); + + if (xhci->shared_hcd) { + clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags); + del_timer_sync(&xhci->shared_hcd->rh_timer); + } + spin_lock_irq(&xhci->lock); xhci_halt(xhci); /* Workaround for spurious wakeups at shutdown with HSW */ From 33597f0c48be0836854d43c577e35c8f8a765a7d Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 8 Apr 2022 16:48:23 +0300 Subject: [PATCH 043/803] xhci: increase usb U3 -> U0 link resume timeout from 100ms to 500ms The first U3 wake signal by the host may be lost if the USB 3 connection is tunneled over USB4, with a runtime suspended USB4 host, and firmware implemented connection manager. Specs state the host must wait 100ms (tU3WakeupRetryDelay) before resending a U3 wake signal if device doesn't respond, leading to U3 -> U0 link transition times around 270ms in the tunneled case. Fixes: 0200b9f790b0 ("xhci: Wait until link state trainsits to U0 after setting USB_SS_PORT_LS_U0") Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220408134823.2527272-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 1e7dc130c39a..f65f1ba2b592 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1434,7 +1434,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, } spin_unlock_irqrestore(&xhci->lock, flags); if (!wait_for_completion_timeout(&bus_state->u3exit_done[wIndex], - msecs_to_jiffies(100))) + msecs_to_jiffies(500))) xhci_dbg(xhci, "missing U0 port change event for port %d-%d\n", hcd->self.busnum, wIndex + 1); spin_lock_irqsave(&xhci->lock, flags); From b3fa25de31fb7e9afebe9599b8ff32eda13d7c94 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 29 Mar 2022 10:46:05 +0200 Subject: [PATCH 044/803] usb: cdns3: Fix issue for clear halt endpoint Path fixes bug which occurs during resetting endpoint in __cdns3_gadget_ep_clear_halt function. During resetting endpoint controller will change HW/DMA owned TRB. It set Abort flag in trb->control and will change trb->length field. If driver want to use the aborted trb it must update the changed field in TRB. Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") cc: Acked-by: Peter Chen Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20220329084605.4022-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index f9af7ebe003d..d6d515d598dc 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -2684,6 +2684,7 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) struct usb_request *request; struct cdns3_request *priv_req; struct cdns3_trb *trb = NULL; + struct cdns3_trb trb_tmp; int ret; int val; @@ -2693,8 +2694,10 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) if (request) { priv_req = to_cdns3_request(request); trb = priv_req->trb; - if (trb) + if (trb) { + trb_tmp = *trb; trb->control = trb->control ^ cpu_to_le32(TRB_CYCLE); + } } writel(EP_CMD_CSTALL | EP_CMD_EPRST, &priv_dev->regs->ep_cmd); @@ -2709,7 +2712,7 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) if (request) { if (trb) - trb->control = trb->control ^ cpu_to_le32(TRB_CYCLE); + *trb = trb_tmp; cdns3_rearm_transfer(priv_ep, 1); } From e2aa165cd0163cef83cb295eb572aa9fb1604cf4 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 20 Mar 2022 15:52:12 -0500 Subject: [PATCH 045/803] soc: imx: imx8m-blk-ctrl: Fix IMX8MN_DISPBLK_PD_ISI hang The imx8mn clock list for the ISI lists four clocks, but DOMAIN_MAX_CLKS was set to 3. Because of this, attempts to enable the fourth clock failed, threw some splat, and ultimately hung. Fixes: 7f511d514e8c ("soc: imx: imx8m-blk-ctrl: add i.MX8MN DISP blk-ctrl") Signed-off-by: Adam Ford Reviewed-by: Lucas Stach Signed-off-by: Shawn Guo --- drivers/soc/imx/imx8m-blk-ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/imx/imx8m-blk-ctrl.c b/drivers/soc/imx/imx8m-blk-ctrl.c index 122f9c884b38..ccd0577a771e 100644 --- a/drivers/soc/imx/imx8m-blk-ctrl.c +++ b/drivers/soc/imx/imx8m-blk-ctrl.c @@ -50,7 +50,7 @@ struct imx8m_blk_ctrl_domain_data { u32 mipi_phy_rst_mask; }; -#define DOMAIN_MAX_CLKS 3 +#define DOMAIN_MAX_CLKS 4 struct imx8m_blk_ctrl_domain { struct generic_pm_domain genpd; From fa51e1dc4b91375bc18349663a52395ad585bd3c Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 26 Mar 2022 12:14:55 -0300 Subject: [PATCH 046/803] ARM: dts: imx6qdl-apalis: Fix sgtl5000 detection issue On a custom carrier board with a i.MX6Q Apalis SoM, the sgtl5000 codec on the SoM is often not detected and the following error message is seen when the sgtl5000 driver tries to read the ID register: sgtl5000 1-000a: Error reading chip id -6 The reason for the error is that the MCLK clock is not provided early enough. Fix the problem by describing the MCLK pinctrl inside the codec node instead of placing it inside the audmux pinctrl group. With this change applied the sgtl5000 is always detected on every boot. Fixes: 693e3ffaae5a ("ARM: dts: imx6: Add support for Toradex Apalis iMX6Q/D SoM") Signed-off-by: Fabio Estevam Reviewed-by: Tim Harvey Acked-by: Max Krummenacher Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-apalis.dtsi | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-apalis.dtsi b/arch/arm/boot/dts/imx6qdl-apalis.dtsi index ed2739e39085..bd763bae596b 100644 --- a/arch/arm/boot/dts/imx6qdl-apalis.dtsi +++ b/arch/arm/boot/dts/imx6qdl-apalis.dtsi @@ -286,6 +286,8 @@ codec: sgtl5000@a { compatible = "fsl,sgtl5000"; reg = <0x0a>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_sgtl5000>; clocks = <&clks IMX6QDL_CLK_CKO>; VDDA-supply = <®_module_3v3_audio>; VDDIO-supply = <®_module_3v3>; @@ -517,8 +519,6 @@ MX6QDL_PAD_DISP0_DAT21__AUD4_TXD 0x130b0 MX6QDL_PAD_DISP0_DAT22__AUD4_TXFS 0x130b0 MX6QDL_PAD_DISP0_DAT23__AUD4_RXD 0x130b0 - /* SGTL5000 sys_mclk */ - MX6QDL_PAD_GPIO_5__CCM_CLKO1 0x130b0 >; }; @@ -811,6 +811,12 @@ >; }; + pinctrl_sgtl5000: sgtl5000grp { + fsl,pins = < + MX6QDL_PAD_GPIO_5__CCM_CLKO1 0x130b0 + >; + }; + pinctrl_spdif: spdifgrp { fsl,pins = < MX6QDL_PAD_GPIO_16__SPDIF_IN 0x1b0b0 From 3a26787dacf04257a68b16315c984eb2c340bc5e Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Sat, 9 Apr 2022 11:48:49 +0800 Subject: [PATCH 047/803] iio: magnetometer: ak8975: Fix the error handling in ak8975_power_on() When the driver fails to enable the regulator 'vid', we will get the following splat: [ 79.955610] WARNING: CPU: 5 PID: 441 at drivers/regulator/core.c:2257 _regulator_put+0x3ec/0x4e0 [ 79.959641] RIP: 0010:_regulator_put+0x3ec/0x4e0 [ 79.967570] Call Trace: [ 79.967773] [ 79.967951] regulator_put+0x1f/0x30 [ 79.968254] devres_release_group+0x319/0x3d0 [ 79.968608] i2c_device_probe+0x766/0x940 Fix this by disabling the 'vdd' regulator when failing to enable 'vid' regulator. Signed-off-by: Zheyu Ma Link: https://lore.kernel.org/r/20220409034849.3717231-2-zheyuma97@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/ak8975.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c index 088f748b683e..2432e697150c 100644 --- a/drivers/iio/magnetometer/ak8975.c +++ b/drivers/iio/magnetometer/ak8975.c @@ -416,6 +416,7 @@ static int ak8975_power_on(const struct ak8975_data *data) if (ret) { dev_warn(&data->client->dev, "Failed to enable specified Vid supply\n"); + regulator_disable(data->vdd); return ret; } From 89a01cd688d3c0ac983ef0b0e5f40018ab768317 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Wed, 6 Apr 2022 12:56:20 +0200 Subject: [PATCH 048/803] iio: dac: ad5446: Fix read_raw not returning set value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit read_raw should return the un-scaled value. Fixes: 5e06bdfb46e8b ("staging:iio:dac:ad5446: Return cached value for 'raw' attribute") Signed-off-by: Michael Hennerich Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20220406105620.1171340-1-michael.hennerich@analog.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5446.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c index 14cfabacbea5..fdf824041497 100644 --- a/drivers/iio/dac/ad5446.c +++ b/drivers/iio/dac/ad5446.c @@ -178,7 +178,7 @@ static int ad5446_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - *val = st->cached_val; + *val = st->cached_val >> chan->scan_type.shift; return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: *val = st->vref_mv; From d79478a79cfa393cde46bccb05d52fc7d875d2e2 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Wed, 6 Apr 2022 09:50:04 -0700 Subject: [PATCH 049/803] iio: sx9324: Fix default precharge internal resistance register Fix the default value for the register that set the resistance: it has to be 0x10 per datasheet. Fixes: 4c18a890dff8d ("iio:proximity:sx9324: Add SX9324 support") Cc: stable@vger.kernel.org Signed-off-by: Gwendal Grignou Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20220406165011.10202-2-gwendal@chromium.org Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/sx9324.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iio/proximity/sx9324.c b/drivers/iio/proximity/sx9324.c index 6e90917e3e36..70c37f664f6d 100644 --- a/drivers/iio/proximity/sx9324.c +++ b/drivers/iio/proximity/sx9324.c @@ -70,7 +70,8 @@ #define SX9324_REG_AFE_PH2 0x2a #define SX9324_REG_AFE_PH3 0x2b #define SX9324_REG_AFE_CTRL8 0x2c -#define SX9324_REG_AFE_CTRL8_RESFILTN_4KOHM 0x02 +#define SX9324_REG_AFE_CTRL8_RESERVED 0x10 +#define SX9324_REG_AFE_CTRL8_RESFILTIN_4KOHM 0x02 #define SX9324_REG_AFE_CTRL9 0x2d #define SX9324_REG_AFE_CTRL9_AGAIN_1 0x08 @@ -795,7 +796,8 @@ static const struct sx_common_reg_default sx9324_default_regs[] = { { SX9324_REG_AFE_PH2, 0x1a }, { SX9324_REG_AFE_PH3, 0x16 }, - { SX9324_REG_AFE_CTRL8, SX9324_REG_AFE_CTRL8_RESFILTN_4KOHM }, + { SX9324_REG_AFE_CTRL8, SX9324_REG_AFE_CTRL8_RESERVED | + SX9324_REG_AFE_CTRL8_RESFILTIN_4KOHM }, { SX9324_REG_AFE_CTRL9, SX9324_REG_AFE_CTRL9_AGAIN_1 }, { SX9324_REG_PROX_CTRL0, From de3b9fe9609a05d3c354c6718ca657962d11d9fe Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 4 Apr 2022 14:42:44 +0300 Subject: [PATCH 050/803] iio:dac:ad3552r: Fix an IS_ERR() vs NULL check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fwnode_get_named_child_node() function does not return error pointers. It returns NULL. Update the check accordingly. Fixes: 8f2b54824b28 ("drivers:iio:dac: Add AD3552R driver support") Signed-off-by: Dan Carpenter Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20220404114244.GA19201@kili Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad3552r.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/dac/ad3552r.c b/drivers/iio/dac/ad3552r.c index e0a93b27e0e8..d5ea1a1be122 100644 --- a/drivers/iio/dac/ad3552r.c +++ b/drivers/iio/dac/ad3552r.c @@ -809,10 +809,10 @@ static int ad3552r_configure_custom_gain(struct ad3552r_desc *dac, gain_child = fwnode_get_named_child_node(child, "custom-output-range-config"); - if (IS_ERR(gain_child)) { + if (!gain_child) { dev_err(dev, "mandatory custom-output-range-config property missing\n"); - return PTR_ERR(gain_child); + return -EINVAL; } dac->ch_data[ch].range_override = 1; From c7b45c79fb279e539346919a5c196e417925719e Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 31 Mar 2022 15:02:06 +0200 Subject: [PATCH 051/803] arm64: dts: imx8mq-tqma8mq: change the spi-nor tx This fixes the qspi read command by importing the changes from commit 04aa946d57b2 ("arm64: dts: imx8: change the spi-nor tx"). Fixes: b186b8b6e770 ("arm64: dts: freescale: add initial device tree for TQMa8Mx with i.MX8M") Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi index 38ffcd145b33..899e8e7dbc24 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi @@ -253,7 +253,7 @@ #address-cells = <1>; #size-cells = <1>; spi-max-frequency = <84000000>; - spi-tx-bus-width = <4>; + spi-tx-bus-width = <1>; spi-rx-bus-width = <4>; }; }; From 574518b7ccbaef74cb89eb1a1a0da88afa1e0113 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 4 Apr 2022 01:42:05 +0200 Subject: [PATCH 052/803] arm64: dts: imx8mn: Fix SAI nodes The most specific compatible string element should be "fsl,imx8mn-sai" on i.MX8M Nano, fix it from current "fsl,imx8mm-sai" (two Ms, likely due to copy-paste error from i.MX8M Mini). Fixes: 9e9860069725f ("arm64: dts: imx8mn: Add SAI nodes") Signed-off-by: Marek Vasut Cc: Adam Ford Cc: Fabio Estevam Cc: Peng Fan Cc: Shawn Guo Cc: NXP Linux Team To: linux-arm-kernel@lists.infradead.org Reviewed-by: Adam Ford Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn.dtsi | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index 99f0f5026674..5c0ca2490561 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -293,7 +293,7 @@ ranges; sai2: sai@30020000 { - compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai"; + compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x30020000 0x10000>; interrupts = ; clocks = <&clk IMX8MN_CLK_SAI2_IPG>, @@ -307,7 +307,7 @@ }; sai3: sai@30030000 { - compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai"; + compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x30030000 0x10000>; interrupts = ; clocks = <&clk IMX8MN_CLK_SAI3_IPG>, @@ -321,7 +321,7 @@ }; sai5: sai@30050000 { - compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai"; + compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x30050000 0x10000>; interrupts = ; clocks = <&clk IMX8MN_CLK_SAI5_IPG>, @@ -337,7 +337,7 @@ }; sai6: sai@30060000 { - compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai"; + compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x30060000 0x10000>; interrupts = ; clocks = <&clk IMX8MN_CLK_SAI6_IPG>, @@ -394,7 +394,7 @@ }; sai7: sai@300b0000 { - compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai"; + compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x300b0000 0x10000>; interrupts = ; clocks = <&clk IMX8MN_CLK_SAI7_IPG>, From 4c79865f3e8a2db93ec1e844509edfebe5a6ae56 Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Tue, 5 Apr 2022 12:35:09 -0700 Subject: [PATCH 053/803] ARM: dts: imx8mm-venice-gw{71xx,72xx,73xx}: fix OTG controller OC mode The GW71xx, GW72xx and GW73xx boards have USB1 routed to a USB OTG connectors and USB2 routed to a USB hub. The OTG connector has a over-currently protection with an active-low pin and the USB1 to HUB connection has no over-current protection (as the HUB itself implements this for its downstream ports). Add proper dt nodes to specify the over-current pin polarity for USB1 and disable over-current protection for USB2. Fixes: 6f30b27c5ef5 ("arm64: dts: imx8mm: Add Gateworks i.MX 8M Mini Development Kits") Cc: stable@vger.kernel.org Signed-off-by: Tim Harvey Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi | 2 ++ arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi | 2 ++ arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi | 2 ++ 3 files changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi index 6acea1c28779..cce55c3c5df0 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi @@ -146,12 +146,14 @@ &usbotg1 { dr_mode = "otg"; + over-current-active-low; vbus-supply = <®_usb_otg1_vbus>; status = "okay"; }; &usbotg2 { dr_mode = "host"; + disable-over-current; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi index 353c3dc19d2a..f61e4847fa49 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi @@ -211,12 +211,14 @@ &usbotg1 { dr_mode = "otg"; + over-current-active-low; vbus-supply = <®_usb_otg1_vbus>; status = "okay"; }; &usbotg2 { dr_mode = "host"; + disable-over-current; vbus-supply = <®_usb_otg2_vbus>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi index 1db2e254af3a..023619648966 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi @@ -238,12 +238,14 @@ &usbotg1 { dr_mode = "otg"; + over-current-active-low; vbus-supply = <®_usb_otg1_vbus>; status = "okay"; }; &usbotg2 { dr_mode = "host"; + disable-over-current; vbus-supply = <®_usb_otg2_vbus>; status = "okay"; }; From 7af1caf8781b9e4e53bf6b2a1de0deb3c122501a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 4 Mar 2022 14:25:18 -0600 Subject: [PATCH 054/803] ARM: dts: imx: Fix boolean properties with values Boolean properties in DT are present or not present and don't take a value. A property such as 'foo = <0>;' evaluated to true. IOW, the value doesn't matter. It may have been intended that 0 values are false, but there is no change in behavior with this patch. Signed-off-by: Rob Herring Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi | 32 +++++++++---------- .../dts/imx6ul-phytec-segin-peb-av-02.dtsi | 4 +-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi b/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi index 563bf9d44fe0..0b90c3f59f89 100644 --- a/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi @@ -154,112 +154,112 @@ regulators { bcore1 { regulator-name = "bcore1"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; bcore2 { regulator-name = "bcore2"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; bpro { regulator-name = "bpro"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; bperi { regulator-name = "bperi"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; bmem { regulator-name = "bmem"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo2 { regulator-name = "ldo2"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <1800000>; }; ldo3 { regulator-name = "ldo3"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo4 { regulator-name = "ldo4"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo5 { regulator-name = "ldo5"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo6 { regulator-name = "ldo6"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo7 { regulator-name = "ldo7"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo8 { regulator-name = "ldo8"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo9 { regulator-name = "ldo9"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo10 { regulator-name = "ldo10"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo11 { regulator-name = "ldo11"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; bio { regulator-name = "bio"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; }; diff --git a/arch/arm/boot/dts/imx6ul-phytec-segin-peb-av-02.dtsi b/arch/arm/boot/dts/imx6ul-phytec-segin-peb-av-02.dtsi index 7cda6944501d..205e4d462702 100644 --- a/arch/arm/boot/dts/imx6ul-phytec-segin-peb-av-02.dtsi +++ b/arch/arm/boot/dts/imx6ul-phytec-segin-peb-av-02.dtsi @@ -72,8 +72,8 @@ st,settling = <2>; st,fraction-z = <7>; st,i-drive = <1>; - touchscreen-inverted-x = <1>; - touchscreen-inverted-y = <1>; + touchscreen-inverted-x; + touchscreen-inverted-y; }; }; }; From f571e9c9aafed2fbd60fd99aa4b9823221338b98 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 4 Mar 2022 14:25:27 -0600 Subject: [PATCH 055/803] arm64: dts: imx: Fix imx8*-var-som touchscreen property sizes The common touchscreen properties are all 32-bit, not 16-bit. These properties must not be too important as they are all ignored in case of an error reading them. Signed-off-by: Rob Herring Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi | 8 ++++---- arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi index 1dc9d187601c..a0bd540f27d3 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi @@ -89,12 +89,12 @@ pendown-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <125>; - touchscreen-size-x = /bits/ 16 <4008>; + touchscreen-size-x = <4008>; ti,y-min = /bits/ 16 <282>; - touchscreen-size-y = /bits/ 16 <3864>; + touchscreen-size-y = <3864>; ti,x-plate-ohms = /bits/ 16 <180>; - touchscreen-max-pressure = /bits/ 16 <255>; - touchscreen-average-samples = /bits/ 16 <10>; + touchscreen-max-pressure = <255>; + touchscreen-average-samples = <10>; ti,debounce-tol = /bits/ 16 <3>; ti,debounce-rep = /bits/ 16 <1>; ti,settle-delay-usec = /bits/ 16 <150>; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi index b16c7caf34c1..87b5e23c766f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi @@ -70,12 +70,12 @@ pendown-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <125>; - touchscreen-size-x = /bits/ 16 <4008>; + touchscreen-size-x = <4008>; ti,y-min = /bits/ 16 <282>; - touchscreen-size-y = /bits/ 16 <3864>; + touchscreen-size-y = <3864>; ti,x-plate-ohms = /bits/ 16 <180>; - touchscreen-max-pressure = /bits/ 16 <255>; - touchscreen-average-samples = /bits/ 16 <10>; + touchscreen-max-pressure = <255>; + touchscreen-average-samples = <10>; ti,debounce-tol = /bits/ 16 <3>; ti,debounce-rep = /bits/ 16 <1>; ti,settle-delay-usec = /bits/ 16 <150>; From 85ec038b53faec11baefb2c42b6c0ce8bec94d3e Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Mon, 4 Apr 2022 16:47:18 +0800 Subject: [PATCH 056/803] video: fbdev: neofb: Fix the check of 'var->pixclock' The previous check against 'var->pixclock' doesn't return -EINVAL when it equals zero, but the driver uses it again, causing the divide error. Fix this by returning when 'var->pixclock' is zero. The following log reveals it: divide error: 0000 [#1] PREEMPT SMP KASAN PTI RIP: 0010:neofb_set_par+0x190f/0x49a0 Call Trace: fb_set_var+0x604/0xeb0 do_fb_ioctl+0x234/0x670 fb_ioctl+0xdd/0x130 do_syscall_64+0x3b/0x90 Signed-off-by: Zheyu Ma Signed-off-by: Helge Deller --- drivers/video/fbdev/neofb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/neofb.c b/drivers/video/fbdev/neofb.c index 966df2a07360..28d32cbf496b 100644 --- a/drivers/video/fbdev/neofb.c +++ b/drivers/video/fbdev/neofb.c @@ -585,7 +585,7 @@ neofb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) DBG("neofb_check_var"); - if (var->pixclock && PICOS2KHZ(var->pixclock) > par->maxClock) + if (!var->pixclock || PICOS2KHZ(var->pixclock) > par->maxClock) return -EINVAL; /* Is the mode larger than the LCD panel? */ From 213e2df4733275165038d77289812d4473b0b010 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Mon, 4 Apr 2022 16:47:19 +0800 Subject: [PATCH 057/803] video: fbdev: kyro: Error out if 'lineclock' equals zero The userspace program could pass any values to the driver through ioctl() interface. If the driver doesn't check the value of 'lineclock', it may cause divide error. Fix this by checking whether 'lineclock' is zero. The following log reveals it: divide error: 0000 [#1] PREEMPT SMP KASAN PTI RIP: 0010:kyrofb_set_par+0x30d/0xd80 Call Trace: fb_set_var+0x604/0xeb0 do_fb_ioctl+0x234/0x670 fb_ioctl+0xdd/0x130 do_syscall_64+0x3b/0x90 Signed-off-by: Zheyu Ma Signed-off-by: Helge Deller --- drivers/video/fbdev/kyro/fbdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/kyro/fbdev.c b/drivers/video/fbdev/kyro/fbdev.c index 25801e8e3f74..d57772f96ad2 100644 --- a/drivers/video/fbdev/kyro/fbdev.c +++ b/drivers/video/fbdev/kyro/fbdev.c @@ -494,6 +494,8 @@ static int kyrofb_set_par(struct fb_info *info) info->var.hsync_len + info->var.left_margin)) / 1000; + if (!lineclock) + return -EINVAL; /* time for a frame in ns (precision in 32bpp) */ frameclock = lineclock * (info->var.yres + From f2bfd792c1ed4b1e0578db3fcdb0879dc87fe027 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Mon, 4 Apr 2022 16:47:20 +0800 Subject: [PATCH 058/803] video: fbdev: vt8623fb: Error out if 'pixclock' equals zero The userspace program could pass any values to the driver through ioctl() interface. If the driver doesn't check the value of 'pixclock', it may cause divide error. Fix this by checking whether 'pixclock' is zero in the function vt8623fb_check_var(). The following log reveals it: divide error: 0000 [#1] PREEMPT SMP KASAN PTI RIP: 0010:vt8623fb_set_par+0xecd/0x2210 Call Trace: fb_set_var+0x604/0xeb0 do_fb_ioctl+0x234/0x670 fb_ioctl+0xdd/0x130 do_syscall_64+0x3b/0x90 Signed-off-by: Zheyu Ma Signed-off-by: Helge Deller --- drivers/video/fbdev/vt8623fb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/vt8623fb.c b/drivers/video/fbdev/vt8623fb.c index 7a959e5ba90b..a92a8c670cf0 100644 --- a/drivers/video/fbdev/vt8623fb.c +++ b/drivers/video/fbdev/vt8623fb.c @@ -321,6 +321,9 @@ static int vt8623fb_check_var(struct fb_var_screeninfo *var, struct fb_info *inf { int rv, mem, step; + if (!var->pixclock) + return -EINVAL; + /* Find appropriate format */ rv = svga_match_format (vt8623fb_formats, var, NULL); if (rv < 0) From 16844e5870424c2728486dc0c0300ebf7fa09ad6 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Mon, 4 Apr 2022 16:47:21 +0800 Subject: [PATCH 059/803] video: fbdev: tridentfb: Error out if 'pixclock' equals zero The userspace program could pass any values to the driver through ioctl() interface. If the driver doesn't check the value of 'pixclock', it may cause divide error. Fix this by checking whether 'pixclock' is zero. The following log reveals it: divide error: 0000 [#1] PREEMPT SMP KASAN PTI RIP: 0010:tridentfb_check_var+0x853/0xe60 Call Trace: fb_set_var+0x367/0xeb0 do_fb_ioctl+0x234/0x670 fb_ioctl+0xdd/0x130 do_syscall_64+0x3b/0x90 Signed-off-by: Zheyu Ma Signed-off-by: Helge Deller --- drivers/video/fbdev/tridentfb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/tridentfb.c b/drivers/video/fbdev/tridentfb.c index 4d20cb557ff0..319131bd72cf 100644 --- a/drivers/video/fbdev/tridentfb.c +++ b/drivers/video/fbdev/tridentfb.c @@ -996,6 +996,9 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var, int ramdac = 230000; /* 230MHz for most 3D chips */ debug("enter\n"); + if (!var->pixclock) + return -EINVAL; + /* check color depth */ if (bpp == 24) bpp = var->bits_per_pixel = 32; From e1e965156438a3662dbb151d892ada834214c833 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Mon, 4 Apr 2022 16:47:22 +0800 Subject: [PATCH 060/803] video: fbdev: arkfb: Error out if 'pixclock' equals zero The userspace program could pass any values to the driver through ioctl() interface. If the driver doesn't check the value of 'pixclock', it may cause divide error. Fix this by checking whether 'pixclock' is zero. The following log reveals it: divide error: 0000 [#1] PREEMPT SMP KASAN PTI RIP: 0010:arkfb_set_par+0x10fc/0x24f0 Call Trace: fb_set_var+0x604/0xeb0 do_fb_ioctl+0x234/0x670 fb_ioctl+0xdd/0x130 do_syscall_64+0x3b/0x90 Signed-off-by: Zheyu Ma Signed-off-by: Helge Deller --- drivers/video/fbdev/arkfb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/arkfb.c b/drivers/video/fbdev/arkfb.c index edf169d0816e..eb3e47c58c5f 100644 --- a/drivers/video/fbdev/arkfb.c +++ b/drivers/video/fbdev/arkfb.c @@ -566,6 +566,9 @@ static int arkfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) { int rv, mem, step; + if (!var->pixclock) + return -EINVAL; + /* Find appropriate format */ rv = svga_match_format (arkfb_formats, var, NULL); if (rv < 0) From 7015bb57c304bad7289e872c2c5c587adee3a756 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Mon, 4 Apr 2022 16:47:23 +0800 Subject: [PATCH 061/803] video: fbdev: s3fb: Error out if 'pixclock' equals zero The userspace program could pass any values to the driver through ioctl() interface. If the driver doesn't check the value of 'pixclock', it may cause divide error. Fix this by checking whether 'pixclock' is zero in s3fb_check_var(). The following log reveals it: divide error: 0000 [#1] PREEMPT SMP KASAN PTI RIP: 0010:s3fb_check_var+0x3f3/0x530 Call Trace: fb_set_var+0x367/0xeb0 do_fb_ioctl+0x234/0x670 fb_ioctl+0xdd/0x130 do_syscall_64+0x3b/0x90 Signed-off-by: Zheyu Ma Signed-off-by: Helge Deller --- drivers/video/fbdev/s3fb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/s3fb.c b/drivers/video/fbdev/s3fb.c index 5c74253e7b2c..b93c8eb02336 100644 --- a/drivers/video/fbdev/s3fb.c +++ b/drivers/video/fbdev/s3fb.c @@ -549,6 +549,9 @@ static int s3fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) int rv, mem, step; u16 m, n, r; + if (!var->pixclock) + return -EINVAL; + /* Find appropriate format */ rv = svga_match_format (s3fb_formats, var, NULL); From 12acdbd7ca7d8b3ac0f55d8069f52c223d8d23fd Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Sun, 10 Apr 2022 22:28:33 +0200 Subject: [PATCH 062/803] video: fbdev: i740fb: use memset_io() to clear screen sparse complains that using memset() on __iomem pointer is wrong: incorrect type in argument 1 (different address spaces) Use memset_io() to clear screen instead. Tested on real i740 cards. Signed-off-by: Ondrej Zary Signed-off-by: Helge Deller --- drivers/video/fbdev/i740fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/i740fb.c b/drivers/video/fbdev/i740fb.c index 52cce0db8bd3..dd45ea8203be 100644 --- a/drivers/video/fbdev/i740fb.c +++ b/drivers/video/fbdev/i740fb.c @@ -740,7 +740,7 @@ static int i740fb_set_par(struct fb_info *info) if (i) return i; - memset(info->screen_base, 0, info->screen_size); + memset_io(info->screen_base, 0, info->screen_size); vga_protect(par); From 15cf0b82271b1823fb02ab8c377badba614d95d5 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Mon, 4 Apr 2022 16:47:17 +0800 Subject: [PATCH 063/803] video: fbdev: i740fb: Error out if 'pixclock' equals zero The userspace program could pass any values to the driver through ioctl() interface. If the driver doesn't check the value of 'pixclock', it may cause divide error. Fix this by checking whether 'pixclock' is zero in the function i740fb_check_var(). The following log reveals it: divide error: 0000 [#1] PREEMPT SMP KASAN PTI RIP: 0010:i740fb_decode_var drivers/video/fbdev/i740fb.c:444 [inline] RIP: 0010:i740fb_set_par+0x272f/0x3bb0 drivers/video/fbdev/i740fb.c:739 Call Trace: fb_set_var+0x604/0xeb0 drivers/video/fbdev/core/fbmem.c:1036 do_fb_ioctl+0x234/0x670 drivers/video/fbdev/core/fbmem.c:1112 fb_ioctl+0xdd/0x130 drivers/video/fbdev/core/fbmem.c:1191 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:874 [inline] Signed-off-by: Zheyu Ma Signed-off-by: Helge Deller --- drivers/video/fbdev/i740fb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/i740fb.c b/drivers/video/fbdev/i740fb.c index dd45ea8203be..09dd85553d4f 100644 --- a/drivers/video/fbdev/i740fb.c +++ b/drivers/video/fbdev/i740fb.c @@ -657,6 +657,9 @@ static int i740fb_decode_var(const struct fb_var_screeninfo *var, static int i740fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) { + if (!var->pixclock) + return -EINVAL; + switch (var->bits_per_pixel) { case 8: var->red.offset = var->green.offset = var->blue.offset = 0; From f32c5a0423400e01f4d7c607949fa3a1f006e8fa Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Wed, 6 Apr 2022 16:14:08 +0200 Subject: [PATCH 064/803] USB: serial: option: add Telit 0x1057, 0x1058, 0x1075 compositions Add support for the following Telit FN980 and FN990 compositions: 0x1057: tty, adb, rmnet, tty, tty, tty, tty, tty 0x1058: tty, adb, tty, tty, tty, tty, tty 0x1075: adb, tty Signed-off-by: Daniele Palmas Link: https://lore.kernel.org/r/20220406141408.580669-1-dnlplm@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index e7755d9cfc61..8e2fc232da10 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1217,6 +1217,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1056, 0xff), /* Telit FD980 */ .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1057, 0xff), /* Telit FN980 */ + .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1058, 0xff), /* Telit FN980 (PCIe) */ + .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1060, 0xff), /* Telit LN920 (rmnet) */ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1061, 0xff), /* Telit LN920 (MBIM) */ @@ -1233,6 +1237,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1073, 0xff), /* Telit FN990 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff), /* Telit FN990 (PCIe) */ + .driver_info = RSVD(0) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), From 8be9cdc6911877843c4f13e44e836382818eb355 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 26 Feb 2022 09:43:33 +0000 Subject: [PATCH 065/803] bus: imx-weim: make symbol 'weim_of_notifier' static The sparse tool complains as follows: drivers/bus/imx-weim.c:373:23: warning: symbol 'weim_of_notifier' was not declared. Should it be static? This symbol is not used outside of imx-weim.c, so marks it static. Fixes: e6cb5408289f ("bus: imx-weim: add DT overlay support for WEIM bus") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: Shawn Guo --- drivers/bus/imx-weim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c index 2ea0a51f79f6..828c66bbaa67 100644 --- a/drivers/bus/imx-weim.c +++ b/drivers/bus/imx-weim.c @@ -369,7 +369,7 @@ static int of_weim_notify(struct notifier_block *nb, unsigned long action, return ret; } -struct notifier_block weim_of_notifier = { +static struct notifier_block weim_of_notifier = { .notifier_call = of_weim_notify, }; #endif /* IS_ENABLED(CONFIG_OF_DYNAMIC) */ From b541f9e59a0e56fff840cf983394e59de7bc2d96 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 11 Apr 2022 15:54:40 +0200 Subject: [PATCH 066/803] phy: ti: tusb1210: Make tusb1210_chg_det_states static Make tusb1210_chg_det_states static, fixing the following sparse warning: drivers/phy/ti/phy-tusb1210.c:158:12: sparse: sparse: symbol 'tusb1210_chg_det_states' was not declared. Should it be static? Fixes: 48969a5623ed ("phy: ti: tusb1210: Add charger detection") Reported-by: kernel test robot Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220411135440.558394-1-hdegoede@redhat.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-tusb1210.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/ti/phy-tusb1210.c b/drivers/phy/ti/phy-tusb1210.c index a0cdbcadf09e..c5bd74874f73 100644 --- a/drivers/phy/ti/phy-tusb1210.c +++ b/drivers/phy/ti/phy-tusb1210.c @@ -155,7 +155,7 @@ static int tusb1210_set_mode(struct phy *phy, enum phy_mode mode, int submode) } #ifdef CONFIG_POWER_SUPPLY -const char * const tusb1210_chg_det_states[] = { +static const char * const tusb1210_chg_det_states[] = { "CHG_DET_CONNECTING", "CHG_DET_START_DET", "CHG_DET_READ_DET", From 425d239379db03d514cb1c476bfe7c320bb89dfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sat, 9 Apr 2022 23:30:53 +0200 Subject: [PATCH 067/803] bpf: Fix release of page_pool in BPF_PROG_RUN in test runner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The live packet mode in BPF_PROG_RUN allocates a page_pool instance for each test run instance and uses it for the packet data. On setup it creates the page_pool, and calls xdp_reg_mem_model() to allow pages to be returned properly from the XDP data path. However, xdp_reg_mem_model() also raises the reference count of the page_pool itself, so the single page_pool_destroy() count on teardown was not enough to actually release the pool. To fix this, add an additional xdp_unreg_mem_model() call on teardown. Fixes: b530e9e1063e ("bpf: Add "live packet" mode for XDP in BPF_PROG_RUN") Reported-by: Freysteinn Alfredsson Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20220409213053.3117305-1-toke@redhat.com --- net/bpf/test_run.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index e7b9c2636d10..af709c182674 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -108,6 +108,7 @@ struct xdp_test_data { struct page_pool *pp; struct xdp_frame **frames; struct sk_buff **skbs; + struct xdp_mem_info mem; u32 batch_size; u32 frame_cnt; }; @@ -147,7 +148,6 @@ static void xdp_test_run_init_page(struct page *page, void *arg) static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_ctx) { - struct xdp_mem_info mem = {}; struct page_pool *pp; int err = -ENOMEM; struct page_pool_params pp_params = { @@ -174,7 +174,7 @@ static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_c } /* will copy 'mem.id' into pp->xdp_mem_id */ - err = xdp_reg_mem_model(&mem, MEM_TYPE_PAGE_POOL, pp); + err = xdp_reg_mem_model(&xdp->mem, MEM_TYPE_PAGE_POOL, pp); if (err) goto err_mmodel; @@ -202,6 +202,7 @@ err_skbs: static void xdp_test_run_teardown(struct xdp_test_data *xdp) { + xdp_unreg_mem_model(&xdp->mem); page_pool_destroy(xdp->pp); kfree(xdp->frames); kfree(xdp->skbs); From 962dd65e575dde950ef0844568edc37cfb39f302 Mon Sep 17 00:00:00 2001 From: Guillaume Giraudon Date: Mon, 11 Apr 2022 10:44:28 -0400 Subject: [PATCH 068/803] arm64: dts: meson-sm1-bananapi-m5: fix wrong GPIO pin labeling for CON1 The labels for lines 61 through 84 on the periphs-banks were offset by 2. 2 lines are missing in the BOOT GPIO lines (contains 14, should be 16) Added 2 empty entries in BOOT to realigned the rest of GPIO labels to match the Banana Pi M5 schematics. (Thanks to Neil Armstrong for the heads up on the position of the missing pins) Fixes: 976e920183e4 ("arm64: dts: meson-sm1: add Banana PI BPI-M5 board dts") Signed-off-by: Guillaume Giraudon Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20220411144427.874-1-ggiraudon@prism19.com --- arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts index 5751c48620ed..cadba194b149 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts @@ -437,6 +437,7 @@ "", "eMMC_RST#", /* BOOT_12 */ "eMMC_DS", /* BOOT_13 */ + "", "", /* GPIOC */ "SD_D0_B", /* GPIOC_0 */ "SD_D1_B", /* GPIOC_1 */ From 751ee15da5e5d33e15726c1a79de0f5db8155bdd Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Fri, 8 Apr 2022 09:56:17 +0000 Subject: [PATCH 069/803] phy: ti: Fix missing of_node_put in ti_pipe3_get_sysctrl() of_parse_phandle() returns node pointer with refcount incremented, use of_node_put() on it to decrease refcount when done. Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Link: https://lore.kernel.org/r/20220408095617.2495234-1-lv.ruyi@zte.com.cn Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-ti-pipe3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/ti/phy-ti-pipe3.c b/drivers/phy/ti/phy-ti-pipe3.c index 2cbc91e535d4..f502c36f3be5 100644 --- a/drivers/phy/ti/phy-ti-pipe3.c +++ b/drivers/phy/ti/phy-ti-pipe3.c @@ -696,6 +696,7 @@ static int ti_pipe3_get_sysctrl(struct ti_pipe3 *phy) } control_pdev = of_find_device_by_node(control_node); + of_node_put(control_node); if (!control_pdev) { dev_err(dev, "Failed to get control device\n"); return -EINVAL; From 388ec8f079f2f20d5cd183c3bc6f33cbc3ffd3ef Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 7 Apr 2022 11:18:56 +0200 Subject: [PATCH 070/803] phy: samsung: Fix missing of_node_put() in exynos_sata_phy_probe The device_node pointer is returned by of_parse_phandle() with refcount incremented. We should use of_node_put() on it when done. Fixes: bcff4cba41bc ("PHY: Exynos: Add Exynos5250 SATA PHY driver") Signed-off-by: Miaoqian Lin Reviewed-by: Krzysztof Kozlowski Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220407091857.230386-1-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/samsung/phy-exynos5250-sata.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/samsung/phy-exynos5250-sata.c b/drivers/phy/samsung/phy-exynos5250-sata.c index 9ec234243f7c..6c305a3fe187 100644 --- a/drivers/phy/samsung/phy-exynos5250-sata.c +++ b/drivers/phy/samsung/phy-exynos5250-sata.c @@ -187,6 +187,7 @@ static int exynos_sata_phy_probe(struct platform_device *pdev) return -EINVAL; sata_phy->client = of_find_i2c_device_by_node(node); + of_node_put(node); if (!sata_phy->client) return -EPROBE_DEFER; From 5c8402c4db45dd55c2c93c8d730f5dfa7c78a702 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 7 Apr 2022 11:18:57 +0200 Subject: [PATCH 071/803] phy: samsung: exynos5250-sata: fix missing device put in probe error paths The actions of of_find_i2c_device_by_node() in probe function should be reversed in error paths by putting the reference to obtained device. Fixes: bcff4cba41bc ("PHY: Exynos: Add Exynos5250 SATA PHY driver") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Alim Akhtar Link: https://lore.kernel.org/r/20220407091857.230386-2-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/samsung/phy-exynos5250-sata.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/phy/samsung/phy-exynos5250-sata.c b/drivers/phy/samsung/phy-exynos5250-sata.c index 6c305a3fe187..595adba5fb8f 100644 --- a/drivers/phy/samsung/phy-exynos5250-sata.c +++ b/drivers/phy/samsung/phy-exynos5250-sata.c @@ -196,20 +196,21 @@ static int exynos_sata_phy_probe(struct platform_device *pdev) sata_phy->phyclk = devm_clk_get(dev, "sata_phyctrl"); if (IS_ERR(sata_phy->phyclk)) { dev_err(dev, "failed to get clk for PHY\n"); - return PTR_ERR(sata_phy->phyclk); + ret = PTR_ERR(sata_phy->phyclk); + goto put_dev; } ret = clk_prepare_enable(sata_phy->phyclk); if (ret < 0) { dev_err(dev, "failed to enable source clk\n"); - return ret; + goto put_dev; } sata_phy->phy = devm_phy_create(dev, NULL, &exynos_sata_phy_ops); if (IS_ERR(sata_phy->phy)) { - clk_disable_unprepare(sata_phy->phyclk); dev_err(dev, "failed to create PHY\n"); - return PTR_ERR(sata_phy->phy); + ret = PTR_ERR(sata_phy->phy); + goto clk_disable; } phy_set_drvdata(sata_phy->phy, sata_phy); @@ -217,11 +218,18 @@ static int exynos_sata_phy_probe(struct platform_device *pdev) phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); if (IS_ERR(phy_provider)) { - clk_disable_unprepare(sata_phy->phyclk); - return PTR_ERR(phy_provider); + ret = PTR_ERR(phy_provider); + goto clk_disable; } return 0; + +clk_disable: + clk_disable_unprepare(sata_phy->phyclk); +put_dev: + put_device(&sata_phy->client->dev); + + return ret; } static const struct of_device_id exynos_sata_phy_of_match[] = { From a9f17d0c0778dd971dc9770fa0a2085a41d8c5e4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 3 Apr 2022 15:06:08 +0200 Subject: [PATCH 072/803] phy: ti: tusb1210: Fix an error handling path in tusb1210_probe() tusb1210_probe_charger_detect() must be undone by a corresponding tusb1210_remove_charger_detect() in the error handling path, as already done in the remove function. Fixes: 48969a5623ed ("phy: ti: tusb1210: Add charger detection") Signed-off-by: Christophe JAILLET Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/07c4926c42243cedb3b6067a241bb486fdda01b5.1648991162.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-tusb1210.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/phy/ti/phy-tusb1210.c b/drivers/phy/ti/phy-tusb1210.c index c5bd74874f73..c3ab4b69ea68 100644 --- a/drivers/phy/ti/phy-tusb1210.c +++ b/drivers/phy/ti/phy-tusb1210.c @@ -537,12 +537,18 @@ static int tusb1210_probe(struct ulpi *ulpi) tusb1210_probe_charger_detect(tusb); tusb->phy = ulpi_phy_create(ulpi, &phy_ops); - if (IS_ERR(tusb->phy)) - return PTR_ERR(tusb->phy); + if (IS_ERR(tusb->phy)) { + ret = PTR_ERR(tusb->phy); + goto err_remove_charger; + } phy_set_drvdata(tusb->phy, tusb); ulpi_set_drvdata(ulpi, tusb); return 0; + +err_remove_charger: + tusb1210_remove_charger_detect(tusb); + return ret; } static void tusb1210_remove(struct ulpi *ulpi) From c40b65304c361432b841bdbd5b1c8dfa918d6baa Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 Apr 2022 14:58:01 +0200 Subject: [PATCH 073/803] video: fbdev: sh_mobile_lcdcfb: Remove sh_mobile_lcdc_check_var() declaration As of commit 0fe66f327c464943 ("fbdev/sh_mobile: remove sh_mobile_lcdc_display_notify"), there is no longer a need for a foward declaration of sh_mobile_lcdc_check_var(). Signed-off-by: Geert Uytterhoeven Signed-off-by: Helge Deller --- drivers/video/fbdev/sh_mobile_lcdcfb.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/video/fbdev/sh_mobile_lcdcfb.c b/drivers/video/fbdev/sh_mobile_lcdcfb.c index aa4ebe3192ec..9a4417430b4e 100644 --- a/drivers/video/fbdev/sh_mobile_lcdcfb.c +++ b/drivers/video/fbdev/sh_mobile_lcdcfb.c @@ -531,9 +531,6 @@ static void sh_mobile_lcdc_display_off(struct sh_mobile_lcdc_chan *ch) ch->tx_dev->ops->display_off(ch->tx_dev); } -static int sh_mobile_lcdc_check_var(struct fb_var_screeninfo *var, - struct fb_info *info); - /* ----------------------------------------------------------------------------- * Format helpers */ From 0f83e6b4161617014017a694888dd8743f46f071 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 9 Mar 2022 10:43:01 +0000 Subject: [PATCH 074/803] ARM: OMAP2+: Fix refcount leak in omap_gic_of_init The of_find_compatible_node() function returns a node pointer with refcount incremented, We should use of_node_put() on it when done Add the missing of_node_put() to release the refcount. Fixes: fd1c07861491 ("ARM: OMAP4: Fix the init code to have OMAP4460 errata available in DT build") Signed-off-by: Miaoqian Lin Message-Id: <20220309104302.18398-1-linmq006@gmail.com> Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap4-common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index 5c3845730dbf..0b80f8bcd304 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -314,10 +314,12 @@ void __init omap_gic_of_init(void) np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-gic"); gic_dist_base_addr = of_iomap(np, 0); + of_node_put(np); WARN_ON(!gic_dist_base_addr); np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-twd-timer"); twd_base = of_iomap(np, 0); + of_node_put(np); WARN_ON(!twd_base); skip_errata_init: From a12315d6d27093392b6c634e1d35a59f1d1f7a59 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 12 Apr 2022 12:26:51 +0300 Subject: [PATCH 075/803] bus: ti-sysc: Make omap3 gpt12 quirk handling SoC specific On beagleboard revisions A to B4 we need to use gpt12 as the system timer. However, the quirk handling added for gpt12 caused a regression for system suspend for am335x as the PM coprocessor needs the timers idled for suspend. Let's make the gpt12 quirk specific to omap34xx, other SoCs don't need it. Beagleboard revisions C and later no longer need to use the gpt12 related quirk. Then at some point, if we decide to drop support for the old beagleboard revisions A to B4, we can also drop the gpt12 related quirks completely. Fixes: 3ff340e24c9d ("bus: ti-sysc: Fix gpt12 system timer issue with reserved status") Reported-by: Kevin Hilman Suggested-by: Kevin Hilman Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 54c0ee6dda30..7a1b1f9e4933 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -3232,13 +3232,27 @@ static int sysc_check_disabled_devices(struct sysc *ddata) */ static int sysc_check_active_timer(struct sysc *ddata) { + int error; + if (ddata->cap->type != TI_SYSC_OMAP2_TIMER && ddata->cap->type != TI_SYSC_OMAP4_TIMER) return 0; + /* + * Quirk for omap3 beagleboard revision A to B4 to use gpt12. + * Revision C and later are fixed with commit 23885389dbbb ("ARM: + * dts: Fix timer regression for beagleboard revision c"). This all + * can be dropped if we stop supporting old beagleboard revisions + * A to B4 at some point. + */ + if (sysc_soc->soc == SOC_3430) + error = -ENXIO; + else + error = -EBUSY; + if ((ddata->cfg.quirks & SYSC_QUIRK_NO_RESET_ON_INIT) && (ddata->cfg.quirks & SYSC_QUIRK_NO_IDLE)) - return -ENXIO; + return error; return 0; } From 8d2453d9a307c2eafd21242dd73f35f05fb7ce74 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 12 Apr 2022 12:26:51 +0300 Subject: [PATCH 076/803] ARM: dts: dra7: Fix suspend warning for vpe powerdomain We currently are getting the following warning after a system suspend: Powerdomain (vpe_pwrdm) didn't enter target state 0 Looks like this is because the STANDBYMODE bit for SMART_IDLE should not be used. The TRM "Table 12-348. VPE_SYSCONFIG" says that the value for SMART_IDLE is "0x2: Same behavior as bit-field value of 0x1". But if the SMART_IDLE value is used, PM_VPE_PWRSTST LASTPOWERSTATEENTERED bits always show value of 3. Let's fix the issue by dropping SMART_IDLE for vpe. And let's also add the missing the powerdomain for vpe. Fixes: 1a2095160594 ("ARM: dts: dra7: Add ti-sysc node for VPE") Cc: Benoit Parrot Reported-by: Kevin Hilman Reviewed-by: Kevin Hilman Tested-by: Kevin Hilman Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra7-l4.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index 0a11bacffc1f..5733e3a4ea8e 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -4188,11 +4188,11 @@ reg = <0x1d0010 0x4>; reg-names = "sysc"; ti,sysc-midle = , - , - ; + ; ti,sysc-sidle = , , ; + power-domains = <&prm_vpe>; clocks = <&vpe_clkctrl DRA7_VPE_VPE_CLKCTRL 0>; clock-names = "fck"; #address-cells = <1>; From 290c4a902b79246ec55e477fc313f27f98393dee Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Sun, 10 Apr 2022 22:06:48 -0500 Subject: [PATCH 077/803] RDMA/rxe: Fix "Replace mr by rkey in responder resources" The referenced commit generates a reference counting error if the rkey has the same index but the wrong key. In this case the reference taken by rxe_pool_get_index() is not dropped. Drop the reference if the keys don't match in rxe_recheck_mr(). Check that the mw and mr are still valid. Fixes: 8a1a0be894da ("RDMA/rxe: Replace mr by rkey in responder resources") Link: https://lore.kernel.org/r/20220411030647.20011-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 16fc7ea1298d..1d95fab606da 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -680,6 +680,11 @@ static struct resp_res *rxe_prepare_read_res(struct rxe_qp *qp, * It is assumed that the access permissions if originally good * are OK and the mappings to be unchanged. * + * TODO: If someone reregisters an MR to change its size or + * access permissions during the processing of an RDMA read + * we should kill the responder resource and complete the + * operation with an error. + * * Return: mr on success else NULL */ static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey) @@ -690,23 +695,27 @@ static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey) if (rkey_is_mw(rkey)) { mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8); - if (!mw || mw->rkey != rkey) + if (!mw) return NULL; - if (mw->state != RXE_MW_STATE_VALID) { + mr = mw->mr; + if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID || + !mr || mr->state != RXE_MR_STATE_VALID) { rxe_put(mw); return NULL; } - mr = mw->mr; + rxe_get(mr); rxe_put(mw); - } else { - mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8); - if (!mr || mr->rkey != rkey) - return NULL; + + return mr; } - if (mr->state != RXE_MR_STATE_VALID) { + mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8); + if (!mr) + return NULL; + + if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) { rxe_put(mr); return NULL; } From 610323d8f6f8b479a04eec33fd67e4152beb7b65 Mon Sep 17 00:00:00 2001 From: Jakob Koschel Date: Fri, 1 Apr 2022 00:35:03 +0200 Subject: [PATCH 078/803] video: fbdev: mmp: replace usage of found with dedicated list iterator variable To move the list iterator variable into the list_for_each_entry_*() macro in the future it should be avoided to use the list iterator variable after the loop body. To *never* use the list iterator variable after the loop it was concluded to use a separate iterator variable instead of a found boolean [1]. This removes the need to use a found variable and simply checking if the variable was set, can determine if the break/goto was hit. Link: https://lore.kernel.org/all/CAHk-=wgRr_D8CB-D9Kg-c=EHreAsk5SqXPwr9Y7k9sA6cWXJ6w@mail.gmail.com/ [1] Signed-off-by: Jakob Koschel Signed-off-by: Helge Deller --- drivers/video/fbdev/mmp/core.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/video/fbdev/mmp/core.c b/drivers/video/fbdev/mmp/core.c index 154127256a2c..03707461eced 100644 --- a/drivers/video/fbdev/mmp/core.c +++ b/drivers/video/fbdev/mmp/core.c @@ -127,19 +127,18 @@ EXPORT_SYMBOL_GPL(mmp_unregister_panel); */ struct mmp_path *mmp_get_path(const char *name) { - struct mmp_path *path; - int found = 0; + struct mmp_path *path = NULL, *iter; mutex_lock(&disp_lock); - list_for_each_entry(path, &path_list, node) { - if (!strcmp(name, path->name)) { - found = 1; + list_for_each_entry(iter, &path_list, node) { + if (!strcmp(name, iter->name)) { + path = iter; break; } } mutex_unlock(&disp_lock); - return found ? path : NULL; + return path; } EXPORT_SYMBOL_GPL(mmp_get_path); From e64d5fa5044f225ac87d96a7e4be11389999c4c6 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 5 Apr 2022 18:29:07 +0530 Subject: [PATCH 079/803] bus: mhi: host: pci_generic: Add missing poweroff() PM callback During hibernation process, once thaw() stage completes, the MHI endpoint devices will be in M0 state post recovery. After that, the devices will be powered down so that the system can enter the target sleep state. During this stage, the PCI core will put the devices in D3hot. But this transition is allowed by the MHI spec. The devices can only enter D3hot when it is in M3 state. So for fixing this issue, let's add the poweroff() callback that will get executed before putting the system in target sleep state during hibernation. This callback will power down the device properly so that it could be restored during restore() or thaw() stage. Cc: stable@vger.kernel.org Fixes: 5f0c2ee1fe8d ("bus: mhi: pci-generic: Fix hibernation") Reported-by: Hemant Kumar Suggested-by: Hemant Kumar Link: https://lore.kernel.org/r/20220405125907.5644-1-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/host/pci_generic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c index 9527b7d63840..ef85dbfb3216 100644 --- a/drivers/bus/mhi/host/pci_generic.c +++ b/drivers/bus/mhi/host/pci_generic.c @@ -1085,6 +1085,7 @@ static const struct dev_pm_ops mhi_pci_pm_ops = { .resume = mhi_pci_resume, .freeze = mhi_pci_freeze, .thaw = mhi_pci_restore, + .poweroff = mhi_pci_freeze, .restore = mhi_pci_restore, #endif }; From c38f83bae4037023827c85e045841d0421f85034 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 8 Apr 2022 20:30:39 +0530 Subject: [PATCH 080/803] bus: mhi: host: pci_generic: Flush recovery worker during freeze It is possible that the recovery work might be running while the freeze gets executed (during hibernation etc.,). Currently, we don't powerdown the stack if it is not up but if the recovery work completes after freeze, then the device will be up afterwards. This will not be a sane situation. So let's flush the recovery worker before trying to powerdown the device. Cc: stable@vger.kernel.org Fixes: 5f0c2ee1fe8d ("bus: mhi: pci-generic: Fix hibernation") Reported-by: Bhaumik Vasav Bhatt Reviewed-by: Bhaumik Vasav Bhatt Link: https://lore.kernel.org/r/20220408150039.17297-1-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/host/pci_generic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c index ef85dbfb3216..541ced27d941 100644 --- a/drivers/bus/mhi/host/pci_generic.c +++ b/drivers/bus/mhi/host/pci_generic.c @@ -1060,6 +1060,7 @@ static int __maybe_unused mhi_pci_freeze(struct device *dev) * the intermediate restore kernel reinitializes MHI device with new * context. */ + flush_work(&mhi_pdev->recovery_work); if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) { mhi_power_down(mhi_cntrl, true); mhi_unprepare_after_power_down(mhi_cntrl); From 3588060befff75ff39fab7122b94c6fb3148fcda Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 18 Mar 2022 10:57:46 +0000 Subject: [PATCH 081/803] phy: ti: omap-usb2: Fix error handling in omap_usb2_enable_clocks The corresponding API for clk_prepare_enable is clk_disable_unprepare. Make sure that the clock is unprepared on exit by changing clk_disable to clk_disable_unprepare. Fixes: ed31ee7cf1fe ("phy: ti: usb2: Fix logic on -EPROBE_DEFER") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220318105748.19532-1-linmq006@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-omap-usb2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c index 3a505fe5715a..31a775877f6e 100644 --- a/drivers/phy/ti/phy-omap-usb2.c +++ b/drivers/phy/ti/phy-omap-usb2.c @@ -215,7 +215,7 @@ static int omap_usb2_enable_clocks(struct omap_usb *phy) return 0; err1: - clk_disable(phy->wkupclk); + clk_disable_unprepare(phy->wkupclk); err0: return ret; From b2dd71f9f728da695a86b8308feb4f39defe9019 Mon Sep 17 00:00:00 2001 From: "Herton R. Krzesinski" Date: Mon, 4 Apr 2022 18:05:25 -0300 Subject: [PATCH 082/803] tools/power/x86/intel-speed-select: fix build failure when using -Wl,--as-needed Build of intel-speed-select will fail if you run: $ LDFLAGS="-Wl,--as-needed" /usr/bin/make V=1 ... gcc -O2 -Wall -g -D_GNU_SOURCE -Iinclude -I/usr/include/libnl3 -Wl,--as-needed -lnl-genl-3 -lnl-3 intel-speed-select-in.o -o intel-speed-select /usr/bin/ld: intel-speed-select-in.o: in function `handle_event': (...)/linux/tools/power/x86/intel-speed-select/hfi-events.c:189: undefined reference to `nlmsg_hdr' ... In this case the problem is that order when linking matters when using the flag -Wl,--as-needed, symbols not used at that point are discarded. So since intel-speed-select-in.o comes after, at that point the libraries/symbols are already discarded and then missing/undefined references are reported. To fix this, make sure we specify LDFLAGS after the object file. Acked-by: Srinivas Pandruvada Signed-off-by: Herton R. Krzesinski Link: https://lore.kernel.org/r/20220404210525.725611-1-herton@redhat.com Signed-off-by: Hans de Goede --- tools/power/x86/intel-speed-select/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile index 846f785e278d..7221f2f55e8b 100644 --- a/tools/power/x86/intel-speed-select/Makefile +++ b/tools/power/x86/intel-speed-select/Makefile @@ -42,7 +42,7 @@ ISST_IN := $(OUTPUT)intel-speed-select-in.o $(ISST_IN): prepare FORCE $(Q)$(MAKE) $(build)=intel-speed-select $(OUTPUT)intel-speed-select: $(ISST_IN) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ + $(QUIET_LINK)$(CC) $(CFLAGS) $< $(LDFLAGS) -o $@ clean: rm -f $(ALL_PROGRAMS) From 4d0d5c352303a318925ceb84a86818761aa6586b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 13 Apr 2022 08:47:29 -0500 Subject: [PATCH 083/803] dt-bindings: hwmon: ti,tmp421: Fix type for 'ti,n-factor' 'ti,n-factor' is read as a 32-bit signed value, so the type and constraints are wrong. The same property is also defined for ti,tmp464 and is correct. The constraints should also not be under 'items' as this property is not an array. Cc: Jean Delvare Cc: Guenter Roeck Cc: Krzysztof Kozlowski Cc: linux-hwmon@vger.kernel.org Signed-off-by: Rob Herring Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220413134729.3112190-1-robh@kernel.org Signed-off-by: Guenter Roeck --- Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml index 36f649938fb7..a6f1fa75a67c 100644 --- a/Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml +++ b/Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml @@ -58,10 +58,9 @@ patternProperties: description: | The value (two's complement) to be programmed in the channel specific N correction register. For remote channels only. - $ref: /schemas/types.yaml#/definitions/uint32 - items: - minimum: 0 - maximum: 255 + $ref: /schemas/types.yaml#/definitions/int32 + minimum: -128 + maximum: 127 required: - reg From df96e96a8c6f61db17a640cbac00494d13ed0779 Mon Sep 17 00:00:00 2001 From: Sergiu Moga Date: Thu, 10 Mar 2022 13:45:51 +0200 Subject: [PATCH 084/803] ARM: dts: at91: sama7g5: Swap `rx` and `tx` for `i2c` nodes Swap `rx` and `tx` for the `dma-names` property of the `i2c` nodes in order to maintain consistency across Microchip/Atmel SoC files. Signed-off-by: Sergiu Moga Reviewed-by: Tudor Ambarus Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20220310114553.184763-2-sergiu.moga@microchip.com --- arch/arm/boot/dts/sama7g5.dtsi | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm/boot/dts/sama7g5.dtsi b/arch/arm/boot/dts/sama7g5.dtsi index 4decd3a91a76..f691c8f08d04 100644 --- a/arch/arm/boot/dts/sama7g5.dtsi +++ b/arch/arm/boot/dts/sama7g5.dtsi @@ -601,9 +601,9 @@ #size-cells = <0>; clocks = <&pmc PMC_TYPE_PERIPHERAL 39>; atmel,fifo-size = <32>; - dmas = <&dma0 AT91_XDMAC_DT_PERID(7)>, - <&dma0 AT91_XDMAC_DT_PERID(8)>; - dma-names = "rx", "tx"; + dmas = <&dma0 AT91_XDMAC_DT_PERID(8)>, + <&dma0 AT91_XDMAC_DT_PERID(7)>; + dma-names = "tx", "rx"; status = "disabled"; }; }; @@ -786,9 +786,9 @@ #size-cells = <0>; clocks = <&pmc PMC_TYPE_PERIPHERAL 46>; atmel,fifo-size = <32>; - dmas = <&dma0 AT91_XDMAC_DT_PERID(21)>, - <&dma0 AT91_XDMAC_DT_PERID(22)>; - dma-names = "rx", "tx"; + dmas = <&dma0 AT91_XDMAC_DT_PERID(22)>, + <&dma0 AT91_XDMAC_DT_PERID(21)>; + dma-names = "tx", "rx"; status = "disabled"; }; }; @@ -810,9 +810,9 @@ #size-cells = <0>; clocks = <&pmc PMC_TYPE_PERIPHERAL 47>; atmel,fifo-size = <32>; - dmas = <&dma0 AT91_XDMAC_DT_PERID(23)>, - <&dma0 AT91_XDMAC_DT_PERID(24)>; - dma-names = "rx", "tx"; + dmas = <&dma0 AT91_XDMAC_DT_PERID(24)>, + <&dma0 AT91_XDMAC_DT_PERID(23)>; + dma-names = "tx", "rx"; status = "disabled"; }; }; From 3f7ce6d7091765ed6c67c5d78aa364b9d17e3aab Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 7 Mar 2022 13:38:27 +0200 Subject: [PATCH 085/803] ARM: dts: at91: sama7g5ek: enable pull-up on flexcom3 console lines Flexcom3 is used as board console serial. There are no pull-ups on these lines on the board. This means that if a cable is not connected (that has pull-ups included), stray characters could appear on the console as the floating pins voltage levels are interpreted as incoming characters. To avoid this problem, enable the internal pull-ups on these lines. Fixes: 7540629e2fc7 ("ARM: dts: at91: add sama7g5 SoC DT and sama7g5-ek") Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Eugen Hristev Reviewed-by: Tudor Ambarus Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20220307113827.2419331-1-eugen.hristev@microchip.com --- arch/arm/boot/dts/at91-sama7g5ek.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/at91-sama7g5ek.dts b/arch/arm/boot/dts/at91-sama7g5ek.dts index 08685a10eda1..dd047a852390 100644 --- a/arch/arm/boot/dts/at91-sama7g5ek.dts +++ b/arch/arm/boot/dts/at91-sama7g5ek.dts @@ -495,7 +495,7 @@ pinctrl_flx3_default: flx3_default { pinmux = , ; - bias-disable; + bias-pull-up; }; pinctrl_flx4_default: flx4_default { From 68a9345536daf199147d2ef07ec2ef0df43672ac Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 6 Apr 2022 16:05:05 +0300 Subject: [PATCH 086/803] ARM: dts: at91: sama7g5ek: Align the impedance of the QSPI0's HSIO and PCB lines The impedance of the QSPI PCB lines on the sama7g5ek is 50 Ohms. Align the output impedance of the QSPI0 HSIOs by setting a medium drive strength which corresponds to an impedance of 56 Ohms when VDD is in the 3.0V - 3.6V range. The high drive strength setting corresponds to an output impedance of 42 Ohms on the QSPI0 HSIOs. Suggested-by: Mihai Sain Signed-off-by: Tudor Ambarus Reviewed-by: Claudiu Beznea Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20220406130505.422042-1-tudor.ambarus@microchip.com --- arch/arm/boot/dts/at91-sama7g5ek.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/at91-sama7g5ek.dts b/arch/arm/boot/dts/at91-sama7g5ek.dts index dd047a852390..d83f76a6cd6a 100644 --- a/arch/arm/boot/dts/at91-sama7g5ek.dts +++ b/arch/arm/boot/dts/at91-sama7g5ek.dts @@ -655,7 +655,7 @@ ; bias-disable; slew-rate = <0>; - atmel,drive-strength = ; + atmel,drive-strength = ; }; pinctrl_sdmmc0_default: sdmmc0_default { From e5628110bb6669330c3f0cadcc7f486de0007355 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 7 Apr 2022 16:32:22 +0200 Subject: [PATCH 087/803] ARM: dts: at91: align SPI NOR node name with dtschema The node names should be generic and SPI NOR dtschema expects "flash". Signed-off-by: Krzysztof Kozlowski Reviewed-by: Tudor Ambarus Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20220407143223.295344-1-krzysztof.kozlowski@linaro.org --- arch/arm/boot/dts/at91-dvk_su60_somc.dtsi | 2 +- arch/arm/boot/dts/at91-q5xr5.dts | 2 +- arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi | 2 +- arch/arm/boot/dts/at91-sama5d27_wlsom1_ek.dts | 2 +- arch/arm/boot/dts/at91-sama5d2_xplained.dts | 2 +- arch/arm/boot/dts/at91-sama5d4_ma5d4.dtsi | 2 +- arch/arm/boot/dts/at91-sama5d4ek.dts | 2 +- arch/arm/boot/dts/at91-vinco.dts | 2 +- arch/arm/boot/dts/at91sam9n12ek.dts | 2 +- arch/arm/boot/dts/at91sam9x5ek.dtsi | 2 +- arch/arm/boot/dts/sama5d3xmb.dtsi | 2 +- arch/arm/boot/dts/sama5d3xmb_cmp.dtsi | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm/boot/dts/at91-dvk_su60_somc.dtsi b/arch/arm/boot/dts/at91-dvk_su60_somc.dtsi index c1c8650dafce..3542ad8a243e 100644 --- a/arch/arm/boot/dts/at91-dvk_su60_somc.dtsi +++ b/arch/arm/boot/dts/at91-dvk_su60_somc.dtsi @@ -44,7 +44,7 @@ status = "okay"; /* spi0.0: 4M Flash Macronix MX25R4035FM1IL0 */ - spi-flash@0 { + flash@0 { compatible = "mxicy,mx25u4035", "jedec,spi-nor"; spi-max-frequency = <33000000>; reg = <0>; diff --git a/arch/arm/boot/dts/at91-q5xr5.dts b/arch/arm/boot/dts/at91-q5xr5.dts index 47a00062f01f..9cf60b6f695c 100644 --- a/arch/arm/boot/dts/at91-q5xr5.dts +++ b/arch/arm/boot/dts/at91-q5xr5.dts @@ -125,7 +125,7 @@ cs-gpios = <&pioA 3 GPIO_ACTIVE_HIGH>, <&pioC 11 GPIO_ACTIVE_LOW>, <0>, <0>; status = "okay"; - m25p80@0 { + flash@0 { compatible = "jedec,spi-nor"; spi-max-frequency = <20000000>; reg = <0>; diff --git a/arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi b/arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi index 21c86171e462..ba621783acdb 100644 --- a/arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi +++ b/arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi @@ -214,7 +214,7 @@ pinctrl-0 = <&pinctrl_qspi1_default>; status = "disabled"; - qspi1_flash: spi_flash@0 { + qspi1_flash: flash@0 { #address-cells = <1>; #size-cells = <1>; compatible = "jedec,spi-nor"; diff --git a/arch/arm/boot/dts/at91-sama5d27_wlsom1_ek.dts b/arch/arm/boot/dts/at91-sama5d27_wlsom1_ek.dts index c145c4e5ef58..5e8755f22784 100644 --- a/arch/arm/boot/dts/at91-sama5d27_wlsom1_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d27_wlsom1_ek.dts @@ -191,7 +191,7 @@ &qspi1 { status = "okay"; - qspi1_flash: spi_flash@0 { + qspi1_flash: flash@0 { status = "okay"; }; }; diff --git a/arch/arm/boot/dts/at91-sama5d2_xplained.dts b/arch/arm/boot/dts/at91-sama5d2_xplained.dts index 9bf2ec0ba3e2..cdfe891f9a9e 100644 --- a/arch/arm/boot/dts/at91-sama5d2_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d2_xplained.dts @@ -137,7 +137,7 @@ pinctrl-0 = <&pinctrl_spi0_default>; status = "okay"; - m25p80@0 { + flash@0 { compatible = "atmel,at25df321a"; reg = <0>; spi-max-frequency = <50000000>; diff --git a/arch/arm/boot/dts/at91-sama5d4_ma5d4.dtsi b/arch/arm/boot/dts/at91-sama5d4_ma5d4.dtsi index 710cb72bda5a..fd1086f52b40 100644 --- a/arch/arm/boot/dts/at91-sama5d4_ma5d4.dtsi +++ b/arch/arm/boot/dts/at91-sama5d4_ma5d4.dtsi @@ -49,7 +49,7 @@ cs-gpios = <&pioC 3 0>, <0>, <0>, <0>; status = "okay"; - m25p80@0 { + flash@0 { compatible = "atmel,at25df321a"; spi-max-frequency = <50000000>; reg = <0>; diff --git a/arch/arm/boot/dts/at91-sama5d4ek.dts b/arch/arm/boot/dts/at91-sama5d4ek.dts index fe432b6b7e95..7017f626f362 100644 --- a/arch/arm/boot/dts/at91-sama5d4ek.dts +++ b/arch/arm/boot/dts/at91-sama5d4ek.dts @@ -65,7 +65,7 @@ spi0: spi@f8010000 { cs-gpios = <&pioC 3 0>, <0>, <0>, <0>; status = "okay"; - m25p80@0 { + flash@0 { compatible = "atmel,at25df321a"; spi-max-frequency = <50000000>; reg = <0>; diff --git a/arch/arm/boot/dts/at91-vinco.dts b/arch/arm/boot/dts/at91-vinco.dts index a51a3372afa1..ebeaa6ab500e 100644 --- a/arch/arm/boot/dts/at91-vinco.dts +++ b/arch/arm/boot/dts/at91-vinco.dts @@ -59,7 +59,7 @@ spi0: spi@f8010000 { cs-gpios = <&pioC 3 0>, <0>, <0>, <0>; status = "okay"; - m25p80@0 { + flash@0 { compatible = "n25q32b", "jedec,spi-nor"; spi-max-frequency = <50000000>; reg = <0>; diff --git a/arch/arm/boot/dts/at91sam9n12ek.dts b/arch/arm/boot/dts/at91sam9n12ek.dts index 2bc4e6e0a923..c905d7bfc771 100644 --- a/arch/arm/boot/dts/at91sam9n12ek.dts +++ b/arch/arm/boot/dts/at91sam9n12ek.dts @@ -119,7 +119,7 @@ spi0: spi@f0000000 { status = "okay"; cs-gpios = <&pioA 14 0>, <0>, <0>, <0>; - m25p80@0 { + flash@0 { compatible = "atmel,at25df321a"; spi-max-frequency = <50000000>; reg = <0>; diff --git a/arch/arm/boot/dts/at91sam9x5ek.dtsi b/arch/arm/boot/dts/at91sam9x5ek.dtsi index 6d1264de6060..5f4eaa618ab4 100644 --- a/arch/arm/boot/dts/at91sam9x5ek.dtsi +++ b/arch/arm/boot/dts/at91sam9x5ek.dtsi @@ -125,7 +125,7 @@ cs-gpios = <&pioA 14 0>, <0>, <0>, <0>; status = "disabled"; /* conflicts with mmc1 */ - m25p80@0 { + flash@0 { compatible = "atmel,at25df321a"; spi-max-frequency = <50000000>; reg = <0>; diff --git a/arch/arm/boot/dts/sama5d3xmb.dtsi b/arch/arm/boot/dts/sama5d3xmb.dtsi index a499de8a7a64..3652c9e24124 100644 --- a/arch/arm/boot/dts/sama5d3xmb.dtsi +++ b/arch/arm/boot/dts/sama5d3xmb.dtsi @@ -26,7 +26,7 @@ spi0: spi@f0004000 { dmas = <0>, <0>; /* Do not use DMA for spi0 */ - m25p80@0 { + flash@0 { compatible = "atmel,at25df321a"; spi-max-frequency = <50000000>; reg = <0>; diff --git a/arch/arm/boot/dts/sama5d3xmb_cmp.dtsi b/arch/arm/boot/dts/sama5d3xmb_cmp.dtsi index fa9e5e2a745d..5d9e97fecf83 100644 --- a/arch/arm/boot/dts/sama5d3xmb_cmp.dtsi +++ b/arch/arm/boot/dts/sama5d3xmb_cmp.dtsi @@ -25,7 +25,7 @@ spi0: spi@f0004000 { dmas = <0>, <0>; /* Do not use DMA for spi0 */ - m25p80@0 { + flash@0 { compatible = "atmel,at25df321a"; spi-max-frequency = <50000000>; reg = <0>; From 4a6471e65050fef99559354bac97b551310f985c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 12 Apr 2022 12:50:13 +0200 Subject: [PATCH 088/803] ARM: dts: at91: use generic node name for dataflash The node names should be generic, so use "flash" for dataflash nodes and for cfi-flash. Suggested-by: Tudor Ambarus Signed-off-by: Krzysztof Kozlowski Reviewed-by: Tudor Ambarus Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20220412105013.249793-1-krzysztof.kozlowski@linaro.org --- Documentation/devicetree/bindings/mfd/atmel-flexcom.txt | 2 +- arch/arm/boot/dts/at91rm9200ek.dts | 4 ++-- arch/arm/boot/dts/at91sam9260ek.dts | 2 +- arch/arm/boot/dts/at91sam9261ek.dts | 2 +- arch/arm/boot/dts/at91sam9263ek.dts | 2 +- arch/arm/boot/dts/at91sam9g20ek_common.dtsi | 2 +- arch/arm/boot/dts/at91sam9m10g45ek.dts | 2 +- arch/arm/boot/dts/at91sam9rlek.dts | 2 +- arch/arm/boot/dts/usb_a9263.dts | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Documentation/devicetree/bindings/mfd/atmel-flexcom.txt b/Documentation/devicetree/bindings/mfd/atmel-flexcom.txt index 692300117c64..9d837535637b 100644 --- a/Documentation/devicetree/bindings/mfd/atmel-flexcom.txt +++ b/Documentation/devicetree/bindings/mfd/atmel-flexcom.txt @@ -54,7 +54,7 @@ flexcom@f8034000 { clock-names = "spi_clk"; atmel,fifo-size = <32>; - mtd_dataflash@0 { + flash@0 { compatible = "atmel,at25f512b"; reg = <0>; spi-max-frequency = <20000000>; diff --git a/arch/arm/boot/dts/at91rm9200ek.dts b/arch/arm/boot/dts/at91rm9200ek.dts index e1ef4e44e663..4624a6f076f8 100644 --- a/arch/arm/boot/dts/at91rm9200ek.dts +++ b/arch/arm/boot/dts/at91rm9200ek.dts @@ -73,7 +73,7 @@ spi0: spi@fffe0000 { status = "okay"; cs-gpios = <&pioA 3 0>, <0>, <0>, <0>; - mtd_dataflash@0 { + flash@0 { compatible = "atmel,at45", "atmel,dataflash"; spi-max-frequency = <15000000>; reg = <0>; @@ -94,7 +94,7 @@ status = "okay"; }; - nor_flash@10000000 { + flash@10000000 { compatible = "cfi-flash"; reg = <0x10000000 0x800000>; linux,mtd-name = "physmap-flash.0"; diff --git a/arch/arm/boot/dts/at91sam9260ek.dts b/arch/arm/boot/dts/at91sam9260ek.dts index ce96345d28a3..6381088ba24f 100644 --- a/arch/arm/boot/dts/at91sam9260ek.dts +++ b/arch/arm/boot/dts/at91sam9260ek.dts @@ -92,7 +92,7 @@ spi0: spi@fffc8000 { cs-gpios = <0>, <&pioC 11 0>, <0>, <0>; - mtd_dataflash@1 { + flash@1 { compatible = "atmel,at45", "atmel,dataflash"; spi-max-frequency = <50000000>; reg = <1>; diff --git a/arch/arm/boot/dts/at91sam9261ek.dts b/arch/arm/boot/dts/at91sam9261ek.dts index beed819609e8..8f11c0b7d76d 100644 --- a/arch/arm/boot/dts/at91sam9261ek.dts +++ b/arch/arm/boot/dts/at91sam9261ek.dts @@ -145,7 +145,7 @@ cs-gpios = <&pioA 3 0>, <0>, <&pioA 28 0>, <0>; status = "okay"; - mtd_dataflash@0 { + flash@0 { compatible = "atmel,at45", "atmel,dataflash"; reg = <0>; spi-max-frequency = <15000000>; diff --git a/arch/arm/boot/dts/at91sam9263ek.dts b/arch/arm/boot/dts/at91sam9263ek.dts index 71f60576761a..42e734020235 100644 --- a/arch/arm/boot/dts/at91sam9263ek.dts +++ b/arch/arm/boot/dts/at91sam9263ek.dts @@ -95,7 +95,7 @@ spi0: spi@fffa4000 { status = "okay"; cs-gpios = <&pioA 5 0>, <0>, <0>, <0>; - mtd_dataflash@0 { + flash@0 { compatible = "atmel,at45", "atmel,dataflash"; spi-max-frequency = <50000000>; reg = <0>; diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi index 87bb39060e8b..74b90dc58cbf 100644 --- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi +++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi @@ -110,7 +110,7 @@ spi0: spi@fffc8000 { cs-gpios = <0>, <&pioC 11 0>, <0>, <0>; - mtd_dataflash@1 { + flash@1 { compatible = "atmel,at45", "atmel,dataflash"; spi-max-frequency = <50000000>; reg = <1>; diff --git a/arch/arm/boot/dts/at91sam9m10g45ek.dts b/arch/arm/boot/dts/at91sam9m10g45ek.dts index b6256a20fbc7..e5db198a87a8 100644 --- a/arch/arm/boot/dts/at91sam9m10g45ek.dts +++ b/arch/arm/boot/dts/at91sam9m10g45ek.dts @@ -167,7 +167,7 @@ spi0: spi@fffa4000{ status = "okay"; cs-gpios = <&pioB 3 0>, <0>, <0>, <0>; - mtd_dataflash@0 { + flash@0 { compatible = "atmel,at45", "atmel,dataflash"; spi-max-frequency = <13000000>; reg = <0>; diff --git a/arch/arm/boot/dts/at91sam9rlek.dts b/arch/arm/boot/dts/at91sam9rlek.dts index 62981b39c815..d74b8d9d84aa 100644 --- a/arch/arm/boot/dts/at91sam9rlek.dts +++ b/arch/arm/boot/dts/at91sam9rlek.dts @@ -180,7 +180,7 @@ spi0: spi@fffcc000 { status = "okay"; cs-gpios = <&pioA 28 0>, <0>, <0>, <0>; - mtd_dataflash@0 { + flash@0 { compatible = "atmel,at45", "atmel,dataflash"; spi-max-frequency = <15000000>; reg = <0>; diff --git a/arch/arm/boot/dts/usb_a9263.dts b/arch/arm/boot/dts/usb_a9263.dts index 8a0cfbfd0c45..b6cb9cdf8197 100644 --- a/arch/arm/boot/dts/usb_a9263.dts +++ b/arch/arm/boot/dts/usb_a9263.dts @@ -60,7 +60,7 @@ spi0: spi@fffa4000 { cs-gpios = <&pioB 15 GPIO_ACTIVE_HIGH>; status = "okay"; - mtd_dataflash@0 { + flash@0 { compatible = "atmel,at45", "atmel,dataflash"; reg = <0>; spi-max-frequency = <15000000>; From 3891222d88ad5b9983b132135609e00e05884b25 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 6 Apr 2022 14:09:47 -0500 Subject: [PATCH 089/803] ARM: dts: at91: Fix boolean properties with values Boolean properties in DT are present or not present and don't take a value. A property such as 'foo = <0>;' evaluated to true. IOW, the value doesn't matter. It may have been intended that 0 values are false, but there is no change in behavior with this patch. Signed-off-by: Rob Herring Reviewed-by: Claudiu Beznea Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/Yk3leykDEKGBN8rk@robh.at.kernel.org --- arch/arm/boot/dts/at91-kizbox3-hs.dts | 2 +- arch/arm/boot/dts/at91-kizbox3_common.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/at91-kizbox3-hs.dts b/arch/arm/boot/dts/at91-kizbox3-hs.dts index 2799b2a1f4d2..f7d90cf1bb77 100644 --- a/arch/arm/boot/dts/at91-kizbox3-hs.dts +++ b/arch/arm/boot/dts/at91-kizbox3-hs.dts @@ -225,7 +225,7 @@ pinctrl_pio_io_reset: gpio_io_reset { pinmux = ; bias-disable; - drive-open-drain = <1>; + drive-open-drain; output-low; }; pinctrl_pio_input: gpio_input { diff --git a/arch/arm/boot/dts/at91-kizbox3_common.dtsi b/arch/arm/boot/dts/at91-kizbox3_common.dtsi index abe27adfa4d6..465664628419 100644 --- a/arch/arm/boot/dts/at91-kizbox3_common.dtsi +++ b/arch/arm/boot/dts/at91-kizbox3_common.dtsi @@ -211,7 +211,7 @@ pinmux = , //DATA ; //CLK bias-disable; - drive-open-drain = <1>; + drive-open-drain; }; pinctrl_pwm0 { From 0e486fe341fabd8e583f3d601a874cd394979c45 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 4 Apr 2022 11:28:05 +0100 Subject: [PATCH 090/803] ARM: dts: at91: Map MCLK for wm8731 on at91sam9g20ek The MCLK of the WM8731 on the AT91SAM9G20-EK board is connected to the PCK0 output of the SoC and is expected to be set to 12MHz. Previously this was mapped using pre-common clock API calls in the audio machine driver but the conversion to the common clock framework broke that so describe things in the DT instead. Fixes: ff78a189b0ae55f ("ARM: at91: remove old at91-specific clock driver") Signed-off-by: Mark Brown Reviewed-by: Claudiu Beznea Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20220404102806.581374-2-broonie@kernel.org --- arch/arm/boot/dts/at91sam9g20ek_common.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi index 74b90dc58cbf..91df8ec27a3d 100644 --- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi +++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi @@ -219,6 +219,12 @@ wm8731: wm8731@1b { compatible = "wm8731"; reg = <0x1b>; + + /* PCK0 at 12MHz */ + clocks = <&pmc PMC_TYPE_SYSTEM 8>; + clock-names = "mclk"; + assigned-clocks = <&pmc PMC_TYPE_SYSTEM 8>; + assigned-clock-rates = <12000000>; }; }; From afca68de401fceb1d52e1b6daec78e8b09f7f0a2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 4 Apr 2022 11:28:06 +0100 Subject: [PATCH 091/803] ARM: dts: at91: Describe regulators on at91sam9g20ek The at91sam9g20ek has no software controllable regulators, only some fixed discrete regulators, but they are there and currently the wm8731 driver does try to use them. Show the supplies in the DT and map them for the wm8731 so things start up cleanly. Signed-off-by: Mark Brown Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20220404102806.581374-3-broonie@kernel.org --- arch/arm/boot/dts/at91sam9g20ek_common.dtsi | 37 +++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi index 91df8ec27a3d..85c17dd1c8d5 100644 --- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi +++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi @@ -214,6 +214,7 @@ 24c512@50 { compatible = "atmel,24c512"; reg = <0x50>; + vcc-supply = <®_3v3>; }; wm8731: wm8731@1b { @@ -225,6 +226,11 @@ clock-names = "mclk"; assigned-clocks = <&pmc PMC_TYPE_SYSTEM 8>; assigned-clock-rates = <12000000>; + + HPVDD-supply = <&vcc_dac>; + AVDD-supply = <&vcc_dac>; + DCVDD-supply = <®_3v3>; + DBVDD-supply = <®_3v3>; }; }; @@ -260,4 +266,35 @@ atmel,ssc-controller = <&ssc0>; atmel,audio-codec = <&wm8731>; }; + + reg_5v: fixedregulator0 { + compatible = "regulator-fixed"; + regulator-name = "5V"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + }; + + reg_3v3: fixedregulator1 { + compatible = "regulator-fixed"; + regulator-name = "3V3"; + vin-supply = <®_5v>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + + reg_1v: fixedregulator2 { + compatible = "regulator-fixed"; + regulator-name = "1V"; + vin-supply = <®_5v>; + regulator-min-microvolt = <1000000>; + regulator-max-microvolt = <1000000>; + }; + + vcc_dac: fixedregulator3 { + compatible = "regulator-fixed"; + regulator-name = "VCC_DAC"; + vin-supply = <®_3v3>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; }; From 5c8b49852910caffeebb1ce541fdd264ffc691b8 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 31 Mar 2022 17:13:22 +0300 Subject: [PATCH 092/803] ARM: dts: at91: sama5d4_xplained: fix pinctrl phandle name Pinctrl phandle is for spi1 so rename it to reflect this. Signed-off-by: Claudiu Beznea Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20220331141323.194355-1-claudiu.beznea@microchip.com --- arch/arm/boot/dts/at91-sama5d4_xplained.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts index d241c24f0d83..accb92cfac44 100644 --- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts @@ -82,7 +82,7 @@ spi1: spi@fc018000 { pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_spi0_cs>; + pinctrl-0 = <&pinctrl_spi1_cs>; cs-gpios = <&pioB 21 0>; status = "okay"; }; @@ -140,7 +140,7 @@ atmel,pins = ; }; - pinctrl_spi0_cs: spi0_cs_default { + pinctrl_spi1_cs: spi1_cs_default { atmel,pins = ; }; From 0c640d9544d0109da3889d71ae77301e556db977 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 31 Mar 2022 17:13:23 +0300 Subject: [PATCH 093/803] ARM: dts: at91: fix pinctrl phandles Commit bf781869e5cf ("ARM: dts: at91: add pinctrl-{names, 0} for all gpios") introduces pinctrl phandles for pins used by individual controllers to avoid failures due to commit 2ab73c6d8323 ("gpio: Support GPIO controllers without pin-ranges"). For SPI controllers available on SAMA5D4 and SAMA5D3 some of the pins are defined in SoC specific dtsi on behalf of pinctrl-0. Adding extra pinctrl phandles on board specific dts also on behalf of pinctrl-0 overwrite the pinctrl-0 phandle specified in SoC specific dtsi. Thus add the board specific pinctrl to pinctrl-1. Fixes: bf781869e5cf ("ARM: dts: at91: add pinctrl-{names, 0} for all gpios") Depends-on: 5c8b49852910 ("ARM: dts: at91: sama5d4_xplained: fix pinctrl phandle name") Reported-by: Ajay Kathat Co-developed-by: Ajay Kathat Signed-off-by: Ajay Kathat Tested-by: Ajay Kathat Signed-off-by: Claudiu Beznea Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20220331141323.194355-2-claudiu.beznea@microchip.com --- arch/arm/boot/dts/at91-sama5d3_xplained.dts | 8 ++++---- arch/arm/boot/dts/at91-sama5d4_xplained.dts | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/at91-sama5d3_xplained.dts b/arch/arm/boot/dts/at91-sama5d3_xplained.dts index d72c042f2850..a49c2966b41e 100644 --- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts @@ -57,8 +57,8 @@ }; spi0: spi@f0004000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_spi0_cs>; + pinctrl-names = "default", "cs"; + pinctrl-1 = <&pinctrl_spi0_cs>; cs-gpios = <&pioD 13 0>, <0>, <0>, <&pioD 16 0>; status = "okay"; }; @@ -171,8 +171,8 @@ }; spi1: spi@f8008000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_spi1_cs>; + pinctrl-names = "default", "cs"; + pinctrl-1 = <&pinctrl_spi1_cs>; cs-gpios = <&pioC 25 0>; status = "okay"; }; diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts index accb92cfac44..e519d2747936 100644 --- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts @@ -81,8 +81,8 @@ }; spi1: spi@fc018000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_spi1_cs>; + pinctrl-names = "default", "cs"; + pinctrl-1 = <&pinctrl_spi1_cs>; cs-gpios = <&pioB 21 0>; status = "okay"; }; From d644e0d79829b1b9a14beedbdb0dc1256fc3677d Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Tue, 1 Mar 2022 02:46:11 +0000 Subject: [PATCH 094/803] phy: mapphone-mdm6600: Fix PM error handling in phy_mdm6600_probe The pm_runtime_enable will increase power disable depth. If the probe fails, we should use pm_runtime_disable() to balance pm_runtime_enable(). And use pm_runtime_dont_use_autosuspend() to undo pm_runtime_use_autosuspend() In the PM Runtime docs: Drivers in ->remove() callback should undo the runtime PM changes done in ->probe(). Usually this means calling pm_runtime_disable(), pm_runtime_dont_use_autosuspend() etc. We should do this in error handling. Fixes: f7f50b2a7b05 ("phy: mapphone-mdm6600: Add runtime PM support for n_gsm on USB suspend") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220301024615.31899-1-linmq006@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/motorola/phy-mapphone-mdm6600.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c index 5172971f4c36..3cd4d51c247c 100644 --- a/drivers/phy/motorola/phy-mapphone-mdm6600.c +++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c @@ -629,7 +629,8 @@ idle: cleanup: if (error < 0) phy_mdm6600_device_power_off(ddata); - + pm_runtime_disable(ddata->dev); + pm_runtime_dont_use_autosuspend(ddata->dev); return error; } From ce88613e5bd579478653a028291098143f2a5bdf Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Tue, 1 Mar 2022 02:58:49 +0000 Subject: [PATCH 095/803] phy: ti: Add missing pm_runtime_disable() in serdes_am654_probe The pm_runtime_enable() will increase power disable depth. If the probe fails, we should use pm_runtime_disable() to balance pm_runtime_enable(). Add missing pm_runtime_disable() for serdes_am654_probe(). Fixes: 71e2f5c5c224 ("phy: ti: Add a new SERDES driver for TI's AM654x SoC") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220301025853.1911-1-linmq006@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-am654-serdes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/ti/phy-am654-serdes.c b/drivers/phy/ti/phy-am654-serdes.c index c1211c4f863c..0be727bb9f79 100644 --- a/drivers/phy/ti/phy-am654-serdes.c +++ b/drivers/phy/ti/phy-am654-serdes.c @@ -838,7 +838,7 @@ static int serdes_am654_probe(struct platform_device *pdev) clk_err: of_clk_del_provider(node); - + pm_runtime_disable(dev); return ret; } From 2f3724930eb4bba74f7d10bc3bef5bb22dd323df Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 12 Apr 2022 15:00:32 -0700 Subject: [PATCH 096/803] interconnect: qcom: sc7180: Drop IP0 interconnects The IPA BCM resource ("IP0") on sc7180 was moved to the clk-rpmh driver in commit bcd63d222b60 ("clk: qcom: rpmh: Add IPA clock for SC7180") and modeled as a clk, but this interconnect driver still had it modeled as an interconnect. This was mostly OK because nobody used the interconnect definition, until the interconnect framework started dropping bandwidth requests on interconnects that aren't used via the sync_state callback in commit 7d3b0b0d8184 ("interconnect: qcom: Use icc_sync_state"). Once that patch was applied the IP0 resource was going to be controlled from two places, the clk framework and the interconnect framework. Even then, things were probably going to be OK, because commit b95b668eaaa2 ("interconnect: qcom: icc-rpmh: Add BCMs to commit list in pre_aggregate") was needed to actually drop bandwidth requests on unused interconnects, of which the IPA was one of the interconnect that wasn't getting dropped to zero. Combining the three commits together leads to bad behavior where the interconnect framework is disabling the IP0 resource because it has no users while the clk framework thinks the IP0 resource is on because the only user, the IPA driver, has turned it on via clk_prepare_enable(). Depending on when sync_state is called, we can get into a situation like below: IPA driver probes IPA driver gets notified modem started runtime PM get() IPA clk enabled -> IP0 resource is ON sync_state runs interconnect zeroes out the IP0 resource -> IP0 resource is off IPA driver tries to access a register and blows up The crash is an unclocked access that manifest as an SError. SError Interrupt on CPU0, code 0xbe000011 -- SError CPU: 0 PID: 3595 Comm: mmdata_mgr Not tainted 5.17.1+ #166 Hardware name: Google Lazor (rev1 - 2) with LTE (DT) pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : mutex_lock+0x4c/0x80 lr : mutex_lock+0x30/0x80 sp : ffffffc00da9b9c0 x29: ffffffc00da9b9c0 x28: 0000000000000000 x27: 0000000000000000 x26: ffffffc00da9bc90 x25: ffffff80c2024010 x24: ffffff80c2024000 x23: ffffff8083100000 x22: ffffff80831000d0 x21: ffffff80831000a8 x20: ffffff80831000a8 x19: ffffff8083100070 x18: 00000000ffff0a00 x17: 000000002f7254f1 x16: 0000000000000100 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 000000000001f0b8 x10: ffffffc00931f0b8 x9 : 0000000000000000 x8 : 0000000000000000 x7 : fefefefefeff2f60 x6 : 0000808080808080 x5 : 0000000000000000 x4 : 8080808080800000 x3 : ffffff80d2d4ee28 x2 : ffffff808c1d6e40 x1 : 0000000000000000 x0 : ffffff8083100070 Kernel panic - not syncing: Asynchronous SError Interrupt CPU: 0 PID: 3595 Comm: mmdata_mgr Not tainted 5.17.1+ #166 Hardware name: Google Lazor (rev1 - 2) with LTE (DT) Call trace: dump_backtrace+0xf4/0x114 show_stack+0x24/0x30 dump_stack_lvl+0x64/0x7c dump_stack+0x18/0x38 panic+0x150/0x38c nmi_panic+0x88/0xa0 arm64_serror_panic+0x74/0x80 do_serror+0x0/0x80 do_serror+0x58/0x80 el1h_64_error_handler+0x34/0x4c el1h_64_error+0x78/0x7c mutex_lock+0x4c/0x80 __gsi_channel_start+0x50/0x17c gsi_channel_start+0x54/0x90 ipa_endpoint_enable_one+0x34/0xc0 ipa_open+0x4c/0x120 Remove all IP0 resource management from the interconnect driver so that clk-rpmh is the sole owner. This fixes the issue by preventing the interconnect driver from overwriting the IP0 resource data that the clk-rpmh driver wrote. Cc: Alex Elder Cc: Bjorn Andersson Cc: Taniya Das Cc: Mike Tipton Fixes: b95b668eaaa2 ("interconnect: qcom: icc-rpmh: Add BCMs to commit list in pre_aggregate") Fixes: bcd63d222b60 ("clk: qcom: rpmh: Add IPA clock for SC7180") Fixes: 7d3b0b0d8184 ("interconnect: qcom: Use icc_sync_state") Signed-off-by: Stephen Boyd Tested-by: Alex Elder Reviewed-by: Alex Elder Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220412220033.1273607-2-swboyd@chromium.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/sc7180.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/drivers/interconnect/qcom/sc7180.c b/drivers/interconnect/qcom/sc7180.c index 12d59c36df53..5f7c0f85fa8e 100644 --- a/drivers/interconnect/qcom/sc7180.c +++ b/drivers/interconnect/qcom/sc7180.c @@ -47,7 +47,6 @@ DEFINE_QNODE(qnm_mnoc_sf, SC7180_MASTER_MNOC_SF_MEM_NOC, 1, 32, SC7180_SLAVE_GEM DEFINE_QNODE(qnm_snoc_gc, SC7180_MASTER_SNOC_GC_MEM_NOC, 1, 8, SC7180_SLAVE_LLCC); DEFINE_QNODE(qnm_snoc_sf, SC7180_MASTER_SNOC_SF_MEM_NOC, 1, 16, SC7180_SLAVE_LLCC); DEFINE_QNODE(qxm_gpu, SC7180_MASTER_GFX3D, 2, 32, SC7180_SLAVE_GEM_NOC_SNOC, SC7180_SLAVE_LLCC); -DEFINE_QNODE(ipa_core_master, SC7180_MASTER_IPA_CORE, 1, 8, SC7180_SLAVE_IPA_CORE); DEFINE_QNODE(llcc_mc, SC7180_MASTER_LLCC, 2, 4, SC7180_SLAVE_EBI1); DEFINE_QNODE(qhm_mnoc_cfg, SC7180_MASTER_CNOC_MNOC_CFG, 1, 4, SC7180_SLAVE_SERVICE_MNOC); DEFINE_QNODE(qxm_camnoc_hf0, SC7180_MASTER_CAMNOC_HF0, 2, 32, SC7180_SLAVE_MNOC_HF_MEM_NOC); @@ -129,7 +128,6 @@ DEFINE_QNODE(qhs_mdsp_ms_mpu_cfg, SC7180_SLAVE_MSS_PROC_MS_MPU_CFG, 1, 4); DEFINE_QNODE(qns_gem_noc_snoc, SC7180_SLAVE_GEM_NOC_SNOC, 1, 8, SC7180_MASTER_GEM_NOC_SNOC); DEFINE_QNODE(qns_llcc, SC7180_SLAVE_LLCC, 1, 16, SC7180_MASTER_LLCC); DEFINE_QNODE(srvc_gemnoc, SC7180_SLAVE_SERVICE_GEM_NOC, 1, 4); -DEFINE_QNODE(ipa_core_slave, SC7180_SLAVE_IPA_CORE, 1, 8); DEFINE_QNODE(ebi, SC7180_SLAVE_EBI1, 2, 4); DEFINE_QNODE(qns_mem_noc_hf, SC7180_SLAVE_MNOC_HF_MEM_NOC, 1, 32, SC7180_MASTER_MNOC_HF_MEM_NOC); DEFINE_QNODE(qns_mem_noc_sf, SC7180_SLAVE_MNOC_SF_MEM_NOC, 1, 32, SC7180_MASTER_MNOC_SF_MEM_NOC); @@ -160,7 +158,6 @@ DEFINE_QBCM(bcm_mc0, "MC0", true, &ebi); DEFINE_QBCM(bcm_sh0, "SH0", true, &qns_llcc); DEFINE_QBCM(bcm_mm0, "MM0", false, &qns_mem_noc_hf); DEFINE_QBCM(bcm_ce0, "CE0", false, &qxm_crypto); -DEFINE_QBCM(bcm_ip0, "IP0", false, &ipa_core_slave); DEFINE_QBCM(bcm_cn0, "CN0", true, &qnm_snoc, &xm_qdss_dap, &qhs_a1_noc_cfg, &qhs_a2_noc_cfg, &qhs_ahb2phy0, &qhs_aop, &qhs_aoss, &qhs_boot_rom, &qhs_camera_cfg, &qhs_camera_nrt_throttle_cfg, &qhs_camera_rt_throttle_cfg, &qhs_clk_ctl, &qhs_cpr_cx, &qhs_cpr_mx, &qhs_crypto0_cfg, &qhs_dcc_cfg, &qhs_ddrss_cfg, &qhs_display_cfg, &qhs_display_rt_throttle_cfg, &qhs_display_throttle_cfg, &qhs_glm, &qhs_gpuss_cfg, &qhs_imem_cfg, &qhs_ipa, &qhs_mnoc_cfg, &qhs_mss_cfg, &qhs_npu_cfg, &qhs_npu_dma_throttle_cfg, &qhs_npu_dsp_throttle_cfg, &qhs_pimem_cfg, &qhs_prng, &qhs_qdss_cfg, &qhs_qm_cfg, &qhs_qm_mpu_cfg, &qhs_qup0, &qhs_qup1, &qhs_security, &qhs_snoc_cfg, &qhs_tcsr, &qhs_tlmm_1, &qhs_tlmm_2, &qhs_tlmm_3, &qhs_ufs_mem_cfg, &qhs_usb3, &qhs_venus_cfg, &qhs_venus_throttle_cfg, &qhs_vsense_ctrl_cfg, &srvc_cnoc); DEFINE_QBCM(bcm_mm1, "MM1", false, &qxm_camnoc_hf0_uncomp, &qxm_camnoc_hf1_uncomp, &qxm_camnoc_sf_uncomp, &qhm_mnoc_cfg, &qxm_mdp0, &qxm_rot, &qxm_venus0, &qxm_venus_arm9); DEFINE_QBCM(bcm_sh2, "SH2", false, &acm_sys_tcu); @@ -372,22 +369,6 @@ static struct qcom_icc_desc sc7180_gem_noc = { .num_bcms = ARRAY_SIZE(gem_noc_bcms), }; -static struct qcom_icc_bcm *ipa_virt_bcms[] = { - &bcm_ip0, -}; - -static struct qcom_icc_node *ipa_virt_nodes[] = { - [MASTER_IPA_CORE] = &ipa_core_master, - [SLAVE_IPA_CORE] = &ipa_core_slave, -}; - -static struct qcom_icc_desc sc7180_ipa_virt = { - .nodes = ipa_virt_nodes, - .num_nodes = ARRAY_SIZE(ipa_virt_nodes), - .bcms = ipa_virt_bcms, - .num_bcms = ARRAY_SIZE(ipa_virt_bcms), -}; - static struct qcom_icc_bcm *mc_virt_bcms[] = { &bcm_acv, &bcm_mc0, @@ -519,8 +500,6 @@ static const struct of_device_id qnoc_of_match[] = { .data = &sc7180_dc_noc}, { .compatible = "qcom,sc7180-gem-noc", .data = &sc7180_gem_noc}, - { .compatible = "qcom,sc7180-ipa-virt", - .data = &sc7180_ipa_virt}, { .compatible = "qcom,sc7180-mc-virt", .data = &sc7180_mc_virt}, { .compatible = "qcom,sc7180-mmss-noc", From 2fb251c265608636fc961b7d38e1a03937e57371 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 12 Apr 2022 15:00:33 -0700 Subject: [PATCH 097/803] interconnect: qcom: sdx55: Drop IP0 interconnects Similar to the sc7180 commit, let's drop the IP0 interconnects here because the IP0 resource is also used in the clk-rpmh driver on sdx55. It's bad to have the clk framework and interconnect framework control the same RPMh resource without any coordination. The rpmh driver in the kernel doesn't aggregate resources between clients either, so leaving control to clk-rpmh avoids any issues with unused interconnects turning off IP0 behind the back of the clk framework. Cc: Alex Elder Cc: Manivannan Sadhasivam Cc: Bjorn Andersson Cc: Taniya Das Cc: Mike Tipton Fixes: b2150cab9a97 ("clk: qcom: rpmh: add support for SDX55 rpmh IPA clock") Signed-off-by: Stephen Boyd Reviewed-by: Alex Elder Acked-by: Manivannan Sadhasivam Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220412220033.1273607-3-swboyd@chromium.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/sdx55.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/drivers/interconnect/qcom/sdx55.c b/drivers/interconnect/qcom/sdx55.c index 03d604f84cc5..e3ac25a997b7 100644 --- a/drivers/interconnect/qcom/sdx55.c +++ b/drivers/interconnect/qcom/sdx55.c @@ -18,7 +18,6 @@ #include "icc-rpmh.h" #include "sdx55.h" -DEFINE_QNODE(ipa_core_master, SDX55_MASTER_IPA_CORE, 1, 8, SDX55_SLAVE_IPA_CORE); DEFINE_QNODE(llcc_mc, SDX55_MASTER_LLCC, 4, 4, SDX55_SLAVE_EBI_CH0); DEFINE_QNODE(acm_tcu, SDX55_MASTER_TCU_0, 1, 8, SDX55_SLAVE_LLCC, SDX55_SLAVE_MEM_NOC_SNOC, SDX55_SLAVE_MEM_NOC_PCIE_SNOC); DEFINE_QNODE(qnm_snoc_gc, SDX55_MASTER_SNOC_GC_MEM_NOC, 1, 8, SDX55_SLAVE_LLCC); @@ -40,7 +39,6 @@ DEFINE_QNODE(xm_pcie, SDX55_MASTER_PCIE, 1, 8, SDX55_SLAVE_ANOC_SNOC); DEFINE_QNODE(xm_qdss_etr, SDX55_MASTER_QDSS_ETR, 1, 8, SDX55_SLAVE_SNOC_CFG, SDX55_SLAVE_EMAC_CFG, SDX55_SLAVE_USB3, SDX55_SLAVE_AOSS, SDX55_SLAVE_SPMI_FETCHER, SDX55_SLAVE_QDSS_CFG, SDX55_SLAVE_PDM, SDX55_SLAVE_SNOC_MEM_NOC_GC, SDX55_SLAVE_TCSR, SDX55_SLAVE_CNOC_DDRSS, SDX55_SLAVE_SPMI_VGI_COEX, SDX55_SLAVE_QPIC, SDX55_SLAVE_OCIMEM, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_USB3_PHY_CFG, SDX55_SLAVE_AOP, SDX55_SLAVE_BLSP_1, SDX55_SLAVE_SDCC_1, SDX55_SLAVE_CNOC_MSS, SDX55_SLAVE_PCIE_PARF, SDX55_SLAVE_ECC_CFG, SDX55_SLAVE_AUDIO, SDX55_SLAVE_AOSS, SDX55_SLAVE_PRNG, SDX55_SLAVE_CRYPTO_0_CFG, SDX55_SLAVE_TCU, SDX55_SLAVE_CLK_CTL, SDX55_SLAVE_IMEM_CFG); DEFINE_QNODE(xm_sdc1, SDX55_MASTER_SDCC_1, 1, 8, SDX55_SLAVE_AOSS, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_ANOC_SNOC, SDX55_SLAVE_AOP, SDX55_SLAVE_AUDIO); DEFINE_QNODE(xm_usb3, SDX55_MASTER_USB3, 1, 8, SDX55_SLAVE_ANOC_SNOC); -DEFINE_QNODE(ipa_core_slave, SDX55_SLAVE_IPA_CORE, 1, 8); DEFINE_QNODE(ebi, SDX55_SLAVE_EBI_CH0, 1, 4); DEFINE_QNODE(qns_llcc, SDX55_SLAVE_LLCC, 1, 16, SDX55_SLAVE_EBI_CH0); DEFINE_QNODE(qns_memnoc_snoc, SDX55_SLAVE_MEM_NOC_SNOC, 1, 8, SDX55_MASTER_MEM_NOC_SNOC); @@ -82,7 +80,6 @@ DEFINE_QNODE(xs_sys_tcu_cfg, SDX55_SLAVE_TCU, 1, 8); DEFINE_QBCM(bcm_mc0, "MC0", true, &ebi); DEFINE_QBCM(bcm_sh0, "SH0", true, &qns_llcc); DEFINE_QBCM(bcm_ce0, "CE0", false, &qxm_crypto); -DEFINE_QBCM(bcm_ip0, "IP0", false, &ipa_core_slave); DEFINE_QBCM(bcm_pn0, "PN0", false, &qhm_snoc_cfg); DEFINE_QBCM(bcm_sh3, "SH3", false, &xm_apps_rdwr); DEFINE_QBCM(bcm_sh4, "SH4", false, &qns_memnoc_snoc, &qns_sys_pcie); @@ -219,22 +216,6 @@ static const struct qcom_icc_desc sdx55_system_noc = { .num_bcms = ARRAY_SIZE(system_noc_bcms), }; -static struct qcom_icc_bcm *ipa_virt_bcms[] = { - &bcm_ip0, -}; - -static struct qcom_icc_node *ipa_virt_nodes[] = { - [MASTER_IPA_CORE] = &ipa_core_master, - [SLAVE_IPA_CORE] = &ipa_core_slave, -}; - -static const struct qcom_icc_desc sdx55_ipa_virt = { - .nodes = ipa_virt_nodes, - .num_nodes = ARRAY_SIZE(ipa_virt_nodes), - .bcms = ipa_virt_bcms, - .num_bcms = ARRAY_SIZE(ipa_virt_bcms), -}; - static const struct of_device_id qnoc_of_match[] = { { .compatible = "qcom,sdx55-mc-virt", .data = &sdx55_mc_virt}, @@ -242,8 +223,6 @@ static const struct of_device_id qnoc_of_match[] = { .data = &sdx55_mem_noc}, { .compatible = "qcom,sdx55-system-noc", .data = &sdx55_system_noc}, - { .compatible = "qcom,sdx55-ipa-virt", - .data = &sdx55_ipa_virt}, { } }; MODULE_DEVICE_TABLE(of, qnoc_of_match); From 09269dd050094593fc747f2a5853d189fefcb6b5 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 8 Mar 2022 14:00:20 +0100 Subject: [PATCH 098/803] ARM: dts: Fix mmc order for omap3-gta04 Commit a1ebdb374199 ("ARM: dts: Fix swapped mmc order for omap3") introduces general mmc aliases. Let's tailor them to the need of the GTA04 board which does not make use of mmc2 and mmc3 interfaces. Fixes: a1ebdb374199 ("ARM: dts: Fix swapped mmc order for omap3") Signed-off-by: H. Nikolaus Schaller Message-Id: Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-gta04.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index 7e3d8147e2c1..0365f06165e9 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -31,6 +31,8 @@ aliases { display0 = &lcd; display1 = &tv0; + /delete-property/ mmc2; + /delete-property/ mmc3; }; ldo_3v3: fixedregulator { From c21a7434d6cc216a910dd35632617850f1751f4c Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 14 Mar 2022 17:34:45 +0100 Subject: [PATCH 099/803] ARM: dts: am33xx-l4: Add missing touchscreen clock properties When adding support for TI magadc (Magnetic Stripe Reader and ADC), the MFD driver common to the touchscreen and the ADC got updated to ease the insertion of a new DT node for the ADC, with its own compatible, clocks, etc. Commit 235a96e92c16 ("mfd: ti_am335x_tscadc: Don't search the tree for our clock") removed one compatible specific information which was the clock name, because the clock was looked up from scratch in the DT while this hardware block was only fed by a single clock, already defined and properly filled in the DT. Problem is, this change was only validated with an am437x-based board, where the clocks are effectively correctly defined and referenced. But on am33xx, the ADC clock is also correctly defined but is not referenced with a clock phandle as it ought to be. The touchscreen bindings clearly state that the clocks/clock-names properties are mandatory, but they have been forgotten in one DTSI. This was probably not noticed in the first place because of the clock actually existing and the clk_get() call going through all the tree anyway. Add the missing clock phandles in the am33xx touchscreen description. Reported-by: H. Nikolaus Schaller Fixes: 235a96e92c16 ("mfd: ti_am335x_tscadc: Don't search the tree for our clock") Signed-off-by: Miquel Raynal Tested-by: H. Nikolaus Schaller Message-Id: <20220314163445.79807-1-miquel.raynal@bootlin.com> Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am33xx-l4.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi index c9629cb5ccd1..7da42a5b959c 100644 --- a/arch/arm/boot/dts/am33xx-l4.dtsi +++ b/arch/arm/boot/dts/am33xx-l4.dtsi @@ -263,6 +263,8 @@ compatible = "ti,am3359-tscadc"; reg = <0x0 0x1000>; interrupts = <16>; + clocks = <&adc_tsc_fck>; + clock-names = "fck"; status = "disabled"; dmas = <&edma 53 0>, <&edma 57 0>; dma-names = "fifo0", "fifo1"; From 942da3af32b2288e674736eb159d1fc676261691 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sat, 26 Feb 2022 15:48:19 -0600 Subject: [PATCH 100/803] ARM: dts: am3517-evm: Fix misc pinmuxing The bootloader for the AM3517 has previously done much of the pin muxing, but as the bootloader is moving more and more to a model based on the device tree, it may no longer automatically mux the pins, so it is necessary to add the pinmuxing to the Linux device trees so the respective peripherals can remain functional. Fixes: 6ed1d7997561 ("ARM: dts: am3517-evm: Add support for UI board and Audio") Signed-off-by: Adam Ford Message-Id: <20220226214820.747847-1-aford173@gmail.com> Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am3517-evm.dts | 45 +++++++++++++++++++++++++++---- arch/arm/boot/dts/am3517-som.dtsi | 9 +++++++ 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/am3517-evm.dts b/arch/arm/boot/dts/am3517-evm.dts index 0d2fac98ce7d..c8b80f156ec9 100644 --- a/arch/arm/boot/dts/am3517-evm.dts +++ b/arch/arm/boot/dts/am3517-evm.dts @@ -161,6 +161,8 @@ /* HS USB Host PHY on PORT 1 */ hsusb1_phy: hsusb1_phy { + pinctrl-names = "default"; + pinctrl-0 = <&hsusb1_rst_pins>; compatible = "usb-nop-xceiv"; reset-gpios = <&gpio2 25 GPIO_ACTIVE_LOW>; /* gpio_57 */ #phy-cells = <0>; @@ -168,7 +170,9 @@ }; &davinci_emac { - status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <ðernet_pins>; + status = "okay"; }; &davinci_mdio { @@ -193,6 +197,8 @@ }; &i2c2 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c2_pins>; clock-frequency = <400000>; /* User DIP swithes [1:8] / User LEDS [1:2] */ tca6416: gpio@21 { @@ -205,6 +211,8 @@ }; &i2c3 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c3_pins>; clock-frequency = <400000>; }; @@ -223,6 +231,8 @@ }; &usbhshost { + pinctrl-names = "default"; + pinctrl-0 = <&hsusb1_pins>; port1-mode = "ehci-phy"; }; @@ -231,8 +241,35 @@ }; &omap3_pmx_core { - pinctrl-names = "default"; - pinctrl-0 = <&hsusb1_rst_pins>; + + ethernet_pins: pinmux_ethernet_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21fe, PIN_INPUT | MUX_MODE0) /* rmii_mdio_data */ + OMAP3_CORE1_IOPAD(0x2200, MUX_MODE0) /* rmii_mdio_clk */ + OMAP3_CORE1_IOPAD(0x2202, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_rxd0 */ + OMAP3_CORE1_IOPAD(0x2204, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_rxd1 */ + OMAP3_CORE1_IOPAD(0x2206, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_crs_dv */ + OMAP3_CORE1_IOPAD(0x2208, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_rxer */ + OMAP3_CORE1_IOPAD(0x220a, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_txd0 */ + OMAP3_CORE1_IOPAD(0x220c, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_txd1 */ + OMAP3_CORE1_IOPAD(0x220e, PIN_OUTPUT_PULLDOWN |MUX_MODE0) /* rmii_txen */ + OMAP3_CORE1_IOPAD(0x2210, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_50mhz_clk */ + >; + }; + + i2c2_pins: pinmux_i2c2_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21be, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c2_scl */ + OMAP3_CORE1_IOPAD(0x21c0, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c2_sda */ + >; + }; + + i2c3_pins: pinmux_i2c3_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21c2, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c3_scl */ + OMAP3_CORE1_IOPAD(0x21c4, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c3_sda */ + >; + }; leds_pins: pinmux_leds_pins { pinctrl-single,pins = < @@ -300,8 +337,6 @@ }; &omap3_pmx_core2 { - pinctrl-names = "default"; - pinctrl-0 = <&hsusb1_pins>; hsusb1_pins: pinmux_hsusb1_pins { pinctrl-single,pins = < diff --git a/arch/arm/boot/dts/am3517-som.dtsi b/arch/arm/boot/dts/am3517-som.dtsi index 8b669e2eafec..f7b680f6c48a 100644 --- a/arch/arm/boot/dts/am3517-som.dtsi +++ b/arch/arm/boot/dts/am3517-som.dtsi @@ -69,6 +69,8 @@ }; &i2c1 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c1_pins>; clock-frequency = <400000>; s35390a: s35390a@30 { @@ -179,6 +181,13 @@ &omap3_pmx_core { + i2c1_pins: pinmux_i2c1_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c1_scl */ + OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c1_sda */ + >; + }; + wl12xx_buffer_pins: pinmux_wl12xx_buffer_pins { pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x2156, PIN_OUTPUT | MUX_MODE4) /* mmc1_dat7.gpio_129 */ From 46ff3df87215ff42c0cd2c4bdb7d74540384a69c Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Thu, 3 Mar 2022 11:18:17 -0600 Subject: [PATCH 101/803] ARM: dts: logicpd-som-lv: Fix wrong pinmuxing on OMAP35 The pinout of the OMAP35 and DM37 variants of the SOM-LV are the same, but the macros which define the pinmuxing are different between OMAP3530 and DM3730. The pinmuxing was correct for for the DM3730, but wrong for the OMAP3530. Since the boot loader was correctly pin-muxing the pins, this was not obvious. As the bootloader not guaranteed to pinmux all the pins any more, this causes an issue, so the pinmux needs to be moved from a common file to their respective board files. Fixes: f8a2e3ff7103 ("ARM: dts: Add minimal support for LogicPD OMAP35xx SOM-LV devkit") Signed-off-by: Adam Ford Message-Id: <20220303171818.11060-1-aford173@gmail.com> Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts | 15 +++++++++++++++ arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts | 15 +++++++++++++++ arch/arm/boot/dts/logicpd-som-lv.dtsi | 15 --------------- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts index 2a0a98fe67f0..3240c67e0c39 100644 --- a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts @@ -11,3 +11,18 @@ model = "LogicPD Zoom OMAP35xx SOM-LV Development Kit"; compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3430", "ti,omap3"; }; + +&omap3_pmx_core2 { + pinctrl-names = "default"; + pinctrl-0 = <&hsusb2_2_pins>; + hsusb2_2_pins: pinmux_hsusb2_2_pins { + pinctrl-single,pins = < + OMAP3430_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3) /* etk_d10.hsusb2_clk */ + OMAP3430_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3) /* etk_d11.hsusb2_stp */ + OMAP3430_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d12.hsusb2_dir */ + OMAP3430_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d13.hsusb2_nxt */ + OMAP3430_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d14.hsusb2_data0 */ + OMAP3430_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d15.hsusb2_data1 */ + >; + }; +}; diff --git a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts index a604d92221a4..c757f0d7781c 100644 --- a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts @@ -11,3 +11,18 @@ model = "LogicPD Zoom DM3730 SOM-LV Development Kit"; compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3630", "ti,omap3"; }; + +&omap3_pmx_core2 { + pinctrl-names = "default"; + pinctrl-0 = <&hsusb2_2_pins>; + hsusb2_2_pins: pinmux_hsusb2_2_pins { + pinctrl-single,pins = < + OMAP3630_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3) /* etk_d10.hsusb2_clk */ + OMAP3630_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3) /* etk_d11.hsusb2_stp */ + OMAP3630_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d12.hsusb2_dir */ + OMAP3630_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d13.hsusb2_nxt */ + OMAP3630_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d14.hsusb2_data0 */ + OMAP3630_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d15.hsusb2_data1 */ + >; + }; +}; diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index b56524cc7fe2..55b619c99e24 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -265,21 +265,6 @@ }; }; -&omap3_pmx_core2 { - pinctrl-names = "default"; - pinctrl-0 = <&hsusb2_2_pins>; - hsusb2_2_pins: pinmux_hsusb2_2_pins { - pinctrl-single,pins = < - OMAP3630_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3) /* etk_d10.hsusb2_clk */ - OMAP3630_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3) /* etk_d11.hsusb2_stp */ - OMAP3630_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d12.hsusb2_dir */ - OMAP3630_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d13.hsusb2_nxt */ - OMAP3630_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d14.hsusb2_data0 */ - OMAP3630_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d15.hsusb2_data1 */ - >; - }; -}; - &uart2 { interrupts-extended = <&intc 73 &omap3_pmx_core OMAP3_UART2_RX>; pinctrl-names = "default"; From 2a7ccf6bb6f147f64c025ad68f4255d8e1e0ce6d Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 14 Apr 2022 13:02:09 +0200 Subject: [PATCH 102/803] USB: quirks: add a Realtek card reader This device is reported to stall when enummerated. Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20220414110209.30924-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index d3c14b5ed4a1..8ce8c0d06c66 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -404,6 +404,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0b05, 0x17e0), .driver_info = USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + /* Realtek Semiconductor Corp. Mass Storage Device (Multicard Reader)*/ + { USB_DEVICE(0x0bda, 0x0151), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* Realtek hub in Dell WD19 (Type-C) */ { USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM }, From ec547af8a9ea6441864bad34172676b5652ceb96 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 14 Apr 2022 14:31:52 +0200 Subject: [PATCH 103/803] USB: quirks: add STRING quirk for VCOM device This has been reported to stall if queried Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20220414123152.1700-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 8ce8c0d06c66..97b44a68668a 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -510,6 +510,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* DJI CineSSD */ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, + /* VCOM device */ + { USB_DEVICE(0x4296, 0x7570), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, From 11451693e4081d32ef65147c6ca08cd0094ae252 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:06 -0700 Subject: [PATCH 104/803] tty: n_gsm: fix missing mux reset on config change at responder Currently, only the initiator resets the mux protocol if the user requests new parameters that are incompatible to those of the current connection. The responder also needs to reset the multiplexer if the new parameter set requires this. Otherwise, we end up with an inconsistent parameter set between initiator and responder. Revert the old behavior to inform the peer upon an incompatible parameter set change from the user on the responder side by re-establishing the mux protocol in such case. Fixes: 509067bbd264 ("tty: n_gsm: Delete gsm_disconnect when config requester") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-1-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index fa92f727fdf8..3d28ecebd473 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2373,7 +2373,7 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) * configuration */ - if (gsm->initiator && (need_close || need_restart)) { + if (need_close || need_restart) { int ret; ret = gsm_disconnect(gsm); From aa371e96f05dcb36a88298f5cb70aa7234d5e8b8 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:07 -0700 Subject: [PATCH 105/803] tty: n_gsm: fix restart handling via CLD command n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to the newer 27.010 here. Chapter 5.8.2 states that both sides will revert to the non-multiplexed mode via a close-down message (CLD). The usual program flow is as following: - start multiplex mode by sending AT+CMUX to the mobile - establish the control channel (DLCI 0) - establish user channels (DLCI >0) - terminate user channels - send close-down message (CLD) - revert to AT protocol (i.e. leave multiplexed mode) The AT protocol is out of scope of the n_gsm driver. However, gsm_disconnect() sends CLD if gsm_config() detects that the requested parameters require the mux protocol to restart. The next immediate action is to start the mux protocol by opening DLCI 0 again. Any responder side which handles CLD commands correctly forces us to fail at this point because AT+CMUX needs to be sent to the mobile to start the mux again. Therefore, remove the CLD command in this phase and keep both sides in multiplexed mode. Remove the gsm_disconnect() function as it become unnecessary and merge the remaining parts into gsm_cleanup_mux() to handle the termination order and locking correctly. Fixes: 71e077915396 ("tty: n_gsm: do not send/receive in ldisc close path") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-2-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 68 +++++++++++++-------------------------------- 1 file changed, 20 insertions(+), 48 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 3d28ecebd473..daaffcfadaae 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2106,49 +2106,35 @@ static void gsm_error(struct gsm_mux *gsm) gsm->io_error++; } -static int gsm_disconnect(struct gsm_mux *gsm) -{ - struct gsm_dlci *dlci = gsm->dlci[0]; - struct gsm_control *gc; - - if (!dlci) - return 0; - - /* In theory disconnecting DLCI 0 is sufficient but for some - modems this is apparently not the case. */ - gc = gsm_control_send(gsm, CMD_CLD, NULL, 0); - if (gc) - gsm_control_wait(gsm, gc); - - del_timer_sync(&gsm->t2_timer); - /* Now we are sure T2 has stopped */ - - gsm_dlci_begin_close(dlci); - wait_event_interruptible(gsm->event, - dlci->state == DLCI_CLOSED); - - if (signal_pending(current)) - return -EINTR; - - return 0; -} - /** * gsm_cleanup_mux - generic GSM protocol cleanup * @gsm: our mux + * @disc: disconnect link? * * Clean up the bits of the mux which are the same for all framing * protocols. Remove the mux from the mux table, stop all the timers * and then shut down each device hanging up the channels as we go. */ -static void gsm_cleanup_mux(struct gsm_mux *gsm) +static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc) { int i; struct gsm_dlci *dlci = gsm->dlci[0]; struct gsm_msg *txq, *ntxq; gsm->dead = true; + mutex_lock(&gsm->mutex); + + if (dlci) { + if (disc && dlci->state != DLCI_CLOSED) { + gsm_dlci_begin_close(dlci); + wait_event(gsm->event, dlci->state == DLCI_CLOSED); + } + dlci->dead = true; + } + + /* Finish outstanding timers, making sure they are done */ + del_timer_sync(&gsm->t2_timer); spin_lock(&gsm_mux_lock); for (i = 0; i < MAX_MUX; i++) { @@ -2162,13 +2148,7 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm) if (i == MAX_MUX) return; - del_timer_sync(&gsm->t2_timer); - /* Now we are sure T2 has stopped */ - if (dlci) - dlci->dead = true; - /* Free up any link layer users */ - mutex_lock(&gsm->mutex); for (i = 0; i < NUM_DLCI; i++) if (gsm->dlci[i]) gsm_dlci_release(gsm->dlci[i]); @@ -2370,19 +2350,11 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) /* * Close down what is needed, restart and initiate the new - * configuration + * configuration. On the first time there is no DLCI[0] + * and closing or cleaning up is not necessary. */ - - if (need_close || need_restart) { - int ret; - - ret = gsm_disconnect(gsm); - - if (ret) - return ret; - } - if (need_restart) - gsm_cleanup_mux(gsm); + if (need_close || need_restart) + gsm_cleanup_mux(gsm, true); gsm->initiator = c->initiator; gsm->mru = c->mru; @@ -2494,7 +2466,7 @@ static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) for (i = 1; i < NUM_DLCI; i++) tty_unregister_device(gsm_tty_driver, base + i); } - gsm_cleanup_mux(gsm); + gsm_cleanup_mux(gsm, false); tty_kref_put(gsm->tty); gsm->tty = NULL; } @@ -2597,7 +2569,7 @@ static int gsmld_open(struct tty_struct *tty) ret = gsmld_attach_gsm(tty, gsm); if (ret != 0) { - gsm_cleanup_mux(gsm); + gsm_cleanup_mux(gsm, false); mux_put(gsm); } return ret; From 1ec92e9742774bf42614fceea3bf6b50c9409225 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:08 -0700 Subject: [PATCH 106/803] tty: n_gsm: fix decoupled mux resource The active mux instances are managed in the gsm_mux array and via mux_get() and mux_put() functions separately. This gives a very loose coupling between the actual instance and the gsm_mux array which manages it. It also results in unnecessary lockings which makes it prone to failures. And it creates a race condition if more than the maximum number of mux instances are requested while the user changes the parameters of an active instance. The user may loose ownership of the current mux instance in this case. Fix this by moving the gsm_mux array handling to the mux allocation and deallocation functions. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-3-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 63 +++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index daaffcfadaae..f546dfe03d29 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2136,18 +2136,6 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc) /* Finish outstanding timers, making sure they are done */ del_timer_sync(&gsm->t2_timer); - spin_lock(&gsm_mux_lock); - for (i = 0; i < MAX_MUX; i++) { - if (gsm_mux[i] == gsm) { - gsm_mux[i] = NULL; - break; - } - } - spin_unlock(&gsm_mux_lock); - /* open failed before registering => nothing to do */ - if (i == MAX_MUX) - return; - /* Free up any link layer users */ for (i = 0; i < NUM_DLCI; i++) if (gsm->dlci[i]) @@ -2171,7 +2159,6 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc) static int gsm_activate_mux(struct gsm_mux *gsm) { struct gsm_dlci *dlci; - int i = 0; timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0); init_waitqueue_head(&gsm->event); @@ -2183,18 +2170,6 @@ static int gsm_activate_mux(struct gsm_mux *gsm) else gsm->receive = gsm1_receive; - spin_lock(&gsm_mux_lock); - for (i = 0; i < MAX_MUX; i++) { - if (gsm_mux[i] == NULL) { - gsm->num = i; - gsm_mux[i] = gsm; - break; - } - } - spin_unlock(&gsm_mux_lock); - if (i == MAX_MUX) - return -EBUSY; - dlci = gsm_dlci_alloc(gsm, 0); if (dlci == NULL) return -ENOMEM; @@ -2210,6 +2185,15 @@ static int gsm_activate_mux(struct gsm_mux *gsm) */ static void gsm_free_mux(struct gsm_mux *gsm) { + int i; + + for (i = 0; i < MAX_MUX; i++) { + if (gsm == gsm_mux[i]) { + gsm_mux[i] = NULL; + break; + } + } + mutex_destroy(&gsm->mutex); kfree(gsm->txframe); kfree(gsm->buf); kfree(gsm); @@ -2229,12 +2213,20 @@ static void gsm_free_muxr(struct kref *ref) static inline void mux_get(struct gsm_mux *gsm) { + unsigned long flags; + + spin_lock_irqsave(&gsm_mux_lock, flags); kref_get(&gsm->ref); + spin_unlock_irqrestore(&gsm_mux_lock, flags); } static inline void mux_put(struct gsm_mux *gsm) { + unsigned long flags; + + spin_lock_irqsave(&gsm_mux_lock, flags); kref_put(&gsm->ref, gsm_free_muxr); + spin_unlock_irqrestore(&gsm_mux_lock, flags); } static inline unsigned int mux_num_to_base(struct gsm_mux *gsm) @@ -2255,6 +2247,7 @@ static inline unsigned int mux_line_to_num(unsigned int line) static struct gsm_mux *gsm_alloc_mux(void) { + int i; struct gsm_mux *gsm = kzalloc(sizeof(struct gsm_mux), GFP_KERNEL); if (gsm == NULL) return NULL; @@ -2284,6 +2277,26 @@ static struct gsm_mux *gsm_alloc_mux(void) gsm->mtu = 64; gsm->dead = true; /* Avoid early tty opens */ + /* Store the instance to the mux array or abort if no space is + * available. + */ + spin_lock(&gsm_mux_lock); + for (i = 0; i < MAX_MUX; i++) { + if (!gsm_mux[i]) { + gsm_mux[i] = gsm; + gsm->num = i; + break; + } + } + spin_unlock(&gsm_mux_lock); + if (i == MAX_MUX) { + mutex_destroy(&gsm->mutex); + kfree(gsm->txframe); + kfree(gsm->buf); + kfree(gsm); + return NULL; + } + return gsm; } From 284260f278b706364fb4c88a7b56ba5298d5973c Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:09 -0700 Subject: [PATCH 107/803] tty: n_gsm: fix mux cleanup after unregister tty device Internally, we manage the alive state of the mux channels and mux itself with the field member 'dead'. This makes it possible to notify the user if the accessed underlying link is already gone. On the other hand, however, removing the virtual ttys before terminating the channels may result in peer messages being received without any internal target. Move the mux cleanup procedure from gsmld_detach_gsm() to gsmld_close() to fix this by keeping the virtual ttys open until the mux has been cleaned up. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-4-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index f546dfe03d29..de97a3810731 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2479,7 +2479,6 @@ static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) for (i = 1; i < NUM_DLCI; i++) tty_unregister_device(gsm_tty_driver, base + i); } - gsm_cleanup_mux(gsm, false); tty_kref_put(gsm->tty); gsm->tty = NULL; } @@ -2544,6 +2543,12 @@ static void gsmld_close(struct tty_struct *tty) { struct gsm_mux *gsm = tty->disc_data; + /* The ldisc locks and closes the port before calling our close. This + * means we have no way to do a proper disconnect. We will not bother + * to do one. + */ + gsm_cleanup_mux(gsm, false); + gsmld_detach_gsm(tty, gsm); gsmld_flush_buffer(tty); From 06d5afd4d640eea67f5623e76cd5fc03359b7f3c Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:10 -0700 Subject: [PATCH 108/803] tty: n_gsm: fix wrong signal octet encoding in convergence layer type 2 n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to the newer 27.010 here. Chapter 5.5.2 describes that the signal octet in convergence layer type 2 can be either one or two bytes. The length is encoded in the EA bit. This is set 1 for the last byte in the sequence. gsmtty_modem_update() handles this correctly but gsm_dlci_data_output() fails to set EA to 1. There is no case in which we encode two signal octets as there is no case in which we send out a break signal. Therefore, always set the EA bit to 1 for the signal octet to fix this. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-5-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index de97a3810731..3ba2505908e3 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -832,7 +832,7 @@ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci) break; case 2: /* Unstructed with modem bits. Always one byte as we never send inline break data */ - *dp++ = gsm_encode_modem(dlci); + *dp++ = (gsm_encode_modem(dlci) << 1) | EA; break; } WARN_ON(kfifo_out_locked(&dlci->fifo, dp , len, &dlci->lock) != len); From 7a0e4b1733b635026a87c023f6d703faf0095e39 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:11 -0700 Subject: [PATCH 109/803] tty: n_gsm: fix frame reception handling The frame checksum (FCS) is currently handled in gsm_queue() after reception of a frame. However, this breaks layering. A workaround with 'received_fcs' was implemented so far. Furthermore, frames are handled as such even if no end flag was received. Move FCS calculation from gsm_queue() to gsm0_receive() and gsm1_receive(). Also delay gsm_queue() call there until a full frame was received to fix both points. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-6-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 53 +++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 3ba2505908e3..4ce18b42c37a 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -219,7 +219,6 @@ struct gsm_mux { int encoding; u8 control; u8 fcs; - u8 received_fcs; u8 *txframe; /* TX framing buffer */ /* Method for the receiver side */ @@ -1794,18 +1793,7 @@ static void gsm_queue(struct gsm_mux *gsm) u8 cr; int address; int i, j, k, address_tmp; - /* We have to sneak a look at the packet body to do the FCS. - A somewhat layering violation in the spec */ - if ((gsm->control & ~PF) == UI) - gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, gsm->len); - if (gsm->encoding == 0) { - /* WARNING: gsm->received_fcs is used for - gsm->encoding = 0 only. - In this case it contain the last piece of data - required to generate final CRC */ - gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->received_fcs); - } if (gsm->fcs != GOOD_FCS) { gsm->bad_fcs++; if (debug & 4) @@ -1993,19 +1981,25 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c) break; case GSM_DATA: /* Data */ gsm->buf[gsm->count++] = c; - if (gsm->count == gsm->len) + if (gsm->count == gsm->len) { + /* Calculate final FCS for UI frames over all data */ + if ((gsm->control & ~PF) != UIH) { + gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, + gsm->count); + } gsm->state = GSM_FCS; + } break; case GSM_FCS: /* FCS follows the packet */ - gsm->received_fcs = c; - gsm_queue(gsm); + gsm->fcs = gsm_fcs_add(gsm->fcs, c); gsm->state = GSM_SSOF; break; case GSM_SSOF: - if (c == GSM0_SOF) { - gsm->state = GSM_SEARCH; - break; - } + gsm->state = GSM_SEARCH; + if (c == GSM0_SOF) + gsm_queue(gsm); + else + gsm->bad_size++; break; default: pr_debug("%s: unhandled state: %d\n", __func__, gsm->state); @@ -2024,11 +2018,24 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c) static void gsm1_receive(struct gsm_mux *gsm, unsigned char c) { if (c == GSM1_SOF) { - /* EOF is only valid in frame if we have got to the data state - and received at least one byte (the FCS) */ - if (gsm->state == GSM_DATA && gsm->count) { - /* Extract the FCS */ + /* EOF is only valid in frame if we have got to the data state */ + if (gsm->state == GSM_DATA) { + if (gsm->count < 1) { + /* Missing FSC */ + gsm->malformed++; + gsm->state = GSM_START; + return; + } + /* Remove the FCS from data */ gsm->count--; + if ((gsm->control & ~PF) != UIH) { + /* Calculate final FCS for UI frames over all + * data but FCS + */ + gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, + gsm->count); + } + /* Add the FCS itself to test against GOOD_FCS */ gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->buf[gsm->count]); gsm->len = gsm->count; gsm_queue(gsm); From a24b4b2f660b7ddf3f484b37600bba382cb28a9d Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:12 -0700 Subject: [PATCH 110/803] tty: n_gsm: fix malformed counter for out of frame data The gsm_mux field 'malformed' represents the number of malformed frames received. However, gsm1_receive() also increases this counter for any out of frame byte. Fix this by ignoring out of frame data for the malformed counter. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-7-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 4ce18b42c37a..2e3da8a4697e 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2044,7 +2044,8 @@ static void gsm1_receive(struct gsm_mux *gsm, unsigned char c) } /* Any partial frame was a runt so go back to start */ if (gsm->state != GSM_START) { - gsm->malformed++; + if (gsm->state != GSM_SEARCH) + gsm->malformed++; gsm->state = GSM_START; } /* A SOF in GSM_START means we are still reading idling or From 535bf600de75a859698892ee873521a48d289ec1 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:13 -0700 Subject: [PATCH 111/803] tty: n_gsm: fix insufficient txframe size n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to the newer 27.010 here. Chapter 5.7.2 states that the maximum frame size (N1) refers to the length of the information field (i.e. user payload). However, 'txframe' stores the whole frame including frame header, checksum and start/end flags. We also need to consider the byte stuffing overhead. Define constant for the protocol overhead and adjust the 'txframe' size calculation accordingly to reserve enough space for a complete mux frame including byte stuffing for advanced option mode. Note that no byte stuffing is applied to the start and end flag. Also use MAX_MTU instead of MAX_MRU as this buffer is used for data transmission. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-8-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 2e3da8a4697e..cc90b03ce005 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -73,6 +73,8 @@ module_param(debug, int, 0600); */ #define MAX_MRU 1500 #define MAX_MTU 1500 +/* SOF, ADDR, CTRL, LEN1, LEN2, ..., FCS, EOF */ +#define PROT_OVERHEAD 7 #define GSM_NET_TX_TIMEOUT (HZ*10) /* @@ -2264,7 +2266,7 @@ static struct gsm_mux *gsm_alloc_mux(void) kfree(gsm); return NULL; } - gsm->txframe = kmalloc(2 * MAX_MRU + 2, GFP_KERNEL); + gsm->txframe = kmalloc(2 * (MAX_MTU + PROT_OVERHEAD - 1), GFP_KERNEL); if (gsm->txframe == NULL) { kfree(gsm->buf); kfree(gsm); From deefc58bafb4841df7f0a0d85d89a1c819db9743 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:14 -0700 Subject: [PATCH 112/803] tty: n_gsm: fix wrong DLCI release order The current DLCI release order starts with the control channel followed by the user channels. Reverse this order to keep the control channel open until all user channels have been released. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-9-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index cc90b03ce005..6b953dfbb155 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2146,8 +2146,8 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc) /* Finish outstanding timers, making sure they are done */ del_timer_sync(&gsm->t2_timer); - /* Free up any link layer users */ - for (i = 0; i < NUM_DLCI; i++) + /* Free up any link layer users and finally the control channel */ + for (i = NUM_DLCI - 1; i >= 0; i--) if (gsm->dlci[i]) gsm_dlci_release(gsm->dlci[i]); mutex_unlock(&gsm->mutex); From 17eac652028501df7ea296b1d9b9c134db262b7d Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:15 -0700 Subject: [PATCH 113/803] tty: n_gsm: fix missing explicit ldisc flush In gsm_cleanup_mux() the muxer is closed down and all queues are removed. However, removing the queues is done without explicit control of the underlying buffers. Flush those before freeing up our queues to ensure that all outgoing queues are cleared consistently. Otherwise, a new mux connection establishment attempt may time out while the underlying tty is still busy sending out the remaining data from the previous connection. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-10-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 6b953dfbb155..1430d7f83bd2 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2152,6 +2152,7 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc) gsm_dlci_release(gsm->dlci[i]); mutex_unlock(&gsm->mutex); /* Now wipe the queues */ + tty_ldisc_flush(gsm->tty); list_for_each_entry_safe(txq, ntxq, &gsm->tx_list, list) kfree(txq); INIT_LIST_HEAD(&gsm->tx_list); From d0bcdffcad5a22f202e3bf37190c0dd8c080ea92 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:16 -0700 Subject: [PATCH 114/803] tty: n_gsm: fix wrong command retry handling n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to the newer 27.010 here. Chapter 5.7.3 states that the valid range for the maximum number of retransmissions (N2) is from 0 to 255 (both including). gsm_config() fails to limit this range correctly. Furthermore, gsm_control_retransmit() handles this number incorrectly by performing N2 - 1 retransmission attempts. Setting N2 to zero results in more than 255 retransmission attempts. Fix the range check in gsm_config() and the value handling in gsm_control_send() and gsm_control_retransmit() to comply with 3GPP 27.010. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-11-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 1430d7f83bd2..628bda5f0622 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -1354,7 +1354,6 @@ static void gsm_control_retransmit(struct timer_list *t) spin_lock_irqsave(&gsm->control_lock, flags); ctrl = gsm->pending_cmd; if (ctrl) { - gsm->cretries--; if (gsm->cretries == 0) { gsm->pending_cmd = NULL; ctrl->error = -ETIMEDOUT; @@ -1363,6 +1362,7 @@ static void gsm_control_retransmit(struct timer_list *t) wake_up(&gsm->event); return; } + gsm->cretries--; gsm_control_transmit(gsm, ctrl); mod_timer(&gsm->t2_timer, jiffies + gsm->t2 * HZ / 100); } @@ -1403,7 +1403,7 @@ retry: /* If DLCI0 is in ADM mode skip retries, it won't respond */ if (gsm->dlci[0]->mode == DLCI_MODE_ADM) - gsm->cretries = 1; + gsm->cretries = 0; else gsm->cretries = gsm->n2; @@ -2343,7 +2343,7 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) /* Check the MRU/MTU range looks sane */ if (c->mru > MAX_MRU || c->mtu > MAX_MTU || c->mru < 8 || c->mtu < 8) return -EINVAL; - if (c->n2 < 3) + if (c->n2 > 255) return -EINVAL; if (c->encapsulation > 1) /* Basic, advanced, no I */ return -EINVAL; From 398867f59f956985f4c324f173eff7b946e14bd8 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:17 -0700 Subject: [PATCH 115/803] tty: n_gsm: fix wrong command frame length field encoding n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to the newer 27.010 here. Chapter 5.4.6.1 states that each command frame shall be made up from type, length and value. Looking for example in chapter 5.4.6.3.5 at the description for the encoding of a flow control on command it becomes obvious, that the type and length field is always present whereas the value may be zero bytes long. The current implementation omits the length field if the value is not present. This is wrong. Correct this by always sending the length in gsm_control_transmit(). So far only the modem status command (MSC) has included a value and encoded its length directly. Therefore, also change gsmtty_modem_update(). Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-12-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 628bda5f0622..903278145078 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -1327,11 +1327,12 @@ static void gsm_control_response(struct gsm_mux *gsm, unsigned int command, static void gsm_control_transmit(struct gsm_mux *gsm, struct gsm_control *ctrl) { - struct gsm_msg *msg = gsm_data_alloc(gsm, 0, ctrl->len + 1, gsm->ftype); + struct gsm_msg *msg = gsm_data_alloc(gsm, 0, ctrl->len + 2, gsm->ftype); if (msg == NULL) return; - msg->data[0] = (ctrl->cmd << 1) | 2 | EA; /* command */ - memcpy(msg->data + 1, ctrl->data, ctrl->len); + msg->data[0] = (ctrl->cmd << 1) | CR | EA; /* command */ + msg->data[1] = (ctrl->len << 1) | EA; + memcpy(msg->data + 2, ctrl->data, ctrl->len); gsm_data_queue(gsm->dlci[0], msg); } @@ -2957,19 +2958,17 @@ static struct tty_ldisc_ops tty_ldisc_packet = { static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk) { - u8 modembits[5]; + u8 modembits[3]; struct gsm_control *ctrl; int len = 2; - if (brk) + modembits[0] = (dlci->addr << 2) | 2 | EA; /* DLCI, Valid, EA */ + modembits[1] = (gsm_encode_modem(dlci) << 1) | EA; + if (brk) { + modembits[2] = (brk << 4) | 2 | EA; /* Length, Break, EA */ len++; - - modembits[0] = len << 1 | EA; /* Data bytes */ - modembits[1] = dlci->addr << 2 | 3; /* DLCI, EA, 1 */ - modembits[2] = gsm_encode_modem(dlci) << 1 | EA; - if (brk) - modembits[3] = brk << 4 | 2 | EA; /* Valid, EA */ - ctrl = gsm_control_send(dlci->gsm, CMD_MSC, modembits, len + 1); + } + ctrl = gsm_control_send(dlci->gsm, CMD_MSC, modembits, len); if (ctrl == NULL) return -ENOMEM; return gsm_control_wait(dlci->gsm, ctrl); From 317f86af7f5d19f286ed2d181cbaef4a188c7f19 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:18 -0700 Subject: [PATCH 116/803] tty: n_gsm: fix wrong signal octets encoding in MSC n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to the newer 27.010 here. The value of the modem status command (MSC) frame contains an address field, control signal and optional break signal octet. The address field is encoded as described in chapter 5.2.1.2 with only one octet (may be extended to more in future versions of the standard). Whereas the control signal and break signal octet are always one byte each. This is strange at first glance as it makes the EA bit redundant. However, the same two octets are also encoded as header in convergence layer type 2 as described in chapter 5.5.2. No header length field is given and the only way to test if there is an optional break signal octet is via the EA flag which extends the control signal octet with a break signal octet. Now it becomes obvious how the EA bit for those two octets shall be encoded in the MSC frame. The current implementation treats the signal octet different for MSC frame and convergence layer type 2 header even though the standard describes it for both in the same way. Use the EA bit to encode the signal octets not only in the convergence layer type 2 header but also in the MSC frame in the same way with either 1 or 2 bytes in case of an optional break signal. Adjust the receiving path accordingly in gsm_control_modem(). Fixes: 3ac06b905655 ("tty: n_gsm: Fix for modems with brk in modem status control") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-13-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 903278145078..23418ee93156 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -1094,7 +1094,6 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen) { unsigned int addr = 0; unsigned int modem = 0; - unsigned int brk = 0; struct gsm_dlci *dlci; int len = clen; int slen; @@ -1124,17 +1123,8 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen) return; } len--; - if (len > 0) { - while (gsm_read_ea(&brk, *dp++) == 0) { - len--; - if (len == 0) - return; - } - modem <<= 7; - modem |= (brk & 0x7f); - } tty = tty_port_tty_get(&dlci->port); - gsm_process_modem(tty, dlci, modem, slen); + gsm_process_modem(tty, dlci, modem, slen - len); if (tty) { tty_wakeup(tty); tty_kref_put(tty); @@ -2963,8 +2953,10 @@ static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk) int len = 2; modembits[0] = (dlci->addr << 2) | 2 | EA; /* DLCI, Valid, EA */ - modembits[1] = (gsm_encode_modem(dlci) << 1) | EA; - if (brk) { + if (!brk) { + modembits[1] = (gsm_encode_modem(dlci) << 1) | EA; + } else { + modembits[1] = gsm_encode_modem(dlci) << 1; modembits[2] = (brk << 4) | 2 | EA; /* Length, Break, EA */ len++; } From 1adf6fee58ca25fb6720b8d34c919dcf5425cc9c Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:19 -0700 Subject: [PATCH 117/803] tty: n_gsm: fix missing tty wakeup in convergence layer type 2 gsm_control_modem() informs the virtual tty that more data can be written after receiving a control signal octet via modem status command (MSC). However, gsm_dlci_data() fails to do the same after receiving a control signal octet from the convergence layer type 2 header. Add tty_wakeup() in gsm_dlci_data() for convergence layer type 2 to fix this. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-14-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 23418ee93156..f3fb66be8513 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -1615,6 +1615,7 @@ static void gsm_dlci_data(struct gsm_dlci *dlci, const u8 *data, int clen) tty = tty_port_tty_get(port); if (tty) { gsm_process_modem(tty, dlci, modem, slen); + tty_wakeup(tty); tty_kref_put(tty); } fallthrough; From 73029a4d7161f8b6c0934553145ef574d2d0c645 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:22 -0700 Subject: [PATCH 118/803] tty: n_gsm: fix reset fifo race condition gsmtty_write() and gsm_dlci_data_output() properly guard the fifo access. However, gsm_dlci_close() and gsmtty_flush_buffer() modifies the fifo but do not guard this. Add a guard here to prevent race conditions on parallel writes to the fifo. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-17-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index f3fb66be8513..15be4a235783 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -1442,13 +1442,17 @@ static int gsm_control_wait(struct gsm_mux *gsm, struct gsm_control *control) static void gsm_dlci_close(struct gsm_dlci *dlci) { + unsigned long flags; + del_timer(&dlci->t1); if (debug & 8) pr_debug("DLCI %d goes closed.\n", dlci->addr); dlci->state = DLCI_CLOSED; if (dlci->addr != 0) { tty_port_tty_hangup(&dlci->port, false); + spin_lock_irqsave(&dlci->lock, flags); kfifo_reset(&dlci->fifo); + spin_unlock_irqrestore(&dlci->lock, flags); /* Ensure that gsmtty_open() can return. */ tty_port_set_initialized(&dlci->port, 0); wake_up_interruptible(&dlci->port.open_wait); @@ -3148,13 +3152,17 @@ static unsigned int gsmtty_chars_in_buffer(struct tty_struct *tty) static void gsmtty_flush_buffer(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; + unsigned long flags; + if (dlci->state == DLCI_CLOSED) return; /* Caution needed: If we implement reliable transport classes then the data being transmitted can't simply be junked once it has first hit the stack. Until then we can just blow it away */ + spin_lock_irqsave(&dlci->lock, flags); kfifo_reset(&dlci->fifo); + spin_unlock_irqrestore(&dlci->lock, flags); /* Need to unhook this DLCI from the transmit queue logic */ } From ff9166c623704337bd6fe66fce2838d9768a6634 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Thu, 14 Apr 2022 02:42:25 -0700 Subject: [PATCH 119/803] tty: n_gsm: fix incorrect UA handling n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to the newer 27.010 here. Chapter 5.4.4.2 states that any received unnumbered acknowledgment (UA) with its poll/final (PF) bit set to 0 shall be discarded. Currently, all UA frame are handled in the same way regardless of the PF bit. This does not comply with the standard. Remove the UA case in gsm_queue() to process only UA frames with PF bit set to 1 to abide the standard. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220414094225.4527-20-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 15be4a235783..e440c7f6d20e 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -1865,7 +1865,6 @@ static void gsm_queue(struct gsm_mux *gsm) } } break; - case UA: case UA|PF: if (cr == 0 || dlci == NULL) break; From 0e4deb56b0c625efdb70c94f150429e2f2a16fa1 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Sat, 9 Apr 2022 01:35:02 +0200 Subject: [PATCH 120/803] serial: amba-pl011: do not time out prematurely when draining tx fifo The current timeout for draining the tx fifo in RS485 mode is calculated by multiplying the time it takes to transmit one character (with the given baud rate) with the maximal number of characters in the tx queue. This timeout is too short for two reasons: First when calculating the time to transmit one character integer division is used which may round down the result in case of a remainder of the division. Fix this by rounding up the division result. Second the hardware may need additional time (e.g for first putting the characters from the fifo into the shift register) before the characters are actually put onto the wire. To be on the safe side double the current maximum number of iterations that are used to wait for the queue draining. Fixes: 8d479237727c ("serial: amba-pl011: add RS485 support") Cc: stable@vger.kernel.org Signed-off-by: Lino Sanfilippo Link: https://lore.kernel.org/r/20220408233503.7251-1-LinoSanfilippo@gmx.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 51ecb050ae40..4d11a3e547f9 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1255,13 +1255,18 @@ static inline bool pl011_dma_rx_running(struct uart_amba_port *uap) static void pl011_rs485_tx_stop(struct uart_amba_port *uap) { + /* + * To be on the safe side only time out after twice as many iterations + * as fifo size. + */ + const int MAX_TX_DRAIN_ITERS = uap->port.fifosize * 2; struct uart_port *port = &uap->port; int i = 0; u32 cr; /* Wait until hardware tx queue is empty */ while (!pl011_tx_empty(port)) { - if (i == port->fifosize) { + if (i > MAX_TX_DRAIN_ITERS) { dev_warn(port->dev, "timeout while draining hardware tx queue\n"); break; @@ -2052,7 +2057,7 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios, * with the given baud rate. We use this as the poll interval when we * wait for the tx queue to empty. */ - uap->rs485_tx_drain_interval = (bits * 1000 * 1000) / baud; + uap->rs485_tx_drain_interval = DIV_ROUND_UP(bits * 1000 * 1000, baud); pl011_setup_status_masks(port, termios); From 3ee82c6e41f3d2212647ce0bc5a05a0f69097824 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 11 Apr 2022 10:19:57 +0200 Subject: [PATCH 121/803] serial: imx: fix overrun interrupts in DMA mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 76821e222c18 ("serial: imx: ensure that RX irqs are off if RX is off") accidentally enabled overrun interrupts unconditionally when deferring DMA enable until after the receiver has been enabled during startup. Fix this by using the DMA-initialised instead of DMA-enabled flag to determine whether overrun interrupts should be enabled. Note that overrun interrupts are already accounted for in imx_uart_clear_rx_errors() when using DMA since commit 41d98b5da92f ("serial: imx-serial - update RX error counters when DMA is used"). Fixes: 76821e222c18 ("serial: imx: ensure that RX irqs are off if RX is off") Cc: stable@vger.kernel.org # 4.17 Cc: Uwe Kleine-König Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220411081957.7846-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index fd38e6ed4fda..a2100be8d554 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1448,7 +1448,7 @@ static int imx_uart_startup(struct uart_port *port) imx_uart_writel(sport, ucr1, UCR1); ucr4 = imx_uart_readl(sport, UCR4) & ~(UCR4_OREN | UCR4_INVR); - if (!sport->dma_is_enabled) + if (!dma_is_inited) ucr4 |= UCR4_OREN; if (sport->inverted_rx) ucr4 |= UCR4_INVR; From cc994bb97587787b8f0c094a9bc6945d82075b1d Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 11 Apr 2022 14:16:57 +0300 Subject: [PATCH 122/803] serial: 8250: Fix runtime PM for start_tx() for empty buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 932d596378b0 ("serial: 8250: Return early in .start_tx() if there are no chars to send") caused a regression where the drivers implementing runtime PM stopped idling. This is because serial8250_rpm_put_tx() is now unbalanced on early return, it normally gets called at __stop_tx(). Fixes: 932d596378b0 ("serial: 8250: Return early in .start_tx() if there are no chars to send") Cc: Steffen Trumtrar Cc: Uwe Kleine-König Reviewed-by: Johan Hovold Reviewed-by: Uwe Kleine-König Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20220411111657.16744-1-tony@atomide.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 318af6f13605..26f9330094bc 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1675,11 +1675,11 @@ static void serial8250_start_tx(struct uart_port *port) struct uart_8250_port *up = up_to_u8250p(port); struct uart_8250_em485 *em485 = up->em485; - serial8250_rpm_get_tx(up); - if (!port->x_char && uart_circ_empty(&port->state->xmit)) return; + serial8250_rpm_get_tx(up); + if (em485 && em485->active_timer == &em485->start_tx_timer) return; From e7e51eb037d1848d4403efbf9696ea50c40cad36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 12 Apr 2022 14:49:16 +0200 Subject: [PATCH 123/803] iio: dac: ltc2688: fix voltage scale read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Properly set *val2 (and not overwrite *val) to correctly return IIO_VAL_FRACTIONAL_LOG2. Fixes: 832cb9eeb9312 ("iio: dac: add support for ltc2688") Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20220412124916.61-1-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ltc2688.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/dac/ltc2688.c b/drivers/iio/dac/ltc2688.c index e41861d29767..2f9c384885f4 100644 --- a/drivers/iio/dac/ltc2688.c +++ b/drivers/iio/dac/ltc2688.c @@ -298,7 +298,7 @@ static int ltc2688_read_raw(struct iio_dev *indio_dev, if (ret) return ret; - *val = 16; + *val2 = 16; return IIO_VAL_FRACTIONAL_LOG2; case IIO_CHAN_INFO_CALIBBIAS: ret = regmap_read(st->regmap, From b5d6ba09b10d2ccb865ed9bc45941db0a41c6756 Mon Sep 17 00:00:00 2001 From: Fawzi Khaber Date: Mon, 11 Apr 2022 13:15:33 +0200 Subject: [PATCH 124/803] iio: imu: inv_icm42600: Fix I2C init possible nack This register write to REG_INTF_CONFIG6 enables a spike filter that is impacting the line and can prevent the I2C ACK to be seen by the controller. So we don't test the return value. Fixes: 7297ef1e261672b8 ("iio: imu: inv_icm42600: add I2C driver") Signed-off-by: Fawzi Khaber Signed-off-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20220411111533.5826-1-jmaneyrol@invensense.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c index 33d9afb1ba91..d4a692b838d0 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c @@ -18,12 +18,15 @@ static int inv_icm42600_i2c_bus_setup(struct inv_icm42600_state *st) unsigned int mask, val; int ret; - /* setup interface registers */ - ret = regmap_update_bits(st->map, INV_ICM42600_REG_INTF_CONFIG6, - INV_ICM42600_INTF_CONFIG6_MASK, - INV_ICM42600_INTF_CONFIG6_I3C_EN); - if (ret) - return ret; + /* + * setup interface registers + * This register write to REG_INTF_CONFIG6 enables a spike filter that + * is impacting the line and can prevent the I2C ACK to be seen by the + * controller. So we don't test the return value. + */ + regmap_update_bits(st->map, INV_ICM42600_REG_INTF_CONFIG6, + INV_ICM42600_INTF_CONFIG6_MASK, + INV_ICM42600_INTF_CONFIG6_I3C_EN); ret = regmap_update_bits(st->map, INV_ICM42600_REG_INTF_CONFIG4, INV_ICM42600_INTF_CONFIG4_I3C_BUS_ONLY, 0); From a063f2fba3fa633a599253b62561051ac185fa99 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 16 Apr 2022 13:51:10 +0200 Subject: [PATCH 125/803] batman-adv: Don't skb_split skbuffs with frag_list The receiving interface might have used GRO to receive more fragments than MAX_SKB_FRAGS fragments. In this case, these will not be stored in skb_shinfo(skb)->frags but merged into the frag list. batman-adv relies on the function skb_split to split packets up into multiple smaller packets which are not larger than the MTU on the outgoing interface. But this function cannot handle frag_list entries and is only operating on skb_shinfo(skb)->frags. If it is still trying to split such an skb and xmit'ing it on an interface without support for NETIF_F_FRAGLIST, then validate_xmit_skb() will try to linearize it. But this fails due to inconsistent information. And __pskb_pull_tail will trigger a BUG_ON after skb_copy_bits() returns an error. In case of entries in frag_list, just linearize the skb before operating on it with skb_split(). Reported-by: Felix Kaechele Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann Tested-by: Felix Kaechele Signed-off-by: Simon Wunderlich --- net/batman-adv/fragmentation.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 0899a729a23f..c120c7c6d25f 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -475,6 +475,17 @@ int batadv_frag_send_packet(struct sk_buff *skb, goto free_skb; } + /* GRO might have added fragments to the fragment list instead of + * frags[]. But this is not handled by skb_split and must be + * linearized to avoid incorrect length information after all + * batman-adv fragments were created and submitted to the + * hard-interface + */ + if (skb_has_frag_list(skb) && __skb_linearize(skb)) { + ret = -ENOMEM; + goto free_skb; + } + /* Create one header to be copied to all fragments */ frag_header.packet_type = BATADV_UNICAST_FRAG; frag_header.version = BATADV_COMPAT_VERSION; From ef0beba1a5fb0c693ddf7d31246bd96c925ffd00 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 18 Mar 2022 19:30:02 +0100 Subject: [PATCH 126/803] pinctrl: qcom: sm6350: fix order of UFS & SDC pins In other places the SDC and UFS pins have been swapped but this was missed in the PINCTRL_PIN definitions. Fix that. Fixes: 7d74b55afd27 ("pinctrl: qcom: Add SM6350 pinctrl driver") Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20220318183004.858707-5-luca.weiss@fairphone.com Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-sm6350.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-sm6350.c b/drivers/pinctrl/qcom/pinctrl-sm6350.c index 4d37b817b232..a91a86628f2f 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm6350.c +++ b/drivers/pinctrl/qcom/pinctrl-sm6350.c @@ -264,14 +264,14 @@ static const struct pinctrl_pin_desc sm6350_pins[] = { PINCTRL_PIN(153, "GPIO_153"), PINCTRL_PIN(154, "GPIO_154"), PINCTRL_PIN(155, "GPIO_155"), - PINCTRL_PIN(156, "SDC1_RCLK"), - PINCTRL_PIN(157, "SDC1_CLK"), - PINCTRL_PIN(158, "SDC1_CMD"), - PINCTRL_PIN(159, "SDC1_DATA"), - PINCTRL_PIN(160, "SDC2_CLK"), - PINCTRL_PIN(161, "SDC2_CMD"), - PINCTRL_PIN(162, "SDC2_DATA"), - PINCTRL_PIN(163, "UFS_RESET"), + PINCTRL_PIN(156, "UFS_RESET"), + PINCTRL_PIN(157, "SDC1_RCLK"), + PINCTRL_PIN(158, "SDC1_CLK"), + PINCTRL_PIN(159, "SDC1_CMD"), + PINCTRL_PIN(160, "SDC1_DATA"), + PINCTRL_PIN(161, "SDC2_CLK"), + PINCTRL_PIN(162, "SDC2_CMD"), + PINCTRL_PIN(163, "SDC2_DATA"), }; #define DECLARE_MSM_GPIO_PINS(pin) \ From 4aaaaf0f279836f06d3b9d0ffeec7a1e1a04ceef Mon Sep 17 00:00:00 2001 From: "Ji-Ze Hong (Peter Hong)" Date: Mon, 18 Apr 2022 17:07:06 +0800 Subject: [PATCH 127/803] hwmon: (f71882fg) Fix negative temperature All temperature of Fintek superio hwmonitor that using 1-byte reg will use 2's complement. In show_temp() temp = data->temp[nr] * 1000; When data->temp[nr] read as 255, it indicate -1C, but this code will report 255C to userspace. It'll be ok when change to: temp = ((s8)data->temp[nr]) * 1000; Signed-off-by: Ji-Ze Hong (Peter Hong) Link: https://lore.kernel.org/r/20220418090706.6339-1-hpeter+linux_kernel@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/f71882fg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c index 938a8b9ec70d..6830e029995d 100644 --- a/drivers/hwmon/f71882fg.c +++ b/drivers/hwmon/f71882fg.c @@ -1578,8 +1578,9 @@ static ssize_t show_temp(struct device *dev, struct device_attribute *devattr, temp *= 125; if (sign) temp -= 128000; - } else - temp = data->temp[nr] * 1000; + } else { + temp = ((s8)data->temp[nr]) * 1000; + } return sprintf(buf, "%d\n", temp); } From eba1a872cb73314280d5448d934935b23e30b7ca Mon Sep 17 00:00:00 2001 From: Pengcheng Yang Date: Tue, 12 Apr 2022 19:05:45 +0800 Subject: [PATCH 128/803] ipvs: correctly print the memory size of ip_vs_conn_tab The memory size of ip_vs_conn_tab changed after we use hlist instead of list. Fixes: 731109e78415 ("ipvs: use hlist instead of list") Signed-off-by: Pengcheng Yang Acked-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_conn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 2c467c422dc6..fb67f1ca2495 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1495,7 +1495,7 @@ int __init ip_vs_conn_init(void) pr_info("Connection hash table configured " "(size=%d, memory=%ldKbytes)\n", ip_vs_conn_tab_size, - (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024); + (long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024); IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n", sizeof(struct ip_vs_conn)); From 3ccce9340326df40ba4462d4d2a1692b6387a68e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 11 Apr 2022 16:37:03 -0700 Subject: [PATCH 129/803] x86/cpu: Add new Alderlake and Raptorlake CPU model numbers Intel is subdividing the mobile segment with additional models with the same codename. Using the Intel "N" and "P" suffices for these will be less confusing than trying to map to some different naming convention. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/YlS7n7Xtso9BXZA2@agluck-desk3.sc.intel.com --- arch/x86/include/asm/intel-family.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 048b6d5aff50..def6ca121111 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -26,6 +26,7 @@ * _G - parts with extra graphics on * _X - regular server parts * _D - micro server parts + * _N,_P - other mobile parts * * Historical OPTDIFFs: * @@ -107,8 +108,10 @@ #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ +#define INTEL_FAM6_ALDERLAKE_N 0xBE #define INTEL_FAM6_RAPTORLAKE 0xB7 +#define INTEL_FAM6_RAPTORLAKE_P 0xBA /* "Small Core" Processors (Atom) */ From b4f5c6b2e52b27462c0599e64e96e53b58438de1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 16 Apr 2022 13:54:08 +0100 Subject: [PATCH 130/803] ASoC: wm8958: Fix change notifications for DSP controls The WM8958 DSP controls all return 0 on successful write, not a boolean value indicating if the write changed the value of the control. Fix this by returning 1 after a change, there is already a check at the start of each put() that skips the function in the case that there is no change. Signed-off-by: Mark Brown Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20220416125408.197440-1-broonie@kernel.org Cc: stable@vger.kernel.org --- sound/soc/codecs/wm8958-dsp2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/wm8958-dsp2.c b/sound/soc/codecs/wm8958-dsp2.c index e4018ba3b19a..7878c7a58ff1 100644 --- a/sound/soc/codecs/wm8958-dsp2.c +++ b/sound/soc/codecs/wm8958-dsp2.c @@ -530,7 +530,7 @@ static int wm8958_mbc_put(struct snd_kcontrol *kcontrol, wm8958_dsp_apply(component, mbc, wm8994->mbc_ena[mbc]); - return 0; + return 1; } #define WM8958_MBC_SWITCH(xname, xval) {\ @@ -656,7 +656,7 @@ static int wm8958_vss_put(struct snd_kcontrol *kcontrol, wm8958_dsp_apply(component, vss, wm8994->vss_ena[vss]); - return 0; + return 1; } @@ -730,7 +730,7 @@ static int wm8958_hpf_put(struct snd_kcontrol *kcontrol, wm8958_dsp_apply(component, hpf % 3, ucontrol->value.integer.value[0]); - return 0; + return 1; } #define WM8958_HPF_SWITCH(xname, xval) {\ @@ -824,7 +824,7 @@ static int wm8958_enh_eq_put(struct snd_kcontrol *kcontrol, wm8958_dsp_apply(component, eq, ucontrol->value.integer.value[0]); - return 0; + return 1; } #define WM8958_ENH_EQ_SWITCH(xname, xval) {\ From 50ff57888d0b13440e7f4cde05dc339ee8d0f1f8 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 6 Apr 2022 17:07:54 +0100 Subject: [PATCH 131/803] btrfs: fix leaked plug after failure syncing log on zoned filesystems On a zoned filesystem, if we fail to allocate the root node for the log root tree while syncing the log, we end up returning without finishing the IO plug we started before, resulting in leaking resources as we have started writeback for extent buffers of a log tree before. That allocation failure, which typically is either -ENOMEM or -ENOSPC, is not fatal and the fsync can safely fallback to a full transaction commit. So release the IO plug if we fail to allocate the extent buffer for the root of the log root tree when syncing the log on a zoned filesystem. Fixes: 3ddebf27fcd3a9 ("btrfs: zoned: reorder log node allocation on zoned filesystem") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 571dae8ad65e..09e4f1a04e6f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3188,6 +3188,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = btrfs_alloc_log_tree_node(trans, log_root_tree); if (ret) { mutex_unlock(&fs_info->tree_root->log_mutex); + blk_finish_plug(&plug); goto out; } } From 50f1cff3d8865909727fad6f960ce5a050799d00 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Mar 2022 17:52:10 +0100 Subject: [PATCH 132/803] btrfs: fix and document the zoned device choice in alloc_new_bio Zone Append bios only need a valid block device in struct bio, but not the device in the btrfs_bio. Use the information from btrfs_zoned_get_device to set up bi_bdev and fix zoned writes on multi-device file system with non-homogeneous capabilities and remove the pointless btrfs_bio.device assignment. Add big fat comments explaining what is going on here. Reviewed-by: Johannes Thumshirn Reviewed-by: Naohiro Aota Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 78486bbd1ac9..49f789627d00 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3334,24 +3334,37 @@ static int alloc_new_bio(struct btrfs_inode *inode, ret = calc_bio_boundaries(bio_ctrl, inode, file_offset); if (ret < 0) goto error; + if (wbc) { - struct block_device *bdev; + /* + * For Zone append we need the correct block_device that we are + * going to write to set in the bio to be able to respect the + * hardware limitation. Look it up here: + */ + if (bio_op(bio) == REQ_OP_ZONE_APPEND) { + struct btrfs_device *dev; - bdev = fs_info->fs_devices->latest_dev->bdev; - bio_set_dev(bio, bdev); - wbc_init_bio(wbc, bio); - } - if (bio_op(bio) == REQ_OP_ZONE_APPEND) { - struct btrfs_device *device; + dev = btrfs_zoned_get_device(fs_info, disk_bytenr, + fs_info->sectorsize); + if (IS_ERR(dev)) { + ret = PTR_ERR(dev); + goto error; + } - device = btrfs_zoned_get_device(fs_info, disk_bytenr, - fs_info->sectorsize); - if (IS_ERR(device)) { - ret = PTR_ERR(device); - goto error; + bio_set_dev(bio, dev->bdev); + } else { + /* + * Otherwise pick the last added device to support + * cgroup writeback. For multi-device file systems this + * means blk-cgroup policies have to always be set on the + * last added/replaced device. This is a bit odd but has + * been like that for a long time. + */ + bio_set_dev(bio, fs_info->fs_devices->latest_dev->bdev); } - - btrfs_bio(bio)->device = device; + wbc_init_bio(wbc, bio); + } else { + ASSERT(bio_op(bio) != REQ_OP_ZONE_APPEND); } return 0; error: From 00d825258bcc09c0e1b99aa7f9ad7d2c2fad41fa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Mar 2022 17:06:27 +0100 Subject: [PATCH 133/803] btrfs: fix direct I/O read repair for split bios When a bio is split in btrfs_submit_direct, dip->file_offset contains the file offset for the first bio. But this means the start value used in btrfs_check_read_dio_bio is incorrect for subsequent bios. Add a file_offset field to struct btrfs_bio to pass along the correct offset. Given that check_data_csum only uses start of an error message this means problems with this miscalculation will only show up when I/O fails or checksums mismatch. The logic was removed in f4f39fc5dc30 ("btrfs: remove btrfs_bio::logical member") but we need it due to the bio splitting. CC: stable@vger.kernel.org # 5.16+ Reviewed-by: Johannes Thumshirn Reviewed-by: Naohiro Aota Reviewed-by: Qu Wenruo Reviewed-by: Sweet Tea Dorminy Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 1 + fs/btrfs/inode.c | 13 +++++-------- fs/btrfs/volumes.h | 3 +++ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 49f789627d00..aa43f7811754 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2658,6 +2658,7 @@ int btrfs_repair_one_sector(struct inode *inode, repair_bio = btrfs_bio_alloc(1); repair_bbio = btrfs_bio(repair_bio); + repair_bbio->file_offset = start; repair_bio->bi_opf = REQ_OP_READ; repair_bio->bi_end_io = failed_bio->bi_end_io; repair_bio->bi_iter.bi_sector = failrec->logical >> 9; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 53a3f5e5ae89..ac9a3ebc2db3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7809,8 +7809,6 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip, const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); struct bio_vec bvec; struct bvec_iter iter; - const u64 orig_file_offset = dip->file_offset; - u64 start = orig_file_offset; u32 bio_offset = 0; blk_status_t err = BLK_STS_OK; @@ -7820,6 +7818,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip, nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len); pgoff = bvec.bv_offset; for (i = 0; i < nr_sectors; i++) { + u64 start = bbio->file_offset + bio_offset; + ASSERT(pgoff < PAGE_SIZE); if (uptodate && (!csum || !check_data_csum(inode, bbio, @@ -7832,17 +7832,13 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip, } else { int ret; - ASSERT((start - orig_file_offset) < UINT_MAX); - ret = btrfs_repair_one_sector(inode, - &bbio->bio, - start - orig_file_offset, - bvec.bv_page, pgoff, + ret = btrfs_repair_one_sector(inode, &bbio->bio, + bio_offset, bvec.bv_page, pgoff, start, bbio->mirror_num, submit_dio_repair_bio); if (ret) err = errno_to_blk_status(ret); } - start += sectorsize; ASSERT(bio_offset + sectorsize > bio_offset); bio_offset += sectorsize; pgoff += sectorsize; @@ -8045,6 +8041,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter, bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len); bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; + btrfs_bio(bio)->file_offset = file_offset; if (bio_op(bio) == REQ_OP_ZONE_APPEND) { status = extract_ordered_extent(BTRFS_I(inode), bio, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index bd297f23d19e..f3e28f11cfb6 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -328,6 +328,9 @@ struct btrfs_fs_devices { struct btrfs_bio { unsigned int mirror_num; + /* for direct I/O */ + u64 file_offset; + /* @device is for stripe IO submission. */ struct btrfs_device *device; u8 *csum; From 0fdf977d4576ee0decd612e22f6a837a239573cc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Mar 2022 17:06:28 +0100 Subject: [PATCH 134/803] btrfs: fix direct I/O writes for split bios on zoned devices When a bio is split in btrfs_submit_direct, dip->file_offset contains the file offset for the first bio. But this means the start value used in btrfs_end_dio_bio to record the write location for zone devices is incorrect for subsequent bios. CC: stable@vger.kernel.org # 5.16+ Reviewed-by: Johannes Thumshirn Reviewed-by: Naohiro Aota Reviewed-by: Qu Wenruo Reviewed-by: Sweet Tea Dorminy Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ac9a3ebc2db3..8bac68d8e96f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7865,6 +7865,7 @@ static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode, static void btrfs_end_dio_bio(struct bio *bio) { struct btrfs_dio_private *dip = bio->bi_private; + struct btrfs_bio *bbio = btrfs_bio(bio); blk_status_t err = bio->bi_status; if (err) @@ -7875,12 +7876,12 @@ static void btrfs_end_dio_bio(struct bio *bio) bio->bi_iter.bi_size, err); if (bio_op(bio) == REQ_OP_READ) - err = btrfs_check_read_dio_bio(dip, btrfs_bio(bio), !err); + err = btrfs_check_read_dio_bio(dip, bbio, !err); if (err) dip->dio_bio->bi_status = err; - btrfs_record_physical_zoned(dip->inode, dip->file_offset, bio); + btrfs_record_physical_zoned(dip->inode, bbio->file_offset, bio); bio_put(bio); btrfs_dio_private_put(dip); From 71d471e3faf90c9674cadc7605ac719e82cb7fac Mon Sep 17 00:00:00 2001 From: Dan Vacura Date: Thu, 31 Mar 2022 13:40:23 -0500 Subject: [PATCH 135/803] usb: gadget: uvc: Fix crash when encoding data for usb request During the uvcg_video_pump() process, if an error occurs and uvcg_queue_cancel() is called, the buffer queue will be cleared out, but the current marker (queue->buf_used) of the active buffer (no longer active) is not reset. On the next iteration of uvcg_video_pump() the stale buf_used count will be used and the logic of min((unsigned int)len, buf->bytesused - queue->buf_used) may incorrectly calculate a nbytes size, causing an invalid memory access. [80802.185460][ T315] configfs-gadget gadget: uvc: VS request completed with status -18. [80802.185519][ T315] configfs-gadget gadget: uvc: VS request completed with status -18. ... uvcg_queue_cancel() is called and the queue is cleared out, but the marker queue->buf_used is not reset. ... [80802.262328][ T8682] Unable to handle kernel paging request at virtual address ffffffc03af9f000 ... ... [80802.263138][ T8682] Call trace: [80802.263146][ T8682] __memcpy+0x12c/0x180 [80802.263155][ T8682] uvcg_video_pump+0xcc/0x1e0 [80802.263165][ T8682] process_one_work+0x2cc/0x568 [80802.263173][ T8682] worker_thread+0x28c/0x518 [80802.263181][ T8682] kthread+0x160/0x170 [80802.263188][ T8682] ret_from_fork+0x10/0x18 [80802.263198][ T8682] Code: a8c12829 a88130cb a8c130 Fixes: d692522577c0 ("usb: gadget/uvc: Port UVC webcam gadget to use videobuf2 framework") Cc: Signed-off-by: Dan Vacura Link: https://lore.kernel.org/r/20220331184024.23918-1-w36195@motorola.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/uvc_queue.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c index d852ac9e47e7..2cda982f3765 100644 --- a/drivers/usb/gadget/function/uvc_queue.c +++ b/drivers/usb/gadget/function/uvc_queue.c @@ -264,6 +264,8 @@ void uvcg_queue_cancel(struct uvc_video_queue *queue, int disconnect) buf->state = UVC_BUF_STATE_ERROR; vb2_buffer_done(&buf->buf.vb2_buf, VB2_BUF_STATE_ERROR); } + queue->buf_used = 0; + /* This must be protected by the irqlock spinlock to avoid race * conditions between uvc_queue_buffer and the disconnection event that * could result in an interruptible wait in uvc_dequeue_buffer. Do not From 679ab61bf5f5f519377d812afb4fb93634782c74 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Mon, 18 Apr 2022 23:33:22 +0800 Subject: [PATCH 136/803] RDMA/irdma: Fix deadlock in irdma_cleanup_cm_core() There is a deadlock in irdma_cleanup_cm_core(), which is shown below: (Thread 1) | (Thread 2) | irdma_schedule_cm_timer() irdma_cleanup_cm_core() | add_timer() spin_lock_irqsave() //(1) | (wait a time) ... | irdma_cm_timer_tick() del_timer_sync() | spin_lock_irqsave() //(2) (wait timer to stop) | ... We hold cm_core->ht_lock in position (1) of thread 1 and use del_timer_sync() to wait timer to stop, but timer handler also need cm_core->ht_lock in position (2) of thread 2. As a result, irdma_cleanup_cm_core() will block forever. This patch removes the check of timer_pending() in irdma_cleanup_cm_core(), because the del_timer_sync() function will just return directly if there isn't a pending timer. As a result, the lock is redundant, because there is no resource it could protect. Link: https://lore.kernel.org/r/20220418153322.42524-1-duoming@zju.edu.cn Signed-off-by: Duoming Zhou Reviewed-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/cm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index dedb3b7edd8d..a98d962e5efb 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -3246,15 +3246,10 @@ int irdma_setup_cm_core(struct irdma_device *iwdev, u8 rdma_ver) */ void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core) { - unsigned long flags; - if (!cm_core) return; - spin_lock_irqsave(&cm_core->ht_lock, flags); - if (timer_pending(&cm_core->tcp_timer)) - del_timer_sync(&cm_core->tcp_timer); - spin_unlock_irqrestore(&cm_core->ht_lock, flags); + del_timer_sync(&cm_core->tcp_timer); destroy_workqueue(cm_core->event_wq); cm_core->dev->ws_reset(&cm_core->iwdev->vsi); From f9e14dbbd454581061c736bf70bf5cbb15ac927c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 19 Apr 2022 09:52:41 -0700 Subject: [PATCH 137/803] x86/cpu: Load microcode during restore_processor_state() When resuming from system sleep state, restore_processor_state() restores the boot CPU MSRs. These MSRs could be emulated by microcode. If microcode is not loaded yet, writing to emulated MSRs leads to unchecked MSR access error: ... PM: Calling lapic_suspend+0x0/0x210 unchecked MSR access error: WRMSR to 0x10f (tried to write 0x0...0) at rIP: ... (native_write_msr) Call Trace: ? restore_processor_state x86_acpi_suspend_lowlevel acpi_suspend_enter suspend_devices_and_enter pm_suspend.cold state_store kobj_attr_store sysfs_kf_write kernfs_fop_write_iter new_sync_write vfs_write ksys_write __x64_sys_write do_syscall_64 entry_SYSCALL_64_after_hwframe RIP: 0033:0x7fda13c260a7 To ensure microcode emulated MSRs are available for restoration, load the microcode on the boot CPU before restoring these MSRs. [ Pawan: write commit message and productize it. ] Fixes: e2a1256b17b1 ("x86/speculation: Restore speculation related MSRs during S3 resume") Reported-by: Kyle D. Pelton Signed-off-by: Borislav Petkov Signed-off-by: Pawan Gupta Tested-by: Kyle D. Pelton Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=215841 Link: https://lore.kernel.org/r/4350dfbf785cd482d3fafa72b2b49c83102df3ce.1650386317.git.pawan.kumar.gupta@linux.intel.com --- arch/x86/include/asm/microcode.h | 2 ++ arch/x86/kernel/cpu/microcode/core.c | 6 +++--- arch/x86/power/cpu.c | 10 +++++++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index d6bfdfb0f0af..0c3d3440fe27 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -131,10 +131,12 @@ extern void __init load_ucode_bsp(void); extern void load_ucode_ap(void); void reload_early_microcode(void); extern bool initrd_gone; +void microcode_bsp_resume(void); #else static inline void __init load_ucode_bsp(void) { } static inline void load_ucode_ap(void) { } static inline void reload_early_microcode(void) { } +static inline void microcode_bsp_resume(void) { } #endif #endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index f955d25076ba..239ff5fcec6a 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -758,9 +758,9 @@ static struct subsys_interface mc_cpu_interface = { }; /** - * mc_bp_resume - Update boot CPU microcode during resume. + * microcode_bsp_resume - Update boot CPU microcode during resume. */ -static void mc_bp_resume(void) +void microcode_bsp_resume(void) { int cpu = smp_processor_id(); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -772,7 +772,7 @@ static void mc_bp_resume(void) } static struct syscore_ops mc_syscore_ops = { - .resume = mc_bp_resume, + .resume = microcode_bsp_resume, }; static int mc_cpu_starting(unsigned int cpu) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 3822666fb73d..bb176c72891c 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -25,6 +25,7 @@ #include #include #include +#include #ifdef CONFIG_X86_32 __visible unsigned long saved_context_ebx; @@ -262,11 +263,18 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) x86_platform.restore_sched_clock_state(); mtrr_bp_restore(); perf_restore_debug_store(); - msr_restore_context(ctxt); c = &cpu_data(smp_processor_id()); if (cpu_has(c, X86_FEATURE_MSR_IA32_FEAT_CTL)) init_ia32_feat_ctl(c); + + microcode_bsp_resume(); + + /* + * This needs to happen after the microcode has been updated upon resume + * because some of the MSRs are "emulated" in microcode. + */ + msr_restore_context(ctxt); } /* Needed by apm.c */ From 6c8ef58a50b5fab6e364b558143490a2014e2a4f Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Tue, 19 Apr 2022 10:34:16 +0300 Subject: [PATCH 138/803] x86/unwind/orc: Recheck address range after stack info was updated A crash was observed in the ORC unwinder: BUG: stack guard page was hit at 000000000dd984a2 (stack is 00000000d1caafca..00000000613712f0) kernel stack overflow (page fault): 0000 [#1] SMP NOPTI CPU: 93 PID: 23787 Comm: context_switch1 Not tainted 5.4.145 #1 RIP: 0010:unwind_next_frame Call Trace: perf_callchain_kernel get_perf_callchain perf_callchain perf_prepare_sample perf_event_output_forward __perf_event_overflow perf_ibs_handle_irq perf_ibs_nmi_handler nmi_handle default_do_nmi do_nmi end_repeat_nmi This was really two bugs: 1) The perf IBS code passed inconsistent regs to the unwinder. 2) The unwinder didn't handle the bad input gracefully. Fix the latter bug. The ORC unwinder needs to be immune against bad inputs. The problem is that stack_access_ok() doesn't recheck the validity of the full range of registers after switching to the next valid stack with get_stack_info(). Fix that. [ jpoimboe: rewrote commit log ] Signed-off-by: Dmitry Monakhov Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/1650353656-956624-1-git-send-email-dmtrmonakhov@yandex-team.ru Signed-off-by: Peter Zijlstra --- arch/x86/kernel/unwind_orc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 794fdef2501a..38185aedf7d1 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -339,11 +339,11 @@ static bool stack_access_ok(struct unwind_state *state, unsigned long _addr, struct stack_info *info = &state->stack_info; void *addr = (void *)_addr; - if (!on_stack(info, addr, len) && - (get_stack_info(addr, state->task, info, &state->stack_mask))) - return false; + if (on_stack(info, addr, len)) + return true; - return true; + return !get_stack_info(addr, state->task, info, &state->stack_mask) && + on_stack(info, addr, len); } static bool deref_stack_reg(struct unwind_state *state, unsigned long addr, From 610abf3dea1092445b4b185e14ed130d1ec6aa74 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 19 Apr 2022 09:54:41 -0700 Subject: [PATCH 139/803] MAINTAINERS: Add x86 unwinding entry Create a new section for x86 unwinder maintenance. Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/db2b764b735a9481df9f7717a3a1f75ba496fcc1.1650387176.git.jpoimboe@redhat.com Signed-off-by: Peter Zijlstra --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 40fa1955ca3f..63ace80af8c8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21434,6 +21434,15 @@ F: arch/x86/include/asm/uv/ F: arch/x86/kernel/apic/x2apic_uv_x.c F: arch/x86/platform/uv/ +X86 STACK UNWINDING +M: Josh Poimboeuf +M: Peter Zijlstra +S: Supported +F: arch/x86/include/asm/unwind*.h +F: arch/x86/kernel/dumpstack.c +F: arch/x86/kernel/stacktrace.c +F: arch/x86/kernel/unwind_*.c + X86 VDSO M: Andy Lutomirski L: linux-kernel@vger.kernel.org From 226d44acf6dfe71c9df5804b82364e93cf908b53 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Apr 2022 11:45:53 +0200 Subject: [PATCH 140/803] lib/strn*,objtool: Enforce user_access_begin() rules Apparently GCC can fail to inline a 'static inline' single caller function: lib/strnlen_user.o: warning: objtool: strnlen_user()+0x33: call to do_strnlen_user() with UACCESS enabled lib/strncpy_from_user.o: warning: objtool: strncpy_from_user()+0x33: call to do_strncpy_from_user() with UACCESS enabled Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220408094718.262932488@infradead.org --- lib/strncpy_from_user.c | 2 +- lib/strnlen_user.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index 08fc72d3ed16..6432b8c3e431 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -25,7 +25,7 @@ * hit it), 'max' is the address space maximum (and we return * -EFAULT if we hit it). */ -static inline long do_strncpy_from_user(char *dst, const char __user *src, +static __always_inline long do_strncpy_from_user(char *dst, const char __user *src, unsigned long count, unsigned long max) { const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c index bffa0ebf9f8b..feeb935a2299 100644 --- a/lib/strnlen_user.c +++ b/lib/strnlen_user.c @@ -20,7 +20,7 @@ * if it fits in a aligned 'long'. The caller needs to check * the return value against "> max". */ -static inline long do_strnlen_user(const char __user *src, unsigned long count, unsigned long max) +static __always_inline long do_strnlen_user(const char __user *src, unsigned long count, unsigned long max) { const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; unsigned long align, res = 0; From 2730d3c14a85617c177337f2e2af2108bf82c4ca Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Apr 2022 11:45:54 +0200 Subject: [PATCH 141/803] x86,xen,objtool: Add UNWIND hint SYM_CODE_START*() doesn't get auto-validated and needs an UNWIND hint to get checked, add one. vmlinux.o: warning: objtool: pvh_start_xen()+0x0: unreachable Reported-by: Thomas Gleixner Reported-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220408094718.321246297@infradead.org --- arch/x86/platform/pvh/head.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index 72c1e42d121d..7fe564eaf228 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -50,6 +50,7 @@ #define PVH_DS_SEL (PVH_GDT_ENTRY_DS * 8) SYM_CODE_START_LOCAL(pvh_start_xen) + UNWIND_HINT_EMPTY cld lgdt (_pa(gdt)) From d4e5268a08b211b536fed29beb24271ecd85187e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Apr 2022 11:45:55 +0200 Subject: [PATCH 142/803] x86,objtool: Mark cpu_startup_entry() __noreturn GCC-8 isn't clever enough to figure out that cpu_start_entry() is a noreturn while objtool is. This results in code after the call in start_secondary(). Give GCC a hand so that they all agree on things. vmlinux.o: warning: objtool: start_secondary()+0x10e: unreachable Reported-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220408094718.383658532@infradead.org --- include/linux/cpu.h | 2 +- tools/objtool/check.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 9cf51e41e697..54dc2f9a2d56 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -167,7 +167,7 @@ static inline int suspend_disable_secondary_cpus(void) { return 0; } static inline void suspend_enable_secondary_cpus(void) { } #endif /* !CONFIG_PM_SLEEP_SMP */ -void cpu_startup_entry(enum cpuhp_state state); +void __noreturn cpu_startup_entry(enum cpuhp_state state); void cpu_idle_poll_ctrl(bool enable); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index bd0c2c828940..e3a675d6a704 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -184,6 +184,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "do_group_exit", "stop_this_cpu", "__invalid_creds", + "cpu_startup_entry", }; if (!func) From d66e9d50ea5cd76b2c4875c758efad665283d7ad Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Apr 2022 11:45:56 +0200 Subject: [PATCH 143/803] x86,objtool: Explicitly mark idtentry_body()s tail REACHABLE Objtool can figure out that some \cfunc()s are noreturn and then complains about certain instances having unreachable tails: vmlinux.o: warning: objtool: asm_exc_xen_unknown_trap()+0x16: unreachable instruction Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220408094718.441854969@infradead.org --- arch/x86/entry/entry_64.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4faac48ebec5..73d958522b6a 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -337,6 +337,9 @@ SYM_CODE_END(ret_from_fork) call \cfunc + /* For some configurations \cfunc ends up being a noreturn. */ + REACHABLE + jmp error_return .endm From 4a5de9b76fcb3f477f73d5a63f6e27709e8af81f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Apr 2022 09:50:20 -0700 Subject: [PATCH 144/803] objtool: Enable unreachable warnings for CLANG LTO With IBT support in, objtool is now fully capable of following vmlinux code flow in LTO mode. Start reporting unreachable warnings for Clang LTO as well. Fixes: ed53a0d97192 ("x86/alternative: Use .ibt_endbr_seal to seal indirect calls") Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/7b12df54bceeb0761fe9fc8269ea0c00501214a9.1650300597.git.jpoimboe@redhat.com --- scripts/Makefile.build | 2 +- scripts/link-vmlinux.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 9717e6f6fb31..33c1ed581522 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -231,7 +231,7 @@ objtool_args = \ $(if $(part-of-module), --module) \ $(if $(CONFIG_X86_KERNEL_IBT), --lto --ibt) \ $(if $(CONFIG_FRAME_POINTER),, --no-fp) \ - $(if $(CONFIG_GCOV_KERNEL)$(CONFIG_LTO_CLANG), --no-unreachable)\ + $(if $(CONFIG_GCOV_KERNEL), --no-unreachable) \ $(if $(CONFIG_RETPOLINE), --retpoline) \ $(if $(CONFIG_X86_SMAP), --uaccess) \ $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \ diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 20f44504a644..9361a1ef02c9 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -140,7 +140,7 @@ objtool_link() if ! is_enabled CONFIG_FRAME_POINTER; then objtoolopt="${objtoolopt} --no-fp" fi - if is_enabled CONFIG_GCOV_KERNEL || is_enabled CONFIG_LTO_CLANG; then + if is_enabled CONFIG_GCOV_KERNEL; then objtoolopt="${objtoolopt} --no-unreachable" fi if is_enabled CONFIG_RETPOLINE; then From 613871cd665ab26290c5ff531dd06c3789d31319 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Apr 2022 09:50:22 -0700 Subject: [PATCH 145/803] x86/static_call: Add ANNOTATE_NOENDBR to static call trampoline The static call trampoline is never indirect-branched to, but is referenced by the static call key. Add ANNOTATE_NOENDBR. Fixes: ed53a0d97192 ("x86/alternative: Use .ibt_endbr_seal to seal indirect calls") Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1b5b54aad7d81241dabe5e0c9b40dea64b540b00.1650300597.git.jpoimboe@redhat.com --- arch/x86/include/asm/static_call.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 2455d721503e..2d8dacd02643 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -26,6 +26,7 @@ ".align 4 \n" \ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \ STATIC_CALL_TRAMP_STR(name) ": \n" \ + ANNOTATE_NOENDBR \ insns " \n" \ ".byte 0x53, 0x43, 0x54 \n" \ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \ From 1c0513dec41e4d40eb21402dff397ad84ca13a44 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Apr 2022 09:50:23 -0700 Subject: [PATCH 146/803] x86/retpoline: Add ANNOTATE_NOENDBR for retpolines The retpolines are exported, so they're referenced by ksymtab sections. But they're never indirect-branched to, so add ANNOTATE_NOENDBR. Fixes: ed53a0d97192 ("x86/alternative: Use .ibt_endbr_seal to seal indirect calls") Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/b6ec963dfd9301b6b1d74ef7758fcb0b540d6c6c.1650300597.git.jpoimboe@redhat.com --- arch/x86/lib/retpoline.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 5f87bab4fb8d..b2b2366885a2 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -31,6 +31,7 @@ .align RETPOLINE_THUNK_SIZE SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ @@ -55,7 +56,6 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) .align RETPOLINE_THUNK_SIZE SYM_CODE_START(__x86_indirect_thunk_array) - ANNOTATE_NOENDBR // apply_retpolines #define GEN(reg) THUNK reg #include From 7a00829f8ac3f76b3a3aa5c28ce4ddfd2f977bbe Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Apr 2022 09:50:24 -0700 Subject: [PATCH 147/803] x86/uaccess: Add ENDBR to __put_user_nocheck*() The __put_user_nocheck*() inner labels are exported, so in keeping with the "allow exported functions to be indirectly called" policy, add ENDBR. Fixes: ed53a0d97192 ("x86/alternative: Use .ibt_endbr_seal to seal indirect calls") Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/207f02177a23031091d1a608de6049a9e5e8ff80.1650300597.git.jpoimboe@redhat.com --- arch/x86/lib/putuser.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index ecb2049c1273..b7dfd60243b7 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -48,6 +48,7 @@ SYM_FUNC_START(__put_user_1) cmp %_ASM_BX,%_ASM_CX jae .Lbad_put_user SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL) + ENDBR ASM_STAC 1: movb %al,(%_ASM_CX) xor %ecx,%ecx @@ -62,6 +63,7 @@ SYM_FUNC_START(__put_user_2) cmp %_ASM_BX,%_ASM_CX jae .Lbad_put_user SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL) + ENDBR ASM_STAC 2: movw %ax,(%_ASM_CX) xor %ecx,%ecx @@ -76,6 +78,7 @@ SYM_FUNC_START(__put_user_4) cmp %_ASM_BX,%_ASM_CX jae .Lbad_put_user SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL) + ENDBR ASM_STAC 3: movl %eax,(%_ASM_CX) xor %ecx,%ecx @@ -90,6 +93,7 @@ SYM_FUNC_START(__put_user_8) cmp %_ASM_BX,%_ASM_CX jae .Lbad_put_user SYM_INNER_LABEL(__put_user_nocheck_8, SYM_L_GLOBAL) + ENDBR ASM_STAC 4: mov %_ASM_AX,(%_ASM_CX) #ifdef CONFIG_X86_32 From 1ab80a0da4c4a4dd496fc14faabbc8bde61a605c Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Apr 2022 09:50:25 -0700 Subject: [PATCH 148/803] x86/xen: Add ANNOTATE_NOENDBR to startup_xen() The startup_xen() kernel entry point is referenced by the ".note.Xen" section, and is the real entry point of the VM. Control transfer is through IRET, which *could* set NEED_ENDBR, however Xen currently does no such thing. Add ANNOTATE_NOENDBR to silence future objtool warnings. Fixes: ed53a0d97192 ("x86/alternative: Use .ibt_endbr_seal to seal indirect calls") Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andrew Cooper Link: https://lkml.kernel.org/r/a87bd48b06d11ec4b98122a429e71e489b4e48c3.1650300597.git.jpoimboe@redhat.com --- arch/x86/xen/xen-head.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index ac17196e2518..3a2cd93bf059 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -45,6 +45,7 @@ SYM_CODE_END(hypercall_page) __INIT SYM_CODE_START(startup_xen) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR cld /* Clear .bss */ From 4baae989e638e9bf4b7d29bc5e36b581fddcca52 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Apr 2022 09:50:29 -0700 Subject: [PATCH 149/803] objtool: Print data address for "!ENDBR" data warnings When a "!ENDBR" warning is reported for a data section, objtool just prints the text address of the relocation target twice, without giving any clues about the location of the original data reference: vmlinux.o: warning: objtool: dcbnl_netdevice_event()+0x0: .text+0xb64680: data relocation to !ENDBR: dcbnl_netdevice_event+0x0 Instead, print the address of the data reference, in addition to the address of the relocation target. vmlinux.o: warning: objtool: dcbnl_nb+0x0: .data..read_mostly+0xe260: data relocation to !ENDBR: dcbnl_netdevice_event+0x0 Fixes: 89bc853eae4a ("objtool: Find unused ENDBR instructions") Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/762e88d51300e8eaf0f933a5b0feae20ac033bea.1650300597.git.jpoimboe@redhat.com --- tools/objtool/check.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e3a675d6a704..b822a6d5a172 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3817,11 +3817,8 @@ static int validate_ibt(struct objtool_file *file) struct instruction *dest; dest = validate_ibt_reloc(file, reloc); - if (is_data && dest && !dest->noendbr) { - warn_noendbr("data ", reloc->sym->sec, - reloc->sym->offset + reloc->addend, - dest); - } + if (is_data && dest && !dest->noendbr) + warn_noendbr("data ", sec, reloc->offset, dest); } } From 1d08b92fa2c41c43e4efe9787413e9ac9a434f83 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Apr 2022 09:50:30 -0700 Subject: [PATCH 150/803] objtool: Use offstr() to print address of missing ENDBR Fixes: 89bc853eae4a ("objtool: Find unused ENDBR instructions") Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/95d12e800c736a3f7d08d61dabb760b2d5251a8e.1650300597.git.jpoimboe@redhat.com --- tools/objtool/check.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index b822a6d5a172..5285edd41da8 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3211,9 +3211,8 @@ validate_ibt_reloc(struct objtool_file *file, struct reloc *reloc) static void warn_noendbr(const char *msg, struct section *sec, unsigned long offset, struct instruction *dest) { - WARN_FUNC("%srelocation to !ENDBR: %s+0x%lx", sec, offset, msg, - dest->func ? dest->func->name : dest->sec->name, - dest->func ? dest->offset - dest->func->offset : dest->offset); + WARN_FUNC("%srelocation to !ENDBR: %s", sec, offset, msg, + offstr(dest->sec, dest->offset)); } static void validate_ibt_dest(struct objtool_file *file, struct instruction *insn, From 4cdfc11b2836e659c0b7b31152a8b0d976167b59 Mon Sep 17 00:00:00 2001 From: Nur Hussein Date: Mon, 18 Apr 2022 03:24:54 +0800 Subject: [PATCH 151/803] x86/Kconfig: fix the spelling of 'becoming' in X86_KERNEL_IBT config There is only one m in becoming. Signed-off-by: Nur Hussein Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220417192454.10247-1-hussein@unixcat.org --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b0142e01002e..4bed3abf444d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1866,7 +1866,7 @@ config X86_KERNEL_IBT code with them to make this happen. In addition to building the kernel with IBT, seal all functions that - are not indirect call targets, avoiding them ever becomming one. + are not indirect call targets, avoiding them ever becoming one. This requires LTO like objtool runs and will slow down the build. It does significantly reduce the number of ENDBR instructions in the From 02041b32256628aef0d18ec15d3658fe41bc1afe Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 11 Apr 2022 16:10:29 -0700 Subject: [PATCH 152/803] x86/uaccess: Don't jump between functions For unwinding sanity, a function shouldn't jump to the middle of another function. Move the short string user copy code out to a separate non-function code snippet. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/9519e4853148b765e047967708f2b61e56c93186.1649718562.git.jpoimboe@redhat.com --- arch/x86/lib/copy_user_64.S | 87 ++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 35 deletions(-) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 8ca5ecf16dc4..9dec1b38a98f 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -53,12 +53,12 @@ SYM_FUNC_START(copy_user_generic_unrolled) ASM_STAC cmpl $8,%edx - jb 20f /* less then 8 bytes, go to byte copy loop */ + jb .Lcopy_user_short_string_bytes ALIGN_DESTINATION movl %edx,%ecx andl $63,%edx shrl $6,%ecx - jz .L_copy_short_string + jz copy_user_short_string 1: movq (%rsi),%r8 2: movq 1*8(%rsi),%r9 3: movq 2*8(%rsi),%r10 @@ -79,37 +79,11 @@ SYM_FUNC_START(copy_user_generic_unrolled) leaq 64(%rdi),%rdi decl %ecx jnz 1b -.L_copy_short_string: - movl %edx,%ecx - andl $7,%edx - shrl $3,%ecx - jz 20f -18: movq (%rsi),%r8 -19: movq %r8,(%rdi) - leaq 8(%rsi),%rsi - leaq 8(%rdi),%rdi - decl %ecx - jnz 18b -20: andl %edx,%edx - jz 23f - movl %edx,%ecx -21: movb (%rsi),%al -22: movb %al,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz 21b -23: xor %eax,%eax - ASM_CLAC - RET + jmp copy_user_short_string 30: shll $6,%ecx addl %ecx,%edx - jmp 60f -40: leal (%rdx,%rcx,8),%edx - jmp 60f -50: movl %ecx,%edx -60: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */ + jmp .Lcopy_user_handle_tail _ASM_EXTABLE_CPY(1b, 30b) _ASM_EXTABLE_CPY(2b, 30b) @@ -127,10 +101,6 @@ SYM_FUNC_START(copy_user_generic_unrolled) _ASM_EXTABLE_CPY(14b, 30b) _ASM_EXTABLE_CPY(15b, 30b) _ASM_EXTABLE_CPY(16b, 30b) - _ASM_EXTABLE_CPY(18b, 40b) - _ASM_EXTABLE_CPY(19b, 40b) - _ASM_EXTABLE_CPY(21b, 50b) - _ASM_EXTABLE_CPY(22b, 50b) SYM_FUNC_END(copy_user_generic_unrolled) EXPORT_SYMBOL(copy_user_generic_unrolled) @@ -191,7 +161,7 @@ EXPORT_SYMBOL(copy_user_generic_string) SYM_FUNC_START(copy_user_enhanced_fast_string) ASM_STAC /* CPUs without FSRM should avoid rep movsb for short copies */ - ALTERNATIVE "cmpl $64, %edx; jb .L_copy_short_string", "", X86_FEATURE_FSRM + ALTERNATIVE "cmpl $64, %edx; jb copy_user_short_string", "", X86_FEATURE_FSRM movl %edx,%ecx 1: rep movsb xorl %eax,%eax @@ -243,6 +213,53 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail) SYM_CODE_END(.Lcopy_user_handle_tail) +/* + * Finish memcpy of less than 64 bytes. #AC should already be set. + * + * Input: + * rdi destination + * rsi source + * rdx count (< 64) + * + * Output: + * eax uncopied bytes or 0 if successful. + */ +SYM_CODE_START_LOCAL(copy_user_short_string) + movl %edx,%ecx + andl $7,%edx + shrl $3,%ecx + jz .Lcopy_user_short_string_bytes +18: movq (%rsi),%r8 +19: movq %r8,(%rdi) + leaq 8(%rsi),%rsi + leaq 8(%rdi),%rdi + decl %ecx + jnz 18b +.Lcopy_user_short_string_bytes: + andl %edx,%edx + jz 23f + movl %edx,%ecx +21: movb (%rsi),%al +22: movb %al,(%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz 21b +23: xor %eax,%eax + ASM_CLAC + RET + +40: leal (%rdx,%rcx,8),%edx + jmp 60f +50: movl %ecx,%edx /* ecx is zerorest also */ +60: jmp .Lcopy_user_handle_tail + + _ASM_EXTABLE_CPY(18b, 40b) + _ASM_EXTABLE_CPY(19b, 40b) + _ASM_EXTABLE_CPY(21b, 50b) + _ASM_EXTABLE_CPY(22b, 50b) +SYM_CODE_END(copy_user_short_string) + /* * copy_user_nocache - Uncached memory copy with exception handling * This will force destination out of cache for more performance. From 26ff604102c98df79c3fe2614d1b9bb068d4c28c Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 11 Apr 2022 16:10:30 -0700 Subject: [PATCH 153/803] objtool: Don't set 'jump_dest' for sibling calls For most sibling calls, 'jump_dest' is NULL because objtool treats the jump like a call and sets 'call_dest'. But there are a few edge cases where that's not true. Make it consistent to avoid unexpected behavior. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/8737d6b9d1691831aed73375f444f0f42da3e2c9.1649718562.git.jpoimboe@redhat.com --- tools/objtool/check.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index bd0c2c828940..6f492789c8c0 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1271,7 +1271,7 @@ static bool is_first_func_insn(struct objtool_file *file, struct instruction *in */ static int add_jump_destinations(struct objtool_file *file) { - struct instruction *insn; + struct instruction *insn, *jump_dest; struct reloc *reloc; struct section *dest_sec; unsigned long dest_off; @@ -1291,7 +1291,10 @@ static int add_jump_destinations(struct objtool_file *file) add_retpoline_call(file, insn); continue; } else if (insn->func) { - /* internal or external sibling call (with reloc) */ + /* + * External sibling call or internal sibling call with + * STT_FUNC reloc. + */ add_call_dest(file, insn, reloc->sym, true); continue; } else if (reloc->sym->sec->idx) { @@ -1303,8 +1306,8 @@ static int add_jump_destinations(struct objtool_file *file) continue; } - insn->jump_dest = find_insn(file, dest_sec, dest_off); - if (!insn->jump_dest) { + jump_dest = find_insn(file, dest_sec, dest_off); + if (!jump_dest) { /* * This is a special case where an alt instruction @@ -1323,8 +1326,8 @@ static int add_jump_destinations(struct objtool_file *file) /* * Cross-function jump. */ - if (insn->func && insn->jump_dest->func && - insn->func != insn->jump_dest->func) { + if (insn->func && jump_dest->func && + insn->func != jump_dest->func) { /* * For GCC 8+, create parent/child links for any cold @@ -1342,16 +1345,22 @@ static int add_jump_destinations(struct objtool_file *file) * subfunction is through a jump table. */ if (!strstr(insn->func->name, ".cold") && - strstr(insn->jump_dest->func->name, ".cold")) { - insn->func->cfunc = insn->jump_dest->func; - insn->jump_dest->func->pfunc = insn->func; + strstr(jump_dest->func->name, ".cold")) { + insn->func->cfunc = jump_dest->func; + jump_dest->func->pfunc = insn->func; - } else if (!same_function(insn, insn->jump_dest) && - is_first_func_insn(file, insn->jump_dest)) { - /* internal sibling call (without reloc) */ - add_call_dest(file, insn, insn->jump_dest->func, true); + } else if (!same_function(insn, jump_dest) && + is_first_func_insn(file, jump_dest)) { + /* + * Internal sibling call without reloc or with + * STT_SECTION reloc. + */ + add_call_dest(file, insn, jump_dest->func, true); + continue; } } + + insn->jump_dest = jump_dest; } return 0; From 34c861e806478ac2ea4032721defbf1d6967df08 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 11 Apr 2022 16:10:31 -0700 Subject: [PATCH 154/803] objtool: Fix sibling call detection in alternatives In add_jump_destinations(), sibling call detection requires 'insn->func' to be valid. But alternative instructions get their 'func' set in handle_group_alt(), which runs *after* add_jump_destinations(). So sibling calls in alternatives code don't get properly detected. Fix that by changing the initialization order: call add_special_section_alts() *before* add_jump_destinations(). This also means the special case for a missing 'jump_dest' in add_jump_destinations() can be removed, as it has already been dealt with. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/c02e0a0a2a4286b5f848d17c77fdcb7e0caf709c.1649718562.git.jpoimboe@redhat.com --- tools/objtool/check.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 6f492789c8c0..0f5d3de30e0d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1277,6 +1277,13 @@ static int add_jump_destinations(struct objtool_file *file) unsigned long dest_off; for_each_insn(file, insn) { + if (insn->jump_dest) { + /* + * handle_group_alt() may have previously set + * 'jump_dest' for some alternatives. + */ + continue; + } if (!is_static_jump(insn)) continue; @@ -1308,15 +1315,6 @@ static int add_jump_destinations(struct objtool_file *file) jump_dest = find_insn(file, dest_sec, dest_off); if (!jump_dest) { - - /* - * This is a special case where an alt instruction - * jumps past the end of the section. These are - * handled later in handle_group_alt(). - */ - if (!strcmp(insn->sec->name, ".altinstr_replacement")) - continue; - WARN_FUNC("can't find jump dest instruction at %s+0x%lx", insn->sec, insn->offset, dest_sec->name, dest_off); @@ -1549,13 +1547,13 @@ static int handle_group_alt(struct objtool_file *file, continue; dest_off = arch_jump_destination(insn); - if (dest_off == special_alt->new_off + special_alt->new_len) + if (dest_off == special_alt->new_off + special_alt->new_len) { insn->jump_dest = next_insn_same_sec(file, last_orig_insn); - - if (!insn->jump_dest) { - WARN_FUNC("can't find alternative jump destination", - insn->sec, insn->offset); - return -1; + if (!insn->jump_dest) { + WARN_FUNC("can't find alternative jump destination", + insn->sec, insn->offset); + return -1; + } } } @@ -2254,14 +2252,14 @@ static int decode_sections(struct objtool_file *file) return ret; /* - * Must be before add_special_section_alts() as that depends on - * jump_dest being set. + * Must be before add_jump_destinations(), which depends on 'func' + * being set for alternatives, to enable proper sibling call detection. */ - ret = add_jump_destinations(file); + ret = add_special_section_alts(file); if (ret) return ret; - ret = add_special_section_alts(file); + ret = add_jump_destinations(file); if (ret) return ret; From 08feafe8d1958febf3a9733a3d1564d8fc23340e Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 11 Apr 2022 16:10:32 -0700 Subject: [PATCH 155/803] objtool: Fix function fallthrough detection for vmlinux Objtool's function fallthrough detection only works on C objects. The distinction between C and assembly objects no longer makes sense with objtool running on vmlinux.o. Now that copy_user_64.S has been fixed up, and an objtool sibling call detection bug has been fixed, the asm code is in "compliance" and this hack is no longer needed. Remove it. Fixes: ed53a0d97192 ("x86/alternative: Use .ibt_endbr_seal to seal indirect calls") Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/b434cff98eca3a60dcc64c620d7d5d405a0f441c.1649718562.git.jpoimboe@redhat.com --- tools/objtool/check.c | 2 +- tools/objtool/include/objtool/objtool.h | 2 +- tools/objtool/objtool.c | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 0f5d3de30e0d..5f10653eb5c2 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3310,7 +3310,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, while (1) { next_insn = next_insn_to_validate(file, insn); - if (file->c_file && func && insn->func && func != insn->func->pfunc) { + if (func && insn->func && func != insn->func->pfunc) { WARN("%s() falls through to next function %s()", func->name, insn->func->name); return 1; diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index 7a5c13a78f87..a6e72d916807 100644 --- a/tools/objtool/include/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -27,7 +27,7 @@ struct objtool_file { struct list_head static_call_list; struct list_head mcount_loc_list; struct list_head endbr_list; - bool ignore_unreachables, c_file, hints, rodata; + bool ignore_unreachables, hints, rodata; unsigned int nr_endbr; unsigned int nr_endbr_int; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index b09946f4e1d6..843ff3c2f28e 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -129,7 +129,6 @@ struct objtool_file *objtool_open_read(const char *_objname) INIT_LIST_HEAD(&file.static_call_list); INIT_LIST_HEAD(&file.mcount_loc_list); INIT_LIST_HEAD(&file.endbr_list); - file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); file.ignore_unreachables = no_unreachable; file.hints = false; From b4a64ed6e7b857317070fcb9d87ff5d4a73be3e8 Mon Sep 17 00:00:00 2001 From: Slark Xiao Date: Thu, 14 Apr 2022 15:44:34 +0800 Subject: [PATCH 156/803] USB: serial: option: add support for Cinterion MV32-WA/MV32-WB Add support for Cinterion device MV32-WA/MV32-WB. MV32-WA PID is 0x00F1, and MV32-WB PID is 0x00F2. Test evidence as below: T: Bus=04 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 4 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=ef(misc ) Sub=02 Prot=01 MxPS= 9 #Cfgs= 1 P: Vendor=1e2d ProdID=00f1 Rev=05.04 S: Manufacturer=Cinterion S: Product=Cinterion PID 0x00F1 USB Mobile Broadband S: SerialNumber=78ada8c4 C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#=0x0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#=0x1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option I: If#=0x3 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option I: If#=0x5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option T: Bus=04 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 3 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=ef(misc ) Sub=02 Prot=01 MxPS= 9 #Cfgs= 1 P: Vendor=1e2d ProdID=00f2 Rev=05.04 S: Manufacturer=Cinterion S: Product=Cinterion PID 0x00F2 USB Mobile Broadband S: SerialNumber=cdd06a78 C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#=0x0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#=0x1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option I: If#=0x3 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option I: If#=0x5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option Interface 0&1: MBIM, 2:Modem, 3: GNSS, 4: NMEA, 5: Diag GNSS port don't use serial driver. Signed-off-by: Slark Xiao Link: https://lore.kernel.org/r/20220414074434.5699-1-slark_xiao@163.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 8e2fc232da10..1364ce7f0abf 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -432,6 +432,8 @@ static void option_instat_callback(struct urb *urb); #define CINTERION_PRODUCT_CLS8 0x00b0 #define CINTERION_PRODUCT_MV31_MBIM 0x00b3 #define CINTERION_PRODUCT_MV31_RMNET 0x00b7 +#define CINTERION_PRODUCT_MV32_WA 0x00f1 +#define CINTERION_PRODUCT_MV32_WB 0x00f2 /* Olivetti products */ #define OLIVETTI_VENDOR_ID 0x0b3c @@ -1975,6 +1977,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3)}, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_RMNET, 0xff), .driver_info = RSVD(0)}, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WA, 0xff), + .driver_info = RSVD(3)}, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WB, 0xff), + .driver_info = RSVD(3)}, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100), .driver_info = RSVD(4) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD120), From 35a923a0b329c343e9e81d79518e2937eba06fcd Mon Sep 17 00:00:00 2001 From: Bruno Thomsen Date: Thu, 14 Apr 2022 10:12:02 +0200 Subject: [PATCH 157/803] USB: serial: cp210x: add PIDs for Kamstrup USB Meter Reader Wireless reading of water and heat meters using 868 MHz wM-Bus mode C1. The two different product IDs allow detection of dongle antenna solution: - Internal antenna - External antenna using SMA connector https://www.kamstrup.com/en-en/water-solutions/water-meter-reading/usb-meter-reader Signed-off-by: Bruno Thomsen Link: https://lore.kernel.org/r/20220414081202.5591-1-bruno.thomsen@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index a27f7efcec6a..c374620a486f 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -194,6 +194,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x16DC, 0x0015) }, /* W-IE-NE-R Plein & Baus GmbH CML Control, Monitoring and Data Logger */ { USB_DEVICE(0x17A8, 0x0001) }, /* Kamstrup Optical Eye/3-wire */ { USB_DEVICE(0x17A8, 0x0005) }, /* Kamstrup M-Bus Master MultiPort 250D */ + { USB_DEVICE(0x17A8, 0x0101) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Int Ant) */ + { USB_DEVICE(0x17A8, 0x0102) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Ext Ant) */ { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */ { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ From 2c8045d48dee703ad8eab2be7d6547765a89c069 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 15 Apr 2022 16:03:10 +0200 Subject: [PATCH 158/803] phy: amlogic: fix error path in phy_g12a_usb3_pcie_probe() If clk_prepare_enable() fails we call clk_disable_unprepare() in the error path what results in a warning that the clock is disabled and unprepared already. And if we fail later in phy_g12a_usb3_pcie_probe() then we bail out w/o calling clk_disable_unprepare(). This patch fixes both errors. Fixes: 36077e16c050 ("phy: amlogic: Add Amlogic G12A USB3 + PCIE Combo PHY Driver") Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/8e416f95-1084-ee28-860e-7884f7fa2e32@gmail.com Signed-off-by: Vinod Koul --- .../phy/amlogic/phy-meson-g12a-usb3-pcie.c | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c b/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c index 5b471ab80fe2..54d65a6f0fcc 100644 --- a/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c +++ b/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c @@ -414,19 +414,19 @@ static int phy_g12a_usb3_pcie_probe(struct platform_device *pdev) ret = clk_prepare_enable(priv->clk_ref); if (ret) - goto err_disable_clk_ref; + return ret; priv->reset = devm_reset_control_array_get_exclusive(dev); - if (IS_ERR(priv->reset)) - return PTR_ERR(priv->reset); + if (IS_ERR(priv->reset)) { + ret = PTR_ERR(priv->reset); + goto err_disable_clk_ref; + } priv->phy = devm_phy_create(dev, np, &phy_g12a_usb3_pcie_ops); if (IS_ERR(priv->phy)) { ret = PTR_ERR(priv->phy); - if (ret != -EPROBE_DEFER) - dev_err(dev, "failed to create PHY\n"); - - return ret; + dev_err_probe(dev, ret, "failed to create PHY\n"); + goto err_disable_clk_ref; } phy_set_drvdata(priv->phy, priv); @@ -434,8 +434,12 @@ static int phy_g12a_usb3_pcie_probe(struct platform_device *pdev) phy_provider = devm_of_phy_provider_register(dev, phy_g12a_usb3_pcie_xlate); + if (IS_ERR(phy_provider)) { + ret = PTR_ERR(phy_provider); + goto err_disable_clk_ref; + } - return PTR_ERR_OR_ZERO(phy_provider); + return 0; err_disable_clk_ref: clk_disable_unprepare(priv->clk_ref); From 95d4782c34a60800ccf91d9f0703137d4367a2fc Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 19 Apr 2022 14:01:58 -0700 Subject: [PATCH 159/803] iommu/arm-smmu-v3: Fix size calculation in arm_smmu_mm_invalidate_range() The arm_smmu_mm_invalidate_range function is designed to be called by mm core for Shared Virtual Addressing purpose between IOMMU and CPU MMU. However, the ways of two subsystems defining their "end" addresses are slightly different. IOMMU defines its "end" address using the last address of an address range, while mm core defines that using the following address of an address range: include/linux/mm_types.h: unsigned long vm_end; /* The first byte after our end address ... This mismatch resulted in an incorrect calculation for size so it failed to be page-size aligned. Further, it caused a dead loop at "while (iova < end)" check in __arm_smmu_tlb_inv_range function. This patch fixes the issue by doing the calculation correctly. Fixes: 2f7e8c553e98 ("iommu/arm-smmu-v3: Hook up ATC invalidation to mm ops") Cc: stable@vger.kernel.org Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Reviewed-by: Robin Murphy Reviewed-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20220419210158.21320-1-nicolinc@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 22ddd05bbdcd..c623dae1e115 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -183,7 +183,14 @@ static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn, { struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); struct arm_smmu_domain *smmu_domain = smmu_mn->domain; - size_t size = end - start + 1; + size_t size; + + /* + * The mm_types defines vm_end as the first byte after the end address, + * different from IOMMU subsystem using the last address of an address + * range. So do a simple translation here by calculating size correctly. + */ + size = end - start; if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid, From 3756aa16fadaef2873cfbd2659dfa1978a7e1859 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Tue, 12 Apr 2022 13:16:58 +0200 Subject: [PATCH 160/803] ASoC: simple-card-utils: fix sysclk shutdown In asoc_simple_shutdown() the snd_soc_dai_set_sysclk() function is called twice with input direction SND_SOC_CLOCK_IN. Restore one call with output direction SND_SOC_CLOCK_OUT. Fixes: 5ca2ab459817 ("ASoC: simple-card-utils: Add new system-clock-fixed flag") Signed-off-by: Olivier Moysan Link: https://lore.kernel.org/r/20220412111658.11015-1-olivier.moysan@foss.st.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index f2157944247f..da0c27828ce6 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -322,7 +322,7 @@ void asoc_simple_shutdown(struct snd_pcm_substream *substream) if (props->mclk_fs && !dai->clk_fixed && !snd_soc_dai_active(cpu_dai)) snd_soc_dai_set_sysclk(cpu_dai, - 0, 0, SND_SOC_CLOCK_IN); + 0, 0, SND_SOC_CLOCK_OUT); asoc_simple_clk_disable(dai); } From 570a4bf7440e9fb2a4164244a6bf60a46362b627 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Mon, 18 Apr 2022 12:41:04 -0500 Subject: [PATCH 161/803] RDMA/rxe: Recheck the MR in when generating a READ reply The rping benchmark fails on long runs. The root cause of this failure has been traced to a failure to compute a nonzero value of mr in rare situations. Fix this failure by correctly handling the computation of mr in read_reply() in rxe_resp.c in the replay flow. Fixes: 8a1a0be894da ("RDMA/rxe: Replace mr by rkey in responder resources") Link: https://lore.kernel.org/r/20220418174103.3040-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 1d95fab606da..9cd0eaff98de 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -745,8 +745,14 @@ static enum resp_states read_reply(struct rxe_qp *qp, } if (res->state == rdatm_res_state_new) { - mr = qp->resp.mr; - qp->resp.mr = NULL; + if (!res->replay) { + mr = qp->resp.mr; + qp->resp.mr = NULL; + } else { + mr = rxe_recheck_mr(qp, res->read.rkey); + if (!mr) + return RESPST_ERR_RKEY_VIOLATION; + } if (res->read.resid <= mtu) opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY; From 48473802506d2d6151f59e0e764932b33b53cb3b Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Wed, 20 Apr 2022 03:13:44 -0700 Subject: [PATCH 162/803] tty: n_gsm: fix missing update of modem controls after DLCI open Currently the peer is not informed about the initial state of the modem control lines after a new DLCI has been opened. Fix this by sending the initial modem control line states after DLCI open. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220420101346.3315-1-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index e440c7f6d20e..979dc9151383 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -370,6 +370,7 @@ static const u8 gsm_fcs8[256] = { #define GOOD_FCS 0xCF static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len); +static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk); /** * gsm_fcs_add - update FCS @@ -1483,6 +1484,9 @@ static void gsm_dlci_open(struct gsm_dlci *dlci) pr_debug("DLCI %d goes open.\n", dlci->addr); /* Register gsmtty driver,report gsmtty dev add uevent for user */ tty_register_device(gsm_tty_driver, dlci->addr, NULL); + /* Send current modem state */ + if (dlci->addr) + gsmtty_modem_update(dlci, 0); wake_up(&dlci->gsm->event); } From aa63a74d4535a1d97b60e46655a1361c42565b89 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 6 Apr 2022 15:01:50 -0700 Subject: [PATCH 163/803] topology/sysfs: Hide PPIN on systems that do not support it. Systems that do not support a Protected Processor Identification Number currently report: # cat /sys/devices/system/cpu/cpu0/topology/ppin 0x0 which is confusing/wrong. Add a ".is_visible" function to suppress inclusion of the ppin file. Fixes: ab28e944197f ("topology/sysfs: Add PPIN in sysfs under cpu topology") Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20220406220150.63855-1-tony.luck@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/topology.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/base/topology.c b/drivers/base/topology.c index e9d1efcda89b..706dbf8bf249 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -152,9 +152,21 @@ static struct attribute *default_attrs[] = { NULL }; +static umode_t topology_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + struct device *dev = kobj_to_dev(kobj); + + if (attr == &dev_attr_ppin.attr && !topology_ppin(dev->id)) + return 0; + + return attr->mode; +} + static const struct attribute_group topology_attr_group = { .attrs = default_attrs, .bin_attrs = bin_attrs, + .is_visible = topology_is_visible, .name = "topology" }; From db1e59483dfd8d4e956575302520bb8f7e20c79b Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Mon, 11 Apr 2022 13:53:34 -0700 Subject: [PATCH 164/803] topology: make core_mask include at least cluster_siblings Ampere Altra defines CPU clusters in the ACPI PPTT. They share a Snoop Control Unit, but have no shared CPU-side last level cache. cpu_coregroup_mask() will return a cpumask with weight 1, while cpu_clustergroup_mask() will return a cpumask with weight 2. As a result, build_sched_domain() will BUG() once per CPU with: BUG: arch topology borken the CLS domain not a subset of the MC domain The MC level cpumask is then extended to that of the CLS child, and is later removed entirely as redundant. This sched domain topology is an improvement over previous topologies, or those built without SCHED_CLUSTER, particularly for certain latency sensitive workloads. With the current scheduler model and heuristics, this is a desirable default topology for Ampere Altra and Altra Max system. Rather than create a custom sched domains topology structure and introduce new logic in arch/arm64 to detect these systems, update the core_mask so coregroup is never a subset of clustergroup, extending it to cluster_siblings if necessary. Only do this if CONFIG_SCHED_CLUSTER is enabled to avoid also changing the topology (MC) when CONFIG_SCHED_CLUSTER is disabled. This has the added benefit over a custom topology of working for both symmetric and asymmetric topologies. It does not address systems where the CLUSTER topology is above a populated MC topology, but these are not considered today and can be addressed separately if and when they appear. The final sched domain topology for a 2 socket Ampere Altra system is unchanged with or without CONFIG_SCHED_CLUSTER, and the BUG is avoided: For CPU0: CONFIG_SCHED_CLUSTER=y CLS [0-1] DIE [0-79] NUMA [0-159] CONFIG_SCHED_CLUSTER is not set DIE [0-79] NUMA [0-159] Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Catalin Marinas Cc: Will Deacon Cc: Peter Zijlstra Cc: Vincent Guittot Cc: D. Scott Phillips Cc: Ilkka Koskinen Cc: # 5.16.x Suggested-by: Barry Song Reviewed-by: Barry Song Reviewed-by: Dietmar Eggemann Acked-by: Sudeep Holla Signed-off-by: Darren Hart Link: https://lore.kernel.org/r/c8fe9fce7c86ed56b4c455b8c902982dc2303868.1649696956.git.darren@os.amperecomputing.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/arch_topology.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 1d6636ebaac5..5497c5ab7318 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -667,6 +667,15 @@ const struct cpumask *cpu_coregroup_mask(int cpu) core_mask = &cpu_topology[cpu].llc_sibling; } + /* + * For systems with no shared cpu-side LLC but with clusters defined, + * extend core_mask to cluster_siblings. The sched domain builder will + * then remove MC as redundant with CLS if SCHED_CLUSTER is enabled. + */ + if (IS_ENABLED(CONFIG_SCHED_CLUSTER) && + cpumask_subset(core_mask, &cpu_topology[cpu].cluster_sibling)) + core_mask = &cpu_topology[cpu].cluster_sibling; + return core_mask; } From 1dc9f1a66e1718479e1c4f95514e1750602a3cb9 Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Sun, 10 Apr 2022 19:36:19 -0700 Subject: [PATCH 165/803] arch_topology: Do not set llc_sibling if llc_id is invalid When ACPI is not enabled, cpuid_topo->llc_id = cpu_topo->llc_id = -1, which will set llc_sibling 0xff(...), this is misleading. Don't set llc_sibling(default 0) if we don't know the cache topology. Reviewed-by: Sudeep Holla Signed-off-by: Wang Qing Fixes: 37c3ec2d810f ("arm64: topology: divorce MC scheduling domain from core_siblings") Cc: stable Link: https://lore.kernel.org/r/1649644580-54626-1-git-send-email-wangqing@vivo.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/arch_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 5497c5ab7318..f73b836047cf 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -693,7 +693,7 @@ void update_siblings_masks(unsigned int cpuid) for_each_online_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; - if (cpuid_topo->llc_id == cpu_topo->llc_id) { + if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) { cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling); cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling); } From 930e2607638de8325686319b2789323cc85ea671 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 12 Apr 2022 14:45:50 -0700 Subject: [PATCH 166/803] f2fs: remove obsolete whint_mode This patch removes obsolete whint_mode. Fixes: 41d36a9f3e53 ("fs: remove kiocb.ki_hint") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 70 ---------------------- fs/f2fs/f2fs.h | 9 --- fs/f2fs/segment.c | 95 ------------------------------ fs/f2fs/super.c | 32 +--------- 4 files changed, 1 insertion(+), 205 deletions(-) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 4a2426f0485a..ad8dc8c040a2 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -235,12 +235,6 @@ offgrpjquota Turn off group journalled quota. offprjjquota Turn off project journalled quota. quota Enable plain user disk quota accounting. noquota Disable all plain disk quota option. -whint_mode=%s Control which write hints are passed down to block - layer. This supports "off", "user-based", and - "fs-based". In "off" mode (default), f2fs does not pass - down hints. In "user-based" mode, f2fs tries to pass - down hints given by users. And in "fs-based" mode, f2fs - passes down hints with its policy. alloc_mode=%s Adjust block allocation policy, which supports "reuse" and "default". fsync_mode=%s Control the policy of fsync. Currently supports "posix", @@ -751,70 +745,6 @@ In order to identify whether the data in the victim segment are valid or not, F2FS manages a bitmap. Each bit represents the validity of a block, and the bitmap is composed of a bit stream covering whole blocks in main area. -Write-hint Policy ------------------ - -1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET. - -2) whint_mode=user-based. F2FS tries to pass down hints given by -users. - -===================== ======================== =================== -User F2FS Block -===================== ======================== =================== -N/A META WRITE_LIFE_NOT_SET -N/A HOT_NODE " -N/A WARM_NODE " -N/A COLD_NODE " -ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME -extension list " " - --- buffered io -WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME -WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT -WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET -WRITE_LIFE_NONE " " -WRITE_LIFE_MEDIUM " " -WRITE_LIFE_LONG " " - --- direct io -WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME -WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT -WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET -WRITE_LIFE_NONE " WRITE_LIFE_NONE -WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM -WRITE_LIFE_LONG " WRITE_LIFE_LONG -===================== ======================== =================== - -3) whint_mode=fs-based. F2FS passes down hints with its policy. - -===================== ======================== =================== -User F2FS Block -===================== ======================== =================== -N/A META WRITE_LIFE_MEDIUM; -N/A HOT_NODE WRITE_LIFE_NOT_SET -N/A WARM_NODE " -N/A COLD_NODE WRITE_LIFE_NONE -ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME -extension list " " - --- buffered io -WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME -WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT -WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG -WRITE_LIFE_NONE " " -WRITE_LIFE_MEDIUM " " -WRITE_LIFE_LONG " " - --- direct io -WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME -WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT -WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET -WRITE_LIFE_NONE " WRITE_LIFE_NONE -WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM -WRITE_LIFE_LONG " WRITE_LIFE_LONG -===================== ======================== =================== - Fallocate(2) Policy ------------------- diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cd1e65bcf0b0..8c570de21ed5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -154,7 +154,6 @@ struct f2fs_mount_info { int s_jquota_fmt; /* Format of quota to use */ #endif /* For which write hints are passed down to block layer */ - int whint_mode; int alloc_mode; /* segment allocation policy */ int fsync_mode; /* fsync policy */ int fs_mode; /* fs mode: LFS or ADAPTIVE */ @@ -1333,12 +1332,6 @@ enum { FS_MODE_FRAGMENT_BLK, /* block fragmentation mode */ }; -enum { - WHINT_MODE_OFF, /* not pass down write hints */ - WHINT_MODE_USER, /* try to pass down hints given by users */ - WHINT_MODE_FS, /* pass down hints with F2FS policy */ -}; - enum { ALLOC_MODE_DEFAULT, /* stay default */ ALLOC_MODE_REUSE, /* reuse segments as much as possible */ @@ -3657,8 +3650,6 @@ void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi); int __init f2fs_create_segment_manager_caches(void); void f2fs_destroy_segment_manager_caches(void); int f2fs_rw_hint_to_seg_type(enum rw_hint hint); -enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, - enum page_type type, enum temp_type temp); unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, unsigned int segno); unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 22dfeb991529..bd9731cdec56 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3243,101 +3243,6 @@ int f2fs_rw_hint_to_seg_type(enum rw_hint hint) } } -/* This returns write hints for each segment type. This hints will be - * passed down to block layer. There are mapping tables which depend on - * the mount option 'whint_mode'. - * - * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET. - * - * 2) whint_mode=user-based. F2FS tries to pass down hints given by users. - * - * User F2FS Block - * ---- ---- ----- - * META WRITE_LIFE_NOT_SET - * HOT_NODE " - * WARM_NODE " - * COLD_NODE " - * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME - * extension list " " - * - * -- buffered io - * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME - * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT - * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET - * WRITE_LIFE_NONE " " - * WRITE_LIFE_MEDIUM " " - * WRITE_LIFE_LONG " " - * - * -- direct io - * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME - * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT - * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET - * WRITE_LIFE_NONE " WRITE_LIFE_NONE - * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM - * WRITE_LIFE_LONG " WRITE_LIFE_LONG - * - * 3) whint_mode=fs-based. F2FS passes down hints with its policy. - * - * User F2FS Block - * ---- ---- ----- - * META WRITE_LIFE_MEDIUM; - * HOT_NODE WRITE_LIFE_NOT_SET - * WARM_NODE " - * COLD_NODE WRITE_LIFE_NONE - * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME - * extension list " " - * - * -- buffered io - * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME - * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT - * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG - * WRITE_LIFE_NONE " " - * WRITE_LIFE_MEDIUM " " - * WRITE_LIFE_LONG " " - * - * -- direct io - * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME - * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT - * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET - * WRITE_LIFE_NONE " WRITE_LIFE_NONE - * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM - * WRITE_LIFE_LONG " WRITE_LIFE_LONG - */ - -enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, - enum page_type type, enum temp_type temp) -{ - if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) { - if (type == DATA) { - if (temp == WARM) - return WRITE_LIFE_NOT_SET; - else if (temp == HOT) - return WRITE_LIFE_SHORT; - else if (temp == COLD) - return WRITE_LIFE_EXTREME; - } else { - return WRITE_LIFE_NOT_SET; - } - } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) { - if (type == DATA) { - if (temp == WARM) - return WRITE_LIFE_LONG; - else if (temp == HOT) - return WRITE_LIFE_SHORT; - else if (temp == COLD) - return WRITE_LIFE_EXTREME; - } else if (type == NODE) { - if (temp == WARM || temp == HOT) - return WRITE_LIFE_NOT_SET; - else if (temp == COLD) - return WRITE_LIFE_NONE; - } else if (type == META) { - return WRITE_LIFE_MEDIUM; - } - } - return WRITE_LIFE_NOT_SET; -} - static int __get_segment_type_2(struct f2fs_io_info *fio) { if (fio->type == DATA) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ea939db18f88..4368f90571bd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -138,7 +138,6 @@ enum { Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, - Opt_whint, Opt_alloc, Opt_fsync, Opt_test_dummy_encryption, @@ -214,7 +213,6 @@ static match_table_t f2fs_tokens = { {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, - {Opt_whint, "whint_mode=%s"}, {Opt_alloc, "alloc_mode=%s"}, {Opt_fsync, "fsync_mode=%s"}, {Opt_test_dummy_encryption, "test_dummy_encryption=%s"}, @@ -975,22 +973,6 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) f2fs_info(sbi, "quota operations not supported"); break; #endif - case Opt_whint: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "user-based")) { - F2FS_OPTION(sbi).whint_mode = WHINT_MODE_USER; - } else if (!strcmp(name, "off")) { - F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; - } else if (!strcmp(name, "fs-based")) { - F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; case Opt_alloc: name = match_strdup(&args[0]); if (!name) @@ -1328,12 +1310,6 @@ default_check: return -EINVAL; } - /* Not pass down write hints if the number of active logs is lesser - * than NR_CURSEG_PERSIST_TYPE. - */ - if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_PERSIST_TYPE) - F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; - if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) { f2fs_err(sbi, "Allow to mount readonly mode only"); return -EROFS; @@ -1978,10 +1954,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",prjquota"); #endif f2fs_show_quota_options(seq, sbi->sb); - if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) - seq_printf(seq, ",whint_mode=%s", "user-based"); - else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) - seq_printf(seq, ",whint_mode=%s", "fs-based"); fscrypt_show_test_dummy_encryption(seq, ',', sbi->sb); @@ -2033,7 +2005,6 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE; F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; - F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); @@ -2314,8 +2285,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) need_stop_gc = true; } - if (*flags & SB_RDONLY || - F2FS_OPTION(sbi).whint_mode != org_mount_opt.whint_mode) { + if (*flags & SB_RDONLY) { sync_inodes_sb(sb); set_sbi_flag(sbi, SBI_IS_DIRTY); From 0adc2ab0e8a88a0e8b98dae5fc1443ae8c7062ba Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 12 Apr 2022 15:01:58 -0700 Subject: [PATCH 167/803] f2fs: keep io_flags to avoid IO split due to different op_flags in two fio holders Let's attach io_flags to bio only, so that we can merge IOs given original io_flags only. Fixes: 64bf0eef0171 ("f2fs: pass the bio operation to bio_alloc_bioset") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8e0c2e773c8d..9a1a526f2092 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -388,11 +388,23 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) return 0; } -static void __attach_io_flag(struct f2fs_io_info *fio, unsigned int io_flag) +static unsigned int f2fs_io_flags(struct f2fs_io_info *fio) { unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1; - unsigned int fua_flag = io_flag & temp_mask; - unsigned int meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask; + unsigned int fua_flag, meta_flag, io_flag; + unsigned int op_flags = 0; + + if (fio->op != REQ_OP_WRITE) + return 0; + if (fio->type == DATA) + io_flag = fio->sbi->data_io_flag; + else if (fio->type == NODE) + io_flag = fio->sbi->node_io_flag; + else + return 0; + + fua_flag = io_flag & temp_mask; + meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask; /* * data/node io flag bits per temp: @@ -401,9 +413,10 @@ static void __attach_io_flag(struct f2fs_io_info *fio, unsigned int io_flag) * Cold | Warm | Hot | Cold | Warm | Hot | */ if ((1 << fio->temp) & meta_flag) - fio->op_flags |= REQ_META; + op_flags |= REQ_META; if ((1 << fio->temp) & fua_flag) - fio->op_flags |= REQ_FUA; + op_flags |= REQ_FUA; + return op_flags; } static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) @@ -413,14 +426,10 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) sector_t sector; struct bio *bio; - if (fio->type == DATA) - __attach_io_flag(fio, sbi->data_io_flag); - else if (fio->type == NODE) - __attach_io_flag(fio, sbi->node_io_flag); - bdev = f2fs_target_device(sbi, fio->new_blkaddr, §or); - bio = bio_alloc_bioset(bdev, npages, fio->op | fio->op_flags, GFP_NOIO, - &f2fs_bioset); + bio = bio_alloc_bioset(bdev, npages, + fio->op | fio->op_flags | f2fs_io_flags(fio), + GFP_NOIO, &f2fs_bioset); bio->bi_iter.bi_sector = sector; if (is_read_io(fio->op)) { bio->bi_end_io = f2fs_read_end_io; From 27275f181c7add59c211c7e40c442d8004b1e664 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 29 Mar 2022 11:28:07 -0700 Subject: [PATCH 168/803] f2fs: fix wrong condition check when failing metapage read This patch fixes wrong initialization. Fixes: 50c63009f6ab ("f2fs: avoid an infinite loop in f2fs_sync_dirty_inodes") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f5366feea82d..909085a78f9c 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -98,9 +98,9 @@ repeat: } if (unlikely(!PageUptodate(page))) { - if (page->index == sbi->metapage_eio_ofs && - sbi->metapage_eio_cnt++ == MAX_RETRY_META_PAGE_EIO) { - set_ckpt_flags(sbi, CP_ERROR_FLAG); + if (page->index == sbi->metapage_eio_ofs) { + if (sbi->metapage_eio_cnt++ == MAX_RETRY_META_PAGE_EIO) + set_ckpt_flags(sbi, CP_ERROR_FLAG); } else { sbi->metapage_eio_ofs = page->index; sbi->metapage_eio_cnt = 0; From 08ef48404965cfef99343d6bbbcf75b88c74aa0e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 20 Apr 2022 14:34:37 +0100 Subject: [PATCH 169/803] ASoC: da7219: Fix change notifications for tone generator frequency The tone generator frequency control just returns 0 on successful write, not a boolean value indicating if there was a change or not. Compare what was written with the value that was there previously so that notifications are generated appropriately when the value changes. Signed-off-by: Mark Brown Reviewed-by: Adam Thomson Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220420133437.569229-1-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/da7219.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/da7219.c b/sound/soc/codecs/da7219.c index 13009d08b09a..c7493549a9a5 100644 --- a/sound/soc/codecs/da7219.c +++ b/sound/soc/codecs/da7219.c @@ -446,7 +446,7 @@ static int da7219_tonegen_freq_put(struct snd_kcontrol *kcontrol, struct soc_mixer_control *mixer_ctrl = (struct soc_mixer_control *) kcontrol->private_value; unsigned int reg = mixer_ctrl->reg; - __le16 val; + __le16 val_new, val_old; int ret; /* @@ -454,13 +454,19 @@ static int da7219_tonegen_freq_put(struct snd_kcontrol *kcontrol, * Therefore we need to convert to little endian here to align with * HW registers. */ - val = cpu_to_le16(ucontrol->value.integer.value[0]); + val_new = cpu_to_le16(ucontrol->value.integer.value[0]); mutex_lock(&da7219->ctrl_lock); - ret = regmap_raw_write(da7219->regmap, reg, &val, sizeof(val)); + ret = regmap_raw_read(da7219->regmap, reg, &val_old, sizeof(val_old)); + if (ret == 0 && (val_old != val_new)) + ret = regmap_raw_write(da7219->regmap, reg, + &val_new, sizeof(val_new)); mutex_unlock(&da7219->ctrl_lock); - return ret; + if (ret < 0) + return ret; + + return val_old != val_new; } From b9b3fe152e4966cf8562630de67aa49e2f9c9222 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Apr 2022 08:44:59 +1000 Subject: [PATCH 170/803] xfs: convert buffer flags to unsigned. 5.18 w/ std=gnu11 compiled with gcc-5 wants flags stored in unsigned fields to be unsigned. This manifests as a compiler error such as: /kisskb/src/fs/xfs/./xfs_trace.h:432:2: note: in expansion of macro 'TP_printk' TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d " ^ /kisskb/src/fs/xfs/./xfs_trace.h:440:5: note: in expansion of macro '__print_flags' __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), ^ /kisskb/src/fs/xfs/xfs_buf.h:67:4: note: in expansion of macro 'XBF_UNMAPPED' { XBF_UNMAPPED, "UNMAPPED" } ^ /kisskb/src/fs/xfs/./xfs_trace.h:440:40: note: in expansion of macro 'XFS_BUF_FLAGS' __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), ^ /kisskb/src/fs/xfs/./xfs_trace.h: In function 'trace_raw_output_xfs_buf_flags_class': /kisskb/src/fs/xfs/xfs_buf.h:46:23: error: initializer element is not constant #define XBF_UNMAPPED (1 << 31)/* do not map the buffer */ as __print_flags assigns XFS_BUF_FLAGS to a structure that uses an unsigned long for the flag. Since this results in the value of XBF_UNMAPPED causing a signed integer overflow, the result is technically undefined behavior, which gcc-5 does not accept as an integer constant. This is based on a patch from Arnd Bergman . Reported-by: Geert Uytterhoeven Signed-off-by: Dave Chinner Reviewed-by: Chandan Babu R Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf.c | 6 +++--- fs/xfs/xfs_buf.h | 42 +++++++++++++++++++++--------------------- fs/xfs/xfs_trans.h | 2 +- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index e1afb9e503e1..bf4e60871068 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -406,7 +406,7 @@ xfs_buf_alloc_pages( STATIC int _xfs_buf_map_pages( struct xfs_buf *bp, - uint flags) + xfs_buf_flags_t flags) { ASSERT(bp->b_flags & _XBF_PAGES); if (bp->b_page_count == 1) { @@ -868,7 +868,7 @@ xfs_buf_read_uncached( struct xfs_buftarg *target, xfs_daddr_t daddr, size_t numblks, - int flags, + xfs_buf_flags_t flags, struct xfs_buf **bpp, const struct xfs_buf_ops *ops) { @@ -903,7 +903,7 @@ int xfs_buf_get_uncached( struct xfs_buftarg *target, size_t numblks, - int flags, + xfs_buf_flags_t flags, struct xfs_buf **bpp) { int error; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index edcb6254fa6a..1ee3056ff9cf 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -22,28 +22,28 @@ struct xfs_buf; #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) -#define XBF_READ (1 << 0) /* buffer intended for reading from device */ -#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ -#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ -#define XBF_NO_IOACCT (1 << 3) /* bypass I/O accounting (non-LRU bufs) */ -#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ -#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ -#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ -#define XBF_WRITE_FAIL (1 << 7) /* async writes have failed on this buffer */ +#define XBF_READ (1u << 0) /* buffer intended for reading from device */ +#define XBF_WRITE (1u << 1) /* buffer intended for writing to device */ +#define XBF_READ_AHEAD (1u << 2) /* asynchronous read-ahead */ +#define XBF_NO_IOACCT (1u << 3) /* bypass I/O accounting (non-LRU bufs) */ +#define XBF_ASYNC (1u << 4) /* initiator will not wait for completion */ +#define XBF_DONE (1u << 5) /* all pages in the buffer uptodate */ +#define XBF_STALE (1u << 6) /* buffer has been staled, do not find it */ +#define XBF_WRITE_FAIL (1u << 7) /* async writes have failed on this buffer */ /* buffer type flags for write callbacks */ -#define _XBF_INODES (1 << 16)/* inode buffer */ -#define _XBF_DQUOTS (1 << 17)/* dquot buffer */ -#define _XBF_LOGRECOVERY (1 << 18)/* log recovery buffer */ +#define _XBF_INODES (1u << 16)/* inode buffer */ +#define _XBF_DQUOTS (1u << 17)/* dquot buffer */ +#define _XBF_LOGRECOVERY (1u << 18)/* log recovery buffer */ /* flags used only internally */ -#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ -#define _XBF_KMEM (1 << 21)/* backed by heap memory */ -#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ +#define _XBF_PAGES (1u << 20)/* backed by refcounted pages */ +#define _XBF_KMEM (1u << 21)/* backed by heap memory */ +#define _XBF_DELWRI_Q (1u << 22)/* buffer on a delwri queue */ /* flags used only as arguments to access routines */ -#define XBF_TRYLOCK (1 << 30)/* lock requested, but do not wait */ -#define XBF_UNMAPPED (1 << 31)/* do not map the buffer */ +#define XBF_TRYLOCK (1u << 30)/* lock requested, but do not wait */ +#define XBF_UNMAPPED (1u << 31)/* do not map the buffer */ typedef unsigned int xfs_buf_flags_t; @@ -58,7 +58,7 @@ typedef unsigned int xfs_buf_flags_t; { XBF_WRITE_FAIL, "WRITE_FAIL" }, \ { _XBF_INODES, "INODES" }, \ { _XBF_DQUOTS, "DQUOTS" }, \ - { _XBF_LOGRECOVERY, "LOG_RECOVERY" }, \ + { _XBF_LOGRECOVERY, "LOG_RECOVERY" }, \ { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ @@ -247,11 +247,11 @@ xfs_buf_readahead( return xfs_buf_readahead_map(target, &map, 1, ops); } -int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, int flags, - struct xfs_buf **bpp); +int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, + xfs_buf_flags_t flags, struct xfs_buf **bpp); int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, - size_t numblks, int flags, struct xfs_buf **bpp, - const struct xfs_buf_ops *ops); + size_t numblks, xfs_buf_flags_t flags, struct xfs_buf **bpp, + const struct xfs_buf_ops *ops); int _xfs_buf_read(struct xfs_buf *bp, xfs_buf_flags_t flags); void xfs_buf_hold(struct xfs_buf *bp); diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index de177842b951..0c82673238f4 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -175,7 +175,7 @@ xfs_trans_get_buf( struct xfs_buftarg *target, xfs_daddr_t blkno, int numblks, - uint flags, + xfs_buf_flags_t flags, struct xfs_buf **bpp) { DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); From d65a92de4383e54b920ba11f333032b0ea5e4174 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 21 Apr 2022 08:45:14 +1000 Subject: [PATCH 171/803] MAINTAINERS: update IOMAP FILESYSTEM LIBRARY and XFS FILESYSTEM In IOMAP FILESYSTEM LIBRARY and XFS FILESYSTEM, the M(ail): entry is redundant with the L(ist): entry, remove the redundant M(ail): entry. Signed-off-by: Tiezhu Yang Reviewed-by: Darrick J. Wong Reviewed-by: Chaitanya Kulkarni Signed-off-by: Dave Chinner --- MAINTAINERS | 3 --- 1 file changed, 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 61d9f114c37f..726608fa1079 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10238,8 +10238,6 @@ F: drivers/net/ethernet/sgi/ioc3-eth.c IOMAP FILESYSTEM LIBRARY M: Christoph Hellwig M: Darrick J. Wong -M: linux-xfs@vger.kernel.org -M: linux-fsdevel@vger.kernel.org L: linux-xfs@vger.kernel.org L: linux-fsdevel@vger.kernel.org S: Supported @@ -21596,7 +21594,6 @@ F: drivers/xen/*swiotlb* XFS FILESYSTEM C: irc://irc.oftc.net/xfs M: Darrick J. Wong -M: linux-xfs@vger.kernel.org L: linux-xfs@vger.kernel.org S: Supported W: http://xfs.org/ From 9a5280b312e2e7898b6397b2ca3cfd03f67d7be1 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Apr 2022 08:45:16 +1000 Subject: [PATCH 172/803] xfs: reorder iunlink remove operation in xfs_ifree The O_TMPFILE creation implementation creates a specific order of operations for inode allocation/freeing and unlinked list modification. Currently both are serialised by the AGI, so the order doesn't strictly matter as long as the are both in the same transaction. However, if we want to move the unlinked list insertions largely out from under the AGI lock, then we have to be concerned about the order in which we do unlinked list modification operations. O_TMPFILE creation tells us this order is inode allocation/free, then unlinked list modification. Change xfs_ifree() to use this same ordering on unlinked list removal. This way we always guarantee that when we enter the iunlinked list removal code from this path, we already have the AGI locked and we don't have to worry about lock nesting AGI reads inside unlink list locks because it's already locked and attached to the transaction. We can do this safely as the inode freeing and unlinked list removal are done in the same transaction and hence are atomic operations with respect to log recovery. Reported-by: Frank Hofmann Fixes: 298f7bec503f ("xfs: pin inode backing buffer to the inode log item") Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_inode.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 9de6205fe134..39ae53efb3ab 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2594,14 +2594,13 @@ xfs_ifree_cluster( } /* - * This is called to return an inode to the inode free list. - * The inode should already be truncated to 0 length and have - * no pages associated with it. This routine also assumes that - * the inode is already a part of the transaction. + * This is called to return an inode to the inode free list. The inode should + * already be truncated to 0 length and have no pages associated with it. This + * routine also assumes that the inode is already a part of the transaction. * - * The on-disk copy of the inode will have been added to the list - * of unlinked inodes in the AGI. We need to remove the inode from - * that list atomically with respect to freeing it here. + * The on-disk copy of the inode will have been added to the list of unlinked + * inodes in the AGI. We need to remove the inode from that list atomically with + * respect to freeing it here. */ int xfs_ifree( @@ -2623,13 +2622,16 @@ xfs_ifree( pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); /* - * Pull the on-disk inode from the AGI unlinked list. + * Free the inode first so that we guarantee that the AGI lock is going + * to be taken before we remove the inode from the unlinked list. This + * makes the AGI lock -> unlinked list modification order the same as + * used in O_TMPFILE creation. */ - error = xfs_iunlink_remove(tp, pag, ip); - if (error) - goto out; - error = xfs_difree(tp, pag, ip->i_ino, &xic); + if (error) + return error; + + error = xfs_iunlink_remove(tp, pag, ip); if (error) goto out; From 87950929e2ff2236207bdbe14bff8230558b541b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 9 Apr 2022 18:59:58 +0800 Subject: [PATCH 173/803] pinctrl: mediatek: moore: Fix build error If EINT_MTK is m and PINCTRL_MTK_V2 is y, build fails: drivers/pinctrl/mediatek/pinctrl-moore.o: In function `mtk_gpio_set_config': pinctrl-moore.c:(.text+0xa6c): undefined reference to `mtk_eint_set_debounce' drivers/pinctrl/mediatek/pinctrl-moore.o: In function `mtk_gpio_to_irq': pinctrl-moore.c:(.text+0xacc): undefined reference to `mtk_eint_find_irq' Select EINT_MTK for PINCTRL_MTK_V2 to fix this. Fixes: 8174a8512e3e ("pinctrl: mediatek: make MediaTek pinctrl v2 driver ready for buidling loadable module") Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20220409105958.37412-1-yuehaibing@huawei.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/mediatek/Kconfig b/drivers/pinctrl/mediatek/Kconfig index 8dca1ef04965..40accd110c3d 100644 --- a/drivers/pinctrl/mediatek/Kconfig +++ b/drivers/pinctrl/mediatek/Kconfig @@ -30,6 +30,7 @@ config PINCTRL_MTK_MOORE select GENERIC_PINMUX_FUNCTIONS select GPIOLIB select OF_GPIO + select EINT_MTK select PINCTRL_MTK_V2 config PINCTRL_MTK_PARIS From 694852ead287a3433126e7ebda397b242dc99624 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 12 Apr 2022 20:52:35 +0900 Subject: [PATCH 174/803] zonefs: Clear inode information flags on inode creation Ensure that the i_flags field of struct zonefs_inode_info is cleared to 0 when initializing a zone file inode, avoiding seeing the flag ZONEFS_ZONE_OPEN being incorrectly set. Fixes: b5c00e975779 ("zonefs: open/close zone on file open/close") Cc: Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hans Holmberg --- fs/zonefs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 3614c7834007..75d8dabe0807 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1142,6 +1142,7 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb) inode_init_once(&zi->i_vnode); mutex_init(&zi->i_truncate_mutex); zi->i_wr_refcnt = 0; + zi->i_flags = 0; return &zi->i_vnode; } From 1da18a296f5ba4f99429e62a7cf4fdbefa598902 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 12 Apr 2022 17:41:37 +0900 Subject: [PATCH 175/803] zonefs: Fix management of open zones The mount option "explicit_open" manages the device open zone resources to ensure that if an application opens a sequential file for writing, the file zone can always be written by explicitly opening the zone and accounting for that state with the s_open_zones counter. However, if some zones are already open when mounting, the device open zone resource usage status will be larger than the initial s_open_zones value of 0. Ensure that this inconsistency does not happen by closing any sequential zone that is open when mounting. Furthermore, with ZNS drives, closing an explicitly open zone that has not been written will change the zone state to "closed", that is, the zone will remain in an active state. Since this can then cause failures of explicit open operations on other zones if the drive active zone resources are exceeded, we need to make sure that the zone is not active anymore by resetting it instead of closing it. To address this, zonefs_zone_mgmt() is modified to change a REQ_OP_ZONE_CLOSE request into a REQ_OP_ZONE_RESET for sequential zones that have not been written. Fixes: b5c00e975779 ("zonefs: open/close zone on file open/close") Cc: Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Hans Holmberg --- fs/zonefs/super.c | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 75d8dabe0807..e20e7c841489 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -35,6 +35,17 @@ static inline int zonefs_zone_mgmt(struct inode *inode, lockdep_assert_held(&zi->i_truncate_mutex); + /* + * With ZNS drives, closing an explicitly open zone that has not been + * written will change the zone state to "closed", that is, the zone + * will remain active. Since this can then cause failure of explicit + * open operation on other zones if the drive active zone resources + * are exceeded, make sure that the zone does not remain active by + * resetting it. + */ + if (op == REQ_OP_ZONE_CLOSE && !zi->i_wpoffset) + op = REQ_OP_ZONE_RESET; + trace_zonefs_zone_mgmt(inode, op); ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector, zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS); @@ -1294,12 +1305,13 @@ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode, inc_nlink(parent); } -static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, - enum zonefs_ztype type) +static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, + enum zonefs_ztype type) { struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct zonefs_inode_info *zi = ZONEFS_I(inode); + int ret = 0; inode->i_ino = zone->start >> sbi->s_zone_sectors_shift; inode->i_mode = S_IFREG | sbi->s_perm; @@ -1324,6 +1336,22 @@ static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes); sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits; sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits; + + /* + * For sequential zones, make sure that any open zone is closed first + * to ensure that the initial number of open zones is 0, in sync with + * the open zone accounting done when the mount option + * ZONEFS_MNTOPT_EXPLICIT_OPEN is used. + */ + if (type == ZONEFS_ZTYPE_SEQ && + (zone->cond == BLK_ZONE_COND_IMP_OPEN || + zone->cond == BLK_ZONE_COND_EXP_OPEN)) { + mutex_lock(&zi->i_truncate_mutex); + ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); + mutex_unlock(&zi->i_truncate_mutex); + } + + return ret; } static struct dentry *zonefs_create_inode(struct dentry *parent, @@ -1333,6 +1361,7 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, struct inode *dir = d_inode(parent); struct dentry *dentry; struct inode *inode; + int ret; dentry = d_alloc_name(parent, name); if (!dentry) @@ -1343,10 +1372,16 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, goto dput; inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime; - if (zone) - zonefs_init_file_inode(inode, zone, type); - else + if (zone) { + ret = zonefs_init_file_inode(inode, zone, type); + if (ret) { + iput(inode); + goto dput; + } + } else { zonefs_init_dir_inode(dir, inode, type); + } + d_add(dentry, inode); dir->i_size++; From ac875df4d854ab13d9c4af682a1837a1214fecec Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 20 Apr 2022 16:14:07 +0200 Subject: [PATCH 176/803] pinctrl: samsung: fix missing GPIOLIB on ARM64 Exynos config The Samsung pinctrl drivers depend on OF_GPIO, which is part of GPIOLIB. ARMv7 Exynos platform selects GPIOLIB and Samsung pinctrl drivers. ARMv8 Exynos selects only the latter leading to possible wrong configuration on ARMv8 build: WARNING: unmet direct dependencies detected for PINCTRL_EXYNOS Depends on [n]: PINCTRL [=y] && OF_GPIO [=n] && (ARCH_EXYNOS [=y] || ARCH_S5PV210 || COMPILE_TEST [=y]) Selected by [y]: - ARCH_EXYNOS [=y] Always select the GPIOLIB from the Samsung pinctrl drivers to fix the issue. This requires removing of OF_GPIO dependency (to avoid recursive dependency), so add dependency on OF for COMPILE_TEST cases. Reported-by: Necip Fazil Yildiran Fixes: eed6b3eb20b9 ("arm64: Split out platform options to separate Kconfig") Cc: Signed-off-by: Krzysztof Kozlowski Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/r/20220420141407.470955-1-krzysztof.kozlowski@linaro.org --- arch/arm/mach-exynos/Kconfig | 1 - drivers/pinctrl/samsung/Kconfig | 11 ++++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index f7d993628cb7..a9c1efcf7c9c 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -17,7 +17,6 @@ menuconfig ARCH_EXYNOS select EXYNOS_PMU select EXYNOS_SROM select EXYNOS_PM_DOMAINS if PM_GENERIC_DOMAINS - select GPIOLIB select HAVE_ARM_ARCH_TIMER if ARCH_EXYNOS5 select HAVE_ARM_SCU if SMP select PINCTRL diff --git a/drivers/pinctrl/samsung/Kconfig b/drivers/pinctrl/samsung/Kconfig index dfd805e76862..7b0576f71376 100644 --- a/drivers/pinctrl/samsung/Kconfig +++ b/drivers/pinctrl/samsung/Kconfig @@ -4,14 +4,13 @@ # config PINCTRL_SAMSUNG bool - depends on OF_GPIO + select GPIOLIB select PINMUX select PINCONF config PINCTRL_EXYNOS bool "Pinctrl common driver part for Samsung Exynos SoCs" - depends on OF_GPIO - depends on ARCH_EXYNOS || ARCH_S5PV210 || COMPILE_TEST + depends on ARCH_EXYNOS || ARCH_S5PV210 || (COMPILE_TEST && OF) select PINCTRL_SAMSUNG select PINCTRL_EXYNOS_ARM if ARM && (ARCH_EXYNOS || ARCH_S5PV210) select PINCTRL_EXYNOS_ARM64 if ARM64 && ARCH_EXYNOS @@ -26,12 +25,10 @@ config PINCTRL_EXYNOS_ARM64 config PINCTRL_S3C24XX bool "Samsung S3C24XX SoC pinctrl driver" - depends on OF_GPIO - depends on ARCH_S3C24XX || COMPILE_TEST + depends on ARCH_S3C24XX || (COMPILE_TEST && OF) select PINCTRL_SAMSUNG config PINCTRL_S3C64XX bool "Samsung S3C64XX SoC pinctrl driver" - depends on OF_GPIO - depends on ARCH_S3C64XX || COMPILE_TEST + depends on ARCH_S3C64XX || (COMPILE_TEST && OF) select PINCTRL_SAMSUNG From 084c16ab423a8890121b902b405823bfec5b4365 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Tue, 12 Apr 2022 08:34:31 +0000 Subject: [PATCH 177/803] mtd: rawnand: Fix return value check of wait_for_completion_timeout wait_for_completion_timeout() returns unsigned long not int. It returns 0 if timed out, and positive if completed. The check for <= 0 is ambiguous and should be == 0 here indicating timeout which is the only error case. Fixes: 83738d87e3a0 ("mtd: sh_flctl: Add DMA capabilty") Signed-off-by: Miaoqian Lin Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220412083435.29254-1-linmq006@gmail.com --- drivers/mtd/nand/raw/sh_flctl.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c index b85b9c6fcc42..a278829469d6 100644 --- a/drivers/mtd/nand/raw/sh_flctl.c +++ b/drivers/mtd/nand/raw/sh_flctl.c @@ -384,7 +384,8 @@ static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf, dma_addr_t dma_addr; dma_cookie_t cookie; uint32_t reg; - int ret; + int ret = 0; + unsigned long time_left; if (dir == DMA_FROM_DEVICE) { chan = flctl->chan_fifo0_rx; @@ -425,13 +426,14 @@ static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf, goto out; } - ret = + time_left = wait_for_completion_timeout(&flctl->dma_complete, msecs_to_jiffies(3000)); - if (ret <= 0) { + if (time_left == 0) { dmaengine_terminate_all(chan); dev_err(&flctl->pdev->dev, "wait_for_completion_timeout\n"); + ret = -ETIMEDOUT; } out: @@ -441,7 +443,7 @@ out: dma_unmap_single(chan->device->dev, dma_addr, len, dir); - /* ret > 0 is success */ + /* ret == 0 is success */ return ret; } @@ -465,7 +467,7 @@ static void read_fiforeg(struct sh_flctl *flctl, int rlen, int offset) /* initiate DMA transfer */ if (flctl->chan_fifo0_rx && rlen >= 32 && - flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_FROM_DEVICE) > 0) + !flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_FROM_DEVICE)) goto convert; /* DMA success */ /* do polling transfer */ @@ -524,7 +526,7 @@ static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen, /* initiate DMA transfer */ if (flctl->chan_fifo0_tx && rlen >= 32 && - flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_TO_DEVICE) > 0) + !flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_TO_DEVICE)) return; /* DMA success */ /* do polling transfer */ From 37c5f9e80e015d0df17d0c377c18523002986851 Mon Sep 17 00:00:00 2001 From: Oleksandr Ocheretnyi Date: Sun, 17 Apr 2022 11:46:47 -0700 Subject: [PATCH 178/803] mtd: fix 'part' field data corruption in mtd_info Commit 46b5889cc2c5 ("mtd: implement proper partition handling") started using "mtd_get_master_ofs()" in mtd callbacks to determine memory offsets by means of 'part' field from mtd_info, what previously was smashed accessing 'master' field in the mtd_set_dev_defaults() method. That provides wrong offset what causes hardware access errors. Just make 'part', 'master' as separate fields, rather than using union type to avoid 'part' data corruption when mtd_set_dev_defaults() is called. Fixes: 46b5889cc2c5 ("mtd: implement proper partition handling") Signed-off-by: Oleksandr Ocheretnyi Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220417184649.449289-1-oocheret@cisco.com --- include/linux/mtd/mtd.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 151607e9d64a..955aee14b0f7 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -389,10 +389,8 @@ struct mtd_info { /* List of partitions attached to this MTD device */ struct list_head partitions; - union { - struct mtd_part part; - struct mtd_master master; - }; + struct mtd_part part; + struct mtd_master master; }; static inline struct mtd_info *mtd_get_master(struct mtd_info *mtd) From ba7542eb2dd5dfc75c457198b88986642e602065 Mon Sep 17 00:00:00 2001 From: Md Sadre Alam Date: Mon, 18 Apr 2022 13:18:27 +0530 Subject: [PATCH 179/803] mtd: rawnand: qcom: fix memory corruption that causes panic This patch fixes a memory corruption that occurred in the nand_scan() path for Hynix nand device. On boot, for Hynix nand device will panic at a weird place: | Unable to handle kernel NULL pointer dereference at virtual address 00000070 | [00000070] *pgd=00000000 | Internal error: Oops: 5 [#1] PREEMPT SMP ARM | Modules linked in: | CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.17.0-01473-g13ae1769cfb0 #38 | Hardware name: Generic DT based system | PC is at nandc_set_reg+0x8/0x1c | LR is at qcom_nandc_command+0x20c/0x5d0 | pc : [] lr : [] psr: 00000113 | sp : c14adc50 ip : c14ee208 fp : c0cc970c | r10: 000000a3 r9 : 00000000 r8 : 00000040 | r7 : c16f6a00 r6 : 00000090 r5 : 00000004 r4 :c14ee040 | r3 : 00000000 r2 : 0000000b r1 : 00000000 r0 :c14ee040 | Flags: nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none | Control: 10c5387d Table: 8020406a DAC: 00000051 | Register r0 information: slab kmalloc-2k start c14ee000 pointer offset 64 size 2048 | Process swapper/0 (pid: 1, stack limit = 0x(ptrval)) | nandc_set_reg from qcom_nandc_command+0x20c/0x5d0 | qcom_nandc_command from nand_readid_op+0x198/0x1e8 | nand_readid_op from hynix_nand_has_valid_jedecid+0x30/0x78 | hynix_nand_has_valid_jedecid from hynix_nand_init+0xb8/0x454 | hynix_nand_init from nand_scan_with_ids+0xa30/0x14a8 | nand_scan_with_ids from qcom_nandc_probe+0x648/0x7b0 | qcom_nandc_probe from platform_probe+0x58/0xac The problem is that the nand_scan()'s qcom_nand_attach_chip callback is updating the nandc->max_cwperpage from 1 to 4 or 8 based on page size. This causes the sg_init_table of clear_bam_transaction() in the driver's qcom_nandc_command() to memset much more than what was initially allocated by alloc_bam_transaction(). This patch will update nandc->max_cwperpage 1 to 4 or 8 based on page size in qcom_nand_attach_chip call back after freeing the previously allocated memory for bam txn as per nandc->max_cwperpage = 1 and then again allocating bam txn as per nandc->max_cwperpage = 4 or 8 based on page size in qcom_nand_attach_chip call back itself. Cc: stable@vger.kernel.org Fixes: 6a3cec64f18c ("mtd: rawnand: qcom: convert driver to nand_scan()") Reported-by: Konrad Dybcio Reviewed-by: Manivannan Sadhasivam Co-developed-by: Sricharan R Signed-off-by: Sricharan R Signed-off-by: Md Sadre Alam Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/1650268107-5363-1-git-send-email-quic_mdalam@quicinc.com --- drivers/mtd/nand/raw/qcom_nandc.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index 1a77542c6d67..048b255faa76 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -2651,10 +2651,23 @@ static int qcom_nand_attach_chip(struct nand_chip *chip) ecc->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; mtd_set_ooblayout(mtd, &qcom_nand_ooblayout_ops); + /* Free the initially allocated BAM transaction for reading the ONFI params */ + if (nandc->props->is_bam) + free_bam_transaction(nandc); nandc->max_cwperpage = max_t(unsigned int, nandc->max_cwperpage, cwperpage); + /* Now allocate the BAM transaction based on updated max_cwperpage */ + if (nandc->props->is_bam) { + nandc->bam_txn = alloc_bam_transaction(nandc); + if (!nandc->bam_txn) { + dev_err(nandc->dev, + "failed to allocate bam transaction\n"); + return -ENOMEM; + } + } + /* * DATA_UD_BYTES varies based on whether the read/write command protects * spare data with ECC too. We protect spare data by default, so we set @@ -2955,17 +2968,6 @@ static int qcom_nand_host_init_and_register(struct qcom_nand_controller *nandc, if (ret) return ret; - if (nandc->props->is_bam) { - free_bam_transaction(nandc); - nandc->bam_txn = alloc_bam_transaction(nandc); - if (!nandc->bam_txn) { - dev_err(nandc->dev, - "failed to allocate bam transaction\n"); - nand_cleanup(chip); - return -ENOMEM; - } - } - ret = mtd_device_parse_register(mtd, probes, NULL, NULL, 0); if (ret) nand_cleanup(chip); From e23e50e7acc8d8f16498e9c129db33e6a00e80eb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Apr 2022 17:12:34 -0700 Subject: [PATCH 180/803] USB: serial: whiteheat: fix heap overflow in WHITEHEAT_GET_DTR_RTS The sizeof(struct whitehat_dr_info) can be 4 bytes under CONFIG_AEABI=n due to "-mabi=apcs-gnu", even though it has a single u8: whiteheat_private { __u8 mcr; /* 0 1 */ /* size: 4, cachelines: 1, members: 1 */ /* padding: 3 */ /* last cacheline: 4 bytes */ }; The result is technically harmless, as both the source and the destinations are currently the same allocation size (4 bytes) and don't use their padding, but if anything were to ever be added after the "mcr" member in "struct whiteheat_private", it would be overwritten. The structs both have a single u8 "mcr" member, but are 4 bytes in padded size. The memcpy() destination was explicitly targeting the u8 member (size 1) with the length of the whole structure (size 4), triggering the memcpy buffer overflow warning: In file included from include/linux/string.h:253, from include/linux/bitmap.h:11, from include/linux/cpumask.h:12, from include/linux/smp.h:13, from include/linux/lockdep.h:14, from include/linux/spinlock.h:62, from include/linux/mmzone.h:8, from include/linux/gfp.h:6, from include/linux/slab.h:15, from drivers/usb/serial/whiteheat.c:17: In function 'fortify_memcpy_chk', inlined from 'firm_send_command' at drivers/usb/serial/whiteheat.c:587:4: include/linux/fortify-string.h:328:25: warning: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] 328 | __write_overflow_field(p_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, just assign the one byte directly. Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202204142318.vDqjjSFn-lkp@intel.com Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220421001234.2421107-1-keescook@chromium.org Signed-off-by: Johan Hovold --- drivers/usb/serial/whiteheat.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index da65d14c9ed5..06aad0d727dd 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -584,9 +584,8 @@ static int firm_send_command(struct usb_serial_port *port, __u8 command, switch (command) { case WHITEHEAT_GET_DTR_RTS: info = usb_get_serial_port_data(port); - memcpy(&info->mcr, command_info->result_buffer, - sizeof(struct whiteheat_dr_info)); - break; + info->mcr = command_info->result_buffer[0]; + break; } } exit: From 2e3a0d1bfa95b54333f7add3e50e288769373873 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 21 Apr 2022 13:38:01 +0100 Subject: [PATCH 181/803] ASoC: meson: Fix event generation for AUI ACODEC mux The AIU ACODEC has a custom put() operation which returns 0 when the value of the mux changes, meaning that events are not generated for userspace. Change to return 1 in this case, the function returns early in the case where there is no change. Signed-off-by: Mark Brown Reviewed-by: Jerome Brunet Link: https://lore.kernel.org/r/20220421123803.292063-2-broonie@kernel.org Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/meson/aiu-acodec-ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/meson/aiu-acodec-ctrl.c b/sound/soc/meson/aiu-acodec-ctrl.c index 22e181646bc3..3776b073a3db 100644 --- a/sound/soc/meson/aiu-acodec-ctrl.c +++ b/sound/soc/meson/aiu-acodec-ctrl.c @@ -58,7 +58,7 @@ static int aiu_acodec_ctrl_mux_put_enum(struct snd_kcontrol *kcontrol, snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); - return 0; + return 1; } static SOC_ENUM_SINGLE_DECL(aiu_acodec_ctrl_mux_enum, AIU_ACODEC_CTRL, From fce49921a22262736cdc3cc74fa67915b75e9363 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 21 Apr 2022 13:38:02 +0100 Subject: [PATCH 182/803] ASoC: meson: Fix event generation for AUI CODEC mux The AIU CODEC has a custom put() operation which returns 0 when the value of the mux changes, meaning that events are not generated for userspace. Change to return 1 in this case, the function returns early in the case where there is no change. Signed-off-by: Mark Brown Reviewed-by: Jerome Brunet Link: https://lore.kernel.org/r/20220421123803.292063-3-broonie@kernel.org Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/meson/aiu-codec-ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/meson/aiu-codec-ctrl.c b/sound/soc/meson/aiu-codec-ctrl.c index 59ee66fc2bcd..286ac4983d40 100644 --- a/sound/soc/meson/aiu-codec-ctrl.c +++ b/sound/soc/meson/aiu-codec-ctrl.c @@ -57,7 +57,7 @@ static int aiu_codec_ctrl_mux_put_enum(struct snd_kcontrol *kcontrol, snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); - return 0; + return 1; } static SOC_ENUM_SINGLE_DECL(aiu_hdmi_ctrl_mux_enum, AIU_HDMI_CLK_DATA_CTRL, From 12131008fc13ff7f7690d170b7a8f72d24fd7d1e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 21 Apr 2022 13:38:03 +0100 Subject: [PATCH 183/803] ASoC: meson: Fix event generation for G12A tohdmi mux The G12A tohdmi has a custom put() operation which returns 0 when the value of the mux changes, meaning that events are not generated for userspace. Change to return 1 in this case, the function returns early in the case where there is no change. Signed-off-by: Mark Brown Reviewed-by: Jerome Brunet Link: https://lore.kernel.org/r/20220421123803.292063-4-broonie@kernel.org Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/meson/g12a-tohdmitx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/meson/g12a-tohdmitx.c b/sound/soc/meson/g12a-tohdmitx.c index 9b2b59536ced..6c99052feafd 100644 --- a/sound/soc/meson/g12a-tohdmitx.c +++ b/sound/soc/meson/g12a-tohdmitx.c @@ -67,7 +67,7 @@ static int g12a_tohdmitx_i2s_mux_put_enum(struct snd_kcontrol *kcontrol, snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); - return 0; + return 1; } static SOC_ENUM_SINGLE_DECL(g12a_tohdmitx_i2s_mux_enum, TOHDMITX_CTRL0, From a692e13d87cb6d0193387aac55cfcc947077c20b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 19 Apr 2022 14:23:57 +0100 Subject: [PATCH 184/803] btrfs: fix assertion failure during scrub due to block group reallocation During a scrub, or device replace, we can race with block group removal and allocation and trigger the following assertion failure: [7526.385524] assertion failed: cache->start == chunk_offset, in fs/btrfs/scrub.c:3817 [7526.387351] ------------[ cut here ]------------ [7526.387373] kernel BUG at fs/btrfs/ctree.h:3599! [7526.388001] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI [7526.388970] CPU: 2 PID: 1158150 Comm: btrfs Not tainted 5.17.0-rc8-btrfs-next-114 #4 [7526.390279] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [7526.392430] RIP: 0010:assertfail.constprop.0+0x18/0x1a [btrfs] [7526.393520] Code: f3 48 c7 c7 20 (...) [7526.396926] RSP: 0018:ffffb9154176bc40 EFLAGS: 00010246 [7526.397690] RAX: 0000000000000048 RBX: ffffa0db8a910000 RCX: 0000000000000000 [7526.398732] RDX: 0000000000000000 RSI: ffffffff9d7239a2 RDI: 00000000ffffffff [7526.399766] RBP: ffffa0db8a911e10 R08: ffffffffa71a3ca0 R09: 0000000000000001 [7526.400793] R10: 0000000000000001 R11: 0000000000000000 R12: ffffa0db4b170800 [7526.401839] R13: 00000003494b0000 R14: ffffa0db7c55b488 R15: ffffa0db8b19a000 [7526.402874] FS: 00007f6c99c40640(0000) GS:ffffa0de6d200000(0000) knlGS:0000000000000000 [7526.404038] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [7526.405040] CR2: 00007f31b0882160 CR3: 000000014b38c004 CR4: 0000000000370ee0 [7526.406112] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [7526.407148] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [7526.408169] Call Trace: [7526.408529] [7526.408839] scrub_enumerate_chunks.cold+0x11/0x79 [btrfs] [7526.409690] ? do_wait_intr_irq+0xb0/0xb0 [7526.410276] btrfs_scrub_dev+0x226/0x620 [btrfs] [7526.410995] ? preempt_count_add+0x49/0xa0 [7526.411592] btrfs_ioctl+0x1ab5/0x36d0 [btrfs] [7526.412278] ? __fget_files+0xc9/0x1b0 [7526.412825] ? kvm_sched_clock_read+0x14/0x40 [7526.413459] ? lock_release+0x155/0x4a0 [7526.414022] ? __x64_sys_ioctl+0x83/0xb0 [7526.414601] __x64_sys_ioctl+0x83/0xb0 [7526.415150] do_syscall_64+0x3b/0xc0 [7526.415675] entry_SYSCALL_64_after_hwframe+0x44/0xae [7526.416408] RIP: 0033:0x7f6c99d34397 [7526.416931] Code: 3c 1c e8 1c ff (...) [7526.419641] RSP: 002b:00007f6c99c3fca8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [7526.420735] RAX: ffffffffffffffda RBX: 00005624e1e007b0 RCX: 00007f6c99d34397 [7526.421779] RDX: 00005624e1e007b0 RSI: 00000000c400941b RDI: 0000000000000003 [7526.422820] RBP: 0000000000000000 R08: 00007f6c99c40640 R09: 0000000000000000 [7526.423906] R10: 00007f6c99c40640 R11: 0000000000000246 R12: 00007fff746755de [7526.424924] R13: 00007fff746755df R14: 0000000000000000 R15: 00007f6c99c40640 [7526.425950] That assertion is relatively new, introduced with commit d04fbe19aefd2 ("btrfs: scrub: cleanup the argument list of scrub_chunk()"). The block group we get at scrub_enumerate_chunks() can actually have a start address that is smaller then the chunk offset we extracted from a device extent item we got from the commit root of the device tree. This is very rare, but it can happen due to a race with block group removal and allocation. For example, the following steps show how this can happen: 1) We are at transaction T, and we have the following blocks groups, sorted by their logical start address: [ bg A, start address A, length 1G (data) ] [ bg B, start address B, length 1G (data) ] (...) [ bg W, start address W, length 1G (data) ] --> logical address space hole of 256M, there used to be a 256M metadata block group here [ bg Y, start address Y, length 256M (metadata) ] --> Y matches W's end offset + 256M Block group Y is the block group with the highest logical address in the whole filesystem; 2) Block group Y is deleted and its extent mapping is removed by the call to remove_extent_mapping() made from btrfs_remove_block_group(). So after this point, the last element of the mapping red black tree, its rightmost node, is the mapping for block group W; 3) While still at transaction T, a new data block group is allocated, with a length of 1G. When creating the block group we do a call to find_next_chunk(), which returns the logical start address for the new block group. This calls returns X, which corresponds to the end offset of the last block group, the rightmost node in the mapping red black tree (fs_info->mapping_tree), plus one. So we get a new block group that starts at logical address X and with a length of 1G. It spans over the whole logical range of the old block group Y, that was previously removed in the same transaction. However the device extent allocated to block group X is not the same device extent that was used by block group Y, and it also does not overlap that extent, which must be always the case because we allocate extents by searching through the commit root of the device tree (otherwise it could corrupt a filesystem after a power failure or an unclean shutdown in general), so the extent allocator is behaving as expected; 4) We have a task running scrub, currently at scrub_enumerate_chunks(). There it searches for device extent items in the device tree, using its commit root. It finds a device extent item that was used by block group Y, and it extracts the value Y from that item into the local variable 'chunk_offset', using btrfs_dev_extent_chunk_offset(); It then calls btrfs_lookup_block_group() to find block group for the logical address Y - since there's currently no block group that starts at that logical address, it returns block group X, because its range contains Y. This results in triggering the assertion: ASSERT(cache->start == chunk_offset); right before calling scrub_chunk(), as cache->start is X and chunk_offset is Y. This is more likely to happen of filesystems not larger than 50G, because for these filesystems we use a 256M size for metadata block groups and a 1G size for data block groups, while for filesystems larger than 50G, we use a 1G size for both data and metadata block groups (except for zoned filesystems). It could also happen on any filesystem size due to the fact that system block groups are always smaller (32M) than both data and metadata block groups, but these are not frequently deleted, so much less likely to trigger the race. So make scrub skip any block group with a start offset that is less than the value we expect, as that means it's a new block group that was created in the current transaction. It's pointless to continue and try to scrub its extents, because scrub searches for extents using the commit root, so it won't find any. For a device replace, skip it as well for the same reasons, and we don't need to worry about the possibility of extents of the new block group not being to the new device, because we have the write duplication setup done through btrfs_map_block(). Fixes: d04fbe19aefd ("btrfs: scrub: cleanup the argument list of scrub_chunk()") CC: stable@vger.kernel.org # 5.17 Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 7 ++++++- fs/btrfs/scrub.c | 26 +++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 71fd99b48283..f26202621989 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -734,7 +734,12 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); - /* Commit dev_replace state and reserve 1 item for it. */ + /* + * Commit dev_replace state and reserve 1 item for it. + * This is crucial to ensure we won't miss copying extents for new block + * groups that are allocated after we started the device replace, and + * must be done after setting up the device replace state. + */ trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 11089568b287..8cd713d37ad2 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3699,6 +3699,31 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, if (!cache) goto skip; + ASSERT(cache->start <= chunk_offset); + /* + * We are using the commit root to search for device extents, so + * that means we could have found a device extent item from a + * block group that was deleted in the current transaction. The + * logical start offset of the deleted block group, stored at + * @chunk_offset, might be part of the logical address range of + * a new block group (which uses different physical extents). + * In this case btrfs_lookup_block_group() has returned the new + * block group, and its start address is less than @chunk_offset. + * + * We skip such new block groups, because it's pointless to + * process them, as we won't find their extents because we search + * for them using the commit root of the extent tree. For a device + * replace it's also fine to skip it, we won't miss copying them + * to the target device because we have the write duplication + * setup through the regular write path (by btrfs_map_block()), + * and we have committed a transaction when we started the device + * replace, right after setting up the device replace state. + */ + if (cache->start < chunk_offset) { + btrfs_put_block_group(cache); + goto skip; + } + if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) { spin_lock(&cache->lock); if (!cache->to_copy) { @@ -3822,7 +3847,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, dev_replace->item_needs_writeback = 1; up_write(&dev_replace->rwsem); - ASSERT(cache->start == chunk_offset); ret = scrub_chunk(sctx, cache, scrub_dev, found_key.offset, dev_extent_len); From 5f0addf7b89085f8e0a2593faa419d6111612b9b Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Mon, 18 Apr 2022 16:15:03 +0900 Subject: [PATCH 185/803] btrfs: zoned: use dedicated lock for data relocation Currently, we use btrfs_inode_{lock,unlock}() to grant an exclusive writeback of the relocation data inode in btrfs_zoned_data_reloc_{lock,unlock}(). However, that can cause a deadlock in the following path. Thread A takes btrfs_inode_lock() and waits for metadata reservation by e.g, waiting for writeback: prealloc_file_extent_cluster() - btrfs_inode_lock(&inode->vfs_inode, 0); - btrfs_prealloc_file_range() ... - btrfs_replace_file_extents() - btrfs_start_transaction ... - btrfs_reserve_metadata_bytes() Thread B (e.g, doing a writeback work) needs to wait for the inode lock to continue writeback process: do_writepages - btrfs_writepages - extent_writpages - btrfs_zoned_data_reloc_lock(BTRFS_I(inode)); - btrfs_inode_lock() The deadlock is caused by relying on the vfs_inode's lock. By using it, we introduced unnecessary exclusion of writeback and btrfs_prealloc_file_range(). Also, the lock at this point is useless as we don't have any dirty pages in the inode yet. Introduce fs_info->zoned_data_reloc_io_lock and use it for the exclusive writeback. Fixes: 35156d852762 ("btrfs: zoned: only allow one process to add pages to a relocation inode") CC: stable@vger.kernel.org # 5.16.x: 869f4cdc73f9: btrfs: zoned: encapsulate inode locking for zoned relocation CC: stable@vger.kernel.org # 5.16.x CC: stable@vger.kernel.org # 5.17 Cc: Johannes Thumshirn Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/zoned.h | 4 ++-- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4db17bd05a21..604a4d54cf0d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1060,6 +1060,7 @@ struct btrfs_fs_info { */ spinlock_t relocation_bg_lock; u64 data_reloc_bg; + struct mutex zoned_data_reloc_io_lock; u64 nr_global_roots; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index cebd7a78c964..20e70eb88465 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3156,6 +3156,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); mutex_init(&fs_info->zoned_meta_io_lock); + mutex_init(&fs_info->zoned_data_reloc_io_lock); seqlock_init(&fs_info->profiles_lock); INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index cbf016a7bb5d..6dee76248cb4 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -359,7 +359,7 @@ static inline void btrfs_zoned_data_reloc_lock(struct btrfs_inode *inode) struct btrfs_root *root = inode->root; if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info)) - btrfs_inode_lock(&inode->vfs_inode, 0); + mutex_lock(&root->fs_info->zoned_data_reloc_io_lock); } static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode) @@ -367,7 +367,7 @@ static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode) struct btrfs_root *root = inode->root; if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info)) - btrfs_inode_unlock(&inode->vfs_inode, 0); + mutex_unlock(&root->fs_info->zoned_data_reloc_io_lock); } #endif From 08b7cf134eafca3b38e818d934b00dfe6b5b0fb4 Mon Sep 17 00:00:00 2001 From: Wells Lu Date: Fri, 15 Apr 2022 17:41:28 +0800 Subject: [PATCH 186/803] pinctrl: Fix an error in pin-function table of SP7021 The first valid item of pin-function table should start from the third item. The first two items, due to historical and compatible reasons, should be dummy items. The two dummy items were removed accidentally in initial submission. This fix adds them back. Signed-off-by: Wells Lu Link: https://lore.kernel.org/r/1650015688-19774-1-git-send-email-wellslutw@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/sunplus/sppctl_sp7021.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pinctrl/sunplus/sppctl_sp7021.c b/drivers/pinctrl/sunplus/sppctl_sp7021.c index 9748345b9298..cd657760a644 100644 --- a/drivers/pinctrl/sunplus/sppctl_sp7021.c +++ b/drivers/pinctrl/sunplus/sppctl_sp7021.c @@ -419,7 +419,15 @@ static const struct sppctl_grp sp7021grps_prbp[] = { EGRP("PROBE_PORT2", 2, pins_prp2), }; +/* + * Due to compatible reason, the first valid item should start at the third + * position of the array. Please keep the first two items of the table + * no use (dummy). + */ const struct sppctl_func sppctl_list_funcs[] = { + FNCN("", pinmux_type_fpmx, 0x00, 0, 0), + FNCN("", pinmux_type_fpmx, 0x00, 0, 0), + FNCN("L2SW_CLK_OUT", pinmux_type_fpmx, 0x00, 0, 7), FNCN("L2SW_MAC_SMI_MDC", pinmux_type_fpmx, 0x00, 8, 7), FNCN("L2SW_LED_FLASH0", pinmux_type_fpmx, 0x01, 0, 7), From e74200ebf7c4f6a7a7d1be9f63833ddba251effa Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 15 Apr 2022 23:54:10 +0200 Subject: [PATCH 187/803] pinctrl: stm32: Do not call stm32_gpio_get() for edge triggered IRQs in EOI The stm32_gpio_get() should only be called for LEVEL triggered interrupts, skip calling it for EDGE triggered interrupts altogether to avoid wasting CPU cycles in EOI handler. On this platform, EDGE triggered interrupts are the majority and LEVEL triggered interrupts are the exception no less, and the CPU cycles are not abundant. Fixes: 47beed513a85b ("pinctrl: stm32: Add level interrupt support to gpio irq chip") Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Fabien Dessenne Cc: Linus Walleij Cc: Marc Zyngier Cc: linux-stm32@st-md-mailman.stormreply.com Cc: linux-arm-kernel@lists.infradead.org To: linux-gpio@vger.kernel.org Link: https://lore.kernel.org/r/20220415215410.498349-1-marex@denx.de Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/pinctrl-stm32.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 9ed764731570..df1d6b466fb7 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -311,6 +311,10 @@ static void stm32_gpio_irq_trigger(struct irq_data *d) struct stm32_gpio_bank *bank = d->domain->host_data; int level; + /* Do not access the GPIO if this is not LEVEL triggered IRQ. */ + if (!(bank->irq_type[d->hwirq] & IRQ_TYPE_LEVEL_MASK)) + return; + /* If level interrupt type then retrig */ level = stm32_gpio_get(&bank->gpio_chip, d->hwirq); if ((level == 0 && bank->irq_type[d->hwirq] == IRQ_TYPE_LEVEL_LOW) || From 7e842d70fe599bc13594b650b2144c4b6e6d6bf1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 20 Apr 2022 09:05:26 +0200 Subject: [PATCH 188/803] memory: renesas-rpc-if: Fix HF/OSPI data transfer in Manual Mode HyperFlash devices fail to probe: rpc-if-hyperflash rpc-if-hyperflash: probing of hyperbus device failed In HyperFlash or Octal-SPI Flash mode, the Transfer Data Enable bits (SPIDE) in the Manual Mode Enable Setting Register (SMENR) are derived from half of the transfer size, cfr. the rpcif_bits_set() helper function. However, rpcif_reg_{read,write}() does not take the bus size into account, and does not double all Manual Mode Data Register access sizes when communicating with a HyperFlash or Octal-SPI Flash device. Fix this, and avoid the back-and-forth conversion between transfer size and Transfer Data Enable bits, by explicitly storing the transfer size in struct rpcif, and using that value to determine access size in rpcif_reg_{read,write}(). Enforce that the "high" Manual Mode Read/Write Data Registers (SM[RW]DR1) are only used for 8-byte data accesses. While at it, forbid writing to the Manual Mode Read Data Registers, as they are read-only. Fixes: fff53a551db50f5e ("memory: renesas-rpc-if: Correct QSPI data transfer in Manual mode") Signed-off-by: Geert Uytterhoeven Signed-off-by: Krzysztof Kozlowski Tested-by: Lad Prabhakar Tested-by: Wolfram Sang Reviewed-by: Wolfram Sang Link: https://lore.kernel.org/r/cde9bfacf704c81865f57b15d1b48a4793da4286.1649681476.git.geert+renesas@glider.be Link: https://lore.kernel.org/r/20220420070526.9367-1-krzysztof.kozlowski@linaro.org' Signed-off-by: Arnd Bergmann --- drivers/memory/renesas-rpc-if.c | 60 +++++++++++++++++++++++++-------- include/memory/renesas-rpc-if.h | 1 + 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/drivers/memory/renesas-rpc-if.c b/drivers/memory/renesas-rpc-if.c index 2e545f473cc6..019a0822bde0 100644 --- a/drivers/memory/renesas-rpc-if.c +++ b/drivers/memory/renesas-rpc-if.c @@ -164,25 +164,39 @@ static const struct regmap_access_table rpcif_volatile_table = { /* - * Custom accessor functions to ensure SMRDR0 and SMWDR0 are always accessed - * with proper width. Requires SMENR_SPIDE to be correctly set before! + * Custom accessor functions to ensure SM[RW]DR[01] are always accessed with + * proper width. Requires rpcif.xfer_size to be correctly set before! */ static int rpcif_reg_read(void *context, unsigned int reg, unsigned int *val) { struct rpcif *rpc = context; - if (reg == RPCIF_SMRDR0 || reg == RPCIF_SMWDR0) { - u32 spide = readl(rpc->base + RPCIF_SMENR) & RPCIF_SMENR_SPIDE(0xF); - - if (spide == 0x8) { + switch (reg) { + case RPCIF_SMRDR0: + case RPCIF_SMWDR0: + switch (rpc->xfer_size) { + case 1: *val = readb(rpc->base + reg); return 0; - } else if (spide == 0xC) { + + case 2: *val = readw(rpc->base + reg); return 0; - } else if (spide != 0xF) { + + case 4: + case 8: + *val = readl(rpc->base + reg); + return 0; + + default: return -EILSEQ; } + + case RPCIF_SMRDR1: + case RPCIF_SMWDR1: + if (rpc->xfer_size != 8) + return -EILSEQ; + break; } *val = readl(rpc->base + reg); @@ -193,18 +207,34 @@ static int rpcif_reg_write(void *context, unsigned int reg, unsigned int val) { struct rpcif *rpc = context; - if (reg == RPCIF_SMRDR0 || reg == RPCIF_SMWDR0) { - u32 spide = readl(rpc->base + RPCIF_SMENR) & RPCIF_SMENR_SPIDE(0xF); - - if (spide == 0x8) { + switch (reg) { + case RPCIF_SMWDR0: + switch (rpc->xfer_size) { + case 1: writeb(val, rpc->base + reg); return 0; - } else if (spide == 0xC) { + + case 2: writew(val, rpc->base + reg); return 0; - } else if (spide != 0xF) { + + case 4: + case 8: + writel(val, rpc->base + reg); + return 0; + + default: return -EILSEQ; } + + case RPCIF_SMWDR1: + if (rpc->xfer_size != 8) + return -EILSEQ; + break; + + case RPCIF_SMRDR0: + case RPCIF_SMRDR1: + return -EPERM; } writel(val, rpc->base + reg); @@ -469,6 +499,7 @@ int rpcif_manual_xfer(struct rpcif *rpc) smenr |= RPCIF_SMENR_SPIDE(rpcif_bits_set(rpc, nbytes)); regmap_write(rpc->regmap, RPCIF_SMENR, smenr); + rpc->xfer_size = nbytes; memcpy(data, rpc->buffer + pos, nbytes); if (nbytes == 8) { @@ -533,6 +564,7 @@ int rpcif_manual_xfer(struct rpcif *rpc) regmap_write(rpc->regmap, RPCIF_SMENR, smenr); regmap_write(rpc->regmap, RPCIF_SMCR, rpc->smcr | RPCIF_SMCR_SPIE); + rpc->xfer_size = nbytes; ret = wait_msg_xfer_end(rpc); if (ret) goto err_out; diff --git a/include/memory/renesas-rpc-if.h b/include/memory/renesas-rpc-if.h index 7c93f5177532..9c0ad64b8d29 100644 --- a/include/memory/renesas-rpc-if.h +++ b/include/memory/renesas-rpc-if.h @@ -72,6 +72,7 @@ struct rpcif { enum rpcif_type type; enum rpcif_data_dir dir; u8 bus_size; + u8 xfer_size; void *buffer; u32 xferlen; u32 smcr; From 90f21460e49ad60caabece95cc0ca14d9d6d099d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 20 Apr 2022 12:47:08 +0200 Subject: [PATCH 189/803] MAINTAINERS: add Bug entry for Samsung and memory controller drivers Add a Bug sections, indicating preferred mailing method for bug reports, to Samsung SoC related entries and memory controller drivers. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220420104708.106738-1-krzysztof.kozlowski@linaro.org' Signed-off-by: Arnd Bergmann --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index edf0bf37de8a..573a1cf1645a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2643,6 +2643,7 @@ L: linux-samsung-soc@vger.kernel.org S: Maintained C: irc://irc.libera.chat/linux-exynos Q: https://patchwork.kernel.org/project/linux-samsung-soc/list/ +B: mailto:linux-samsung-soc@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git F: Documentation/arm/samsung/ F: Documentation/devicetree/bindings/arm/samsung/ @@ -11971,6 +11972,7 @@ M: Krzysztof Kozlowski M: Bartlomiej Zolnierkiewicz L: linux-pm@vger.kernel.org S: Supported +B: mailto:linux-samsung-soc@vger.kernel.org F: Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml F: Documentation/devicetree/bindings/power/supply/maxim,max77693.yaml F: drivers/power/supply/max14577_charger.c @@ -11982,6 +11984,7 @@ M: Krzysztof Kozlowski M: Bartlomiej Zolnierkiewicz L: linux-kernel@vger.kernel.org S: Supported +B: mailto:linux-samsung-soc@vger.kernel.org F: Documentation/devicetree/bindings/*/maxim,max14577.yaml F: Documentation/devicetree/bindings/*/maxim,max77686.yaml F: Documentation/devicetree/bindings/*/maxim,max77693.yaml @@ -12675,6 +12678,7 @@ MEMORY CONTROLLER DRIVERS M: Krzysztof Kozlowski L: linux-kernel@vger.kernel.org S: Maintained +B: mailto:krzysztof.kozlowski@linaro.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl.git F: Documentation/devicetree/bindings/memory-controllers/ F: drivers/memory/ @@ -15607,6 +15611,7 @@ L: linux-samsung-soc@vger.kernel.org S: Maintained C: irc://irc.libera.chat/linux-exynos Q: https://patchwork.kernel.org/project/linux-samsung-soc/list/ +B: mailto:linux-samsung-soc@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/samsung.git F: Documentation/devicetree/bindings/pinctrl/samsung,pinctrl*yaml F: drivers/pinctrl/samsung/ @@ -17327,6 +17332,7 @@ M: Krzysztof Kozlowski M: Sylwester Nawrocki L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported +B: mailto:linux-samsung-soc@vger.kernel.org F: Documentation/devicetree/bindings/sound/samsung* F: sound/soc/samsung/ @@ -17371,6 +17377,7 @@ M: Bartlomiej Zolnierkiewicz L: linux-kernel@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Supported +B: mailto:linux-samsung-soc@vger.kernel.org F: Documentation/devicetree/bindings/clock/samsung,s2mps11.yaml F: Documentation/devicetree/bindings/mfd/samsung,s2m*.yaml F: Documentation/devicetree/bindings/mfd/samsung,s5m*.yaml From 8771039482d965bdc8cefd972bcabac2b76944a8 Mon Sep 17 00:00:00 2001 From: zhangqilong Date: Sat, 19 Mar 2022 10:38:22 +0800 Subject: [PATCH 190/803] usb: xhci: tegra:Fix PM usage reference leak of tegra_xusb_unpowergate_partitions pm_runtime_get_sync will increment pm usage counter even it failed. Forgetting to putting operation will result in reference leak here. We fix it by replacing it with pm_runtime_resume_and_get to keep usage counter balanced. Fixes: 41a7426d25fa ("usb: xhci: tegra: Unlink power domain devices") Cc: stable Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20220319023822.145641-1-zhangqilong3@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-tegra.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index c8af2cd2216d..996958a6565c 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -1034,13 +1034,13 @@ static int tegra_xusb_unpowergate_partitions(struct tegra_xusb *tegra) int rc; if (tegra->use_genpd) { - rc = pm_runtime_get_sync(tegra->genpd_dev_ss); + rc = pm_runtime_resume_and_get(tegra->genpd_dev_ss); if (rc < 0) { dev_err(dev, "failed to enable XUSB SS partition\n"); return rc; } - rc = pm_runtime_get_sync(tegra->genpd_dev_host); + rc = pm_runtime_resume_and_get(tegra->genpd_dev_host); if (rc < 0) { dev_err(dev, "failed to enable XUSB Host partition\n"); pm_runtime_put_sync(tegra->genpd_dev_ss); From e25adcca917d7e4cdc1dc6444d0692ffda7594bf Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 5 Apr 2022 16:48:23 +0300 Subject: [PATCH 191/803] usb: typec: ucsi: Fix reuse of completion structure The role swapping completion variable is reused, so it needs to be reinitialised every time. Otherwise it will be marked as done after the first time it's used and completing immediately. Link: https://lore.kernel.org/linux-usb/20220325203959.GA19752@jackp-linux.qualcomm.com/ Fixes: 6df475f804e6 ("usb: typec: ucsi: Start using struct typec_operations") Cc: stable@vger.kernel.org Reported-and-suggested-by: Jack Pham Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20220405134824.68067-2-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index f0c2fa19f3e0..576cb0e68596 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -949,6 +949,8 @@ static int ucsi_dr_swap(struct typec_port *port, enum typec_data_role role) role == TYPEC_HOST)) goto out_unlock; + reinit_completion(&con->complete); + command = UCSI_SET_UOR | UCSI_CONNECTOR_NUMBER(con->num); command |= UCSI_SET_UOR_ROLE(role); command |= UCSI_SET_UOR_ACCEPT_ROLE_SWAPS; @@ -985,6 +987,8 @@ static int ucsi_pr_swap(struct typec_port *port, enum typec_role role) if (cur_role == role) goto out_unlock; + reinit_completion(&con->complete); + command = UCSI_SET_PDR | UCSI_CONNECTOR_NUMBER(con->num); command |= UCSI_SET_PDR_ROLE(role); command |= UCSI_SET_PDR_ACCEPT_ROLE_SWAPS; From eb5d7ff3cf0d55093c619b5ad107cd5c05ce8134 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 5 Apr 2022 16:48:24 +0300 Subject: [PATCH 192/803] usb: typec: ucsi: Fix role swapping All attempts to swap the roles timed out because the completion was done without releasing the port lock. Fixing that by releasing the lock before starting to wait for the completion. Link: https://lore.kernel.org/linux-usb/037de7ac-e210-bdf5-ec7a-8c0c88a0be20@gmail.com/ Fixes: ad74b8649bea ("usb: typec: ucsi: Preliminary support for alternate modes") Cc: stable@vger.kernel.org Reported-and-tested-by: Jia-Ju Bai Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20220405134824.68067-3-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 576cb0e68596..a6045aef0d04 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -958,14 +958,18 @@ static int ucsi_dr_swap(struct typec_port *port, enum typec_data_role role) if (ret < 0) goto out_unlock; + mutex_unlock(&con->lock); + if (!wait_for_completion_timeout(&con->complete, - msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS))) - ret = -ETIMEDOUT; + msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS))) + return -ETIMEDOUT; + + return 0; out_unlock: mutex_unlock(&con->lock); - return ret < 0 ? ret : 0; + return ret; } static int ucsi_pr_swap(struct typec_port *port, enum typec_role role) @@ -996,11 +1000,13 @@ static int ucsi_pr_swap(struct typec_port *port, enum typec_role role) if (ret < 0) goto out_unlock; + mutex_unlock(&con->lock); + if (!wait_for_completion_timeout(&con->complete, - msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS))) { - ret = -ETIMEDOUT; - goto out_unlock; - } + msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS))) + return -ETIMEDOUT; + + mutex_lock(&con->lock); /* Something has gone wrong while swapping the role */ if (UCSI_CONSTAT_PWR_OPMODE(con->status.flags) != From 9e3d68f872e4f5ce40dcc5baba7e37ab7961ed74 Mon Sep 17 00:00:00 2001 From: Ren Zhijie Date: Mon, 18 Apr 2022 16:24:25 +0800 Subject: [PATCH 193/803] usb: typec: rt1719: Fix build error without CONFIG_POWER_SUPPLY Building without CONFIG_POWER_SUPPLY will fail: drivers/usb/typec/rt1719.o: In function `rt1719_psy_set_property': rt1719.c:(.text+0x10a): undefined reference to `power_supply_get_drvdata' drivers/usb/typec/rt1719.o: In function `rt1719_psy_get_property': rt1719.c:(.text+0x2c8): undefined reference to `power_supply_get_drvdata' drivers/usb/typec/rt1719.o: In function `devm_rt1719_psy_register': rt1719.c:(.text+0x3e9): undefined reference to `devm_power_supply_register' drivers/usb/typec/rt1719.o: In function `rt1719_irq_handler': rt1719.c:(.text+0xf9f): undefined reference to `power_supply_changed' drivers/usb/typec/rt1719.o: In function `rt1719_update_pwr_opmode.part.9': rt1719.c:(.text+0x657): undefined reference to `power_supply_changed' drivers/usb/typec/rt1719.o: In function `rt1719_attach': rt1719.c:(.text+0x83e): undefined reference to `power_supply_changed' Add POWER_SUPPLY dependency to Kconfig. Fixes: 25d29b980912 ("usb: typec: rt1719: Add support for Richtek RT1719") Reported-by: Hulk Robot Reviewed-by: ChiYuan Huang Reviewed-by: Heikki Krogerus Signed-off-by: Ren Zhijie Link: https://lore.kernel.org/r/20220418082425.41566-1-renzhijie2@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/Kconfig b/drivers/usb/typec/Kconfig index 8f921213b17d..ba24847fb245 100644 --- a/drivers/usb/typec/Kconfig +++ b/drivers/usb/typec/Kconfig @@ -56,6 +56,7 @@ config TYPEC_RT1719 tristate "Richtek RT1719 Sink Only Type-C controller driver" depends on USB_ROLE_SWITCH || !USB_ROLE_SWITCH depends on I2C + depends on POWER_SUPPLY select REGMAP_I2C help Say Y or M here if your system has Richtek RT1719 sink only From 8d084b2eae7fc5fcfc9f143cd7321a88e1cd76aa Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2022 17:15:13 +0200 Subject: [PATCH 194/803] usb: typec: tcpm: Fix undefined behavior due to shift overflowing the constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: drivers/usb/typec/tcpm/tcpm.c: In function ‘run_state_machine’: drivers/usb/typec/tcpm/tcpm.c:4724:3: error: case label does not reduce to an integer constant case BDO_MODE_TESTDATA: ^~~~ See https://lore.kernel.org/r/YkwQ6%2BtIH8GQpuct@zn.tnic for the gory details as to why it triggers with older gccs only. Signed-off-by: Borislav Petkov Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Link: https://lore.kernel.org/r/20220405151517.29753-8-bp@alien8.de Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd_bdo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/usb/pd_bdo.h b/include/linux/usb/pd_bdo.h index 033fe3e17141..7c25b88d79f9 100644 --- a/include/linux/usb/pd_bdo.h +++ b/include/linux/usb/pd_bdo.h @@ -15,7 +15,7 @@ #define BDO_MODE_CARRIER2 (5 << 28) #define BDO_MODE_CARRIER3 (6 << 28) #define BDO_MODE_EYE (7 << 28) -#define BDO_MODE_TESTDATA (8 << 28) +#define BDO_MODE_TESTDATA (8U << 28) #define BDO_MODE_MASK(mode) ((mode) & 0xf0000000) From f085bd4bfe0907ce2fad2c787fc65871ec5ca6d6 Mon Sep 17 00:00:00 2001 From: Weitao Wango Date: Thu, 24 Mar 2022 20:17:35 +0800 Subject: [PATCH 195/803] USB: Fix ehci infinite suspend-resume loop issue in zhaoxin In zhaoxin platform, some ehci projects will latch a wakeup signal internal when plug in a device on port during system S0. This wakeup signal will turn on when ehci runtime suspend, which will trigger a system control interrupt that will resume ehci back to D0. As no device connect, ehci will be set to runtime suspend and turn on the internal latched wakeup signal again. It will cause a suspend-resume loop and generate system control interrupt continuously. Fixed this issue by clear wakeup signal latched in ehci internal when ehci resume callback is called. Acked-by: Alan Stern Signed-off-by: Weitao Wang Link: https://lore.kernel.org/r/20220324121735.3803-1-WeitaoWang-oc@zhaoxin.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hcd.c | 23 +++++++++++++++++++++++ drivers/usb/host/ehci-pci.c | 4 ++++ drivers/usb/host/ehci.h | 1 + 3 files changed, 28 insertions(+) diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 3d82e0b853be..684164fa9716 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -1103,6 +1103,26 @@ static void ehci_remove_device(struct usb_hcd *hcd, struct usb_device *udev) #ifdef CONFIG_PM +/* Clear wakeup signal locked in zhaoxin platform when device plug in. */ +static void ehci_zx_wakeup_clear(struct ehci_hcd *ehci) +{ + u32 __iomem *reg = &ehci->regs->port_status[4]; + u32 t1 = ehci_readl(ehci, reg); + + t1 &= (u32)~0xf0000; + t1 |= PORT_TEST_FORCE; + ehci_writel(ehci, t1, reg); + t1 = ehci_readl(ehci, reg); + msleep(1); + t1 &= (u32)~0xf0000; + ehci_writel(ehci, t1, reg); + ehci_readl(ehci, reg); + msleep(1); + t1 = ehci_readl(ehci, reg); + ehci_writel(ehci, t1 | PORT_CSC, reg); + ehci_readl(ehci, reg); +} + /* suspend/resume, section 4.3 */ /* These routines handle the generic parts of controller suspend/resume */ @@ -1154,6 +1174,9 @@ int ehci_resume(struct usb_hcd *hcd, bool force_reset) if (ehci->shutdown) return 0; /* Controller is dead */ + if (ehci->zx_wakeup_clear_needed) + ehci_zx_wakeup_clear(ehci); + /* * If CF is still set and reset isn't forced * then we maintained suspend power. diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index 638f03b89739..9937c5a7efc2 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -231,6 +231,10 @@ static int ehci_pci_setup(struct usb_hcd *hcd) ehci->is_aspeed = 1; } break; + case PCI_VENDOR_ID_ZHAOXIN: + if (pdev->device == 0x3104 && (pdev->revision & 0xf0) == 0x90) + ehci->zx_wakeup_clear_needed = 1; + break; } /* optional debug port, normally in the first BAR */ diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h index fdd073cc053b..ad3f13a3eaf1 100644 --- a/drivers/usb/host/ehci.h +++ b/drivers/usb/host/ehci.h @@ -220,6 +220,7 @@ struct ehci_hcd { /* one per controller */ unsigned imx28_write_fix:1; /* For Freescale i.MX28 */ unsigned spurious_oc:1; unsigned is_aspeed:1; + unsigned zx_wakeup_clear_needed:1; /* required for usb32 quirk */ #define OHCI_CTRL_HCFS (3 << 6) From 0a96fa640dc928da9eaa46a22c46521b037b78ad Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Thu, 7 Apr 2022 10:40:01 +0800 Subject: [PATCH 196/803] usb: misc: fix improper handling of refcount in uss720_probe() usb_put_dev shouldn't be called when uss720_probe succeeds because of priv->usbdev. At the same time, priv->usbdev shouldn't be set to NULL before destroy_priv in uss720_disconnect because usb_put_dev is in destroy_priv. Fix this by moving priv->usbdev = NULL after usb_put_dev. Fixes: dcb4b8ad6a44 ("misc/uss720: fix memory leak in uss720_probe") Cc: stable Reviewed-by: Dongliang Mu Signed-off-by: Hangyu Hua Link: https://lore.kernel.org/r/20220407024001.11761-1-hbh25y@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/uss720.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index 748139d26263..0be8efcda15d 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -71,6 +71,7 @@ static void destroy_priv(struct kref *kref) dev_dbg(&priv->usbdev->dev, "destroying priv datastructure\n"); usb_put_dev(priv->usbdev); + priv->usbdev = NULL; kfree(priv); } @@ -736,7 +737,6 @@ static int uss720_probe(struct usb_interface *intf, parport_announce_port(pp); usb_set_intfdata(intf, pp); - usb_put_dev(usbdev); return 0; probe_abort: @@ -754,7 +754,6 @@ static void uss720_disconnect(struct usb_interface *intf) usb_set_intfdata(intf, NULL); if (pp) { priv = pp->private_data; - priv->usbdev = NULL; priv->pp = NULL; dev_dbg(&intf->dev, "parport_remove_port\n"); parport_remove_port(pp); From 0cade7885fd5bc47039986b632f3e5585f6b7c22 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 7 Apr 2022 21:23:38 +0200 Subject: [PATCH 197/803] dt-bindings: usb: samsung,exynos-usb2: add missing required reg "reg" property is required on Samsung S5PV210/Exynos EHCI/OHCI controllers. Fixes: 4bf2283cb208 ("dt-bindings: usb: samsung,exynos-usb2: convert to dtschema") Reviewed-by: Alim Akhtar Acked-by: Rob Herring Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220407192338.14849-1-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml b/Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml index fbf07d6e707a..ef42c6fce73c 100644 --- a/Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml +++ b/Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml @@ -62,6 +62,7 @@ required: - interrupts - phys - phy-names + - reg allOf: - if: From 4e64cd7763ca52dce5dff2c88f67a200f3aa37d3 Mon Sep 17 00:00:00 2001 From: Peter Geis Date: Sat, 9 Apr 2022 11:21:15 -0400 Subject: [PATCH 198/803] usb: dwc3: fix backwards compat with rockchip devices Commit 33fb697ec7e5 ("usb: dwc3: Get clocks individually") moved from the clk_bulk api to individual clocks, following the snps,dwc3.yaml dt-binding for clock names. Unfortunately the rk3328 (and upcoming rk356x support) use the rockchip,dwc3.yaml which has different clock names, which are common on devices using the glue layer. The rk3328 does not use a glue layer, but attaches directly to the dwc3 core driver. The offending patch series failed to account for this, thus dwc3 was broken on rk3328. To retain backwards compatibility with rk3328 device trees we must also check for the alternate clock names. Fixes: 33fb697ec7e5 ("usb: dwc3: Get clocks individually") Reported-by: Frank Wunderlich Tested-By: Frank Wunderlich Reviewed-by: Heiko Stuebner Acked-by: Sean Anderson Signed-off-by: Peter Geis Link: https://lore.kernel.org/r/20220409152116.3834354-1-pgwipeout@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 1170b800acdc..5bfd3e88af35 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1690,21 +1690,44 @@ static int dwc3_probe(struct platform_device *pdev) /* * Clocks are optional, but new DT platforms should support all * clocks as required by the DT-binding. + * Some devices have different clock names in legacy device trees, + * check for them to retain backwards compatibility. */ dwc->bus_clk = devm_clk_get_optional(dev, "bus_early"); if (IS_ERR(dwc->bus_clk)) return dev_err_probe(dev, PTR_ERR(dwc->bus_clk), "could not get bus clock\n"); + if (dwc->bus_clk == NULL) { + dwc->bus_clk = devm_clk_get_optional(dev, "bus_clk"); + if (IS_ERR(dwc->bus_clk)) + return dev_err_probe(dev, PTR_ERR(dwc->bus_clk), + "could not get bus clock\n"); + } + dwc->ref_clk = devm_clk_get_optional(dev, "ref"); if (IS_ERR(dwc->ref_clk)) return dev_err_probe(dev, PTR_ERR(dwc->ref_clk), "could not get ref clock\n"); + if (dwc->ref_clk == NULL) { + dwc->ref_clk = devm_clk_get_optional(dev, "ref_clk"); + if (IS_ERR(dwc->ref_clk)) + return dev_err_probe(dev, PTR_ERR(dwc->ref_clk), + "could not get ref clock\n"); + } + dwc->susp_clk = devm_clk_get_optional(dev, "suspend"); if (IS_ERR(dwc->susp_clk)) return dev_err_probe(dev, PTR_ERR(dwc->susp_clk), "could not get suspend clock\n"); + + if (dwc->susp_clk == NULL) { + dwc->susp_clk = devm_clk_get_optional(dev, "suspend_clk"); + if (IS_ERR(dwc->susp_clk)) + return dev_err_probe(dev, PTR_ERR(dwc->susp_clk), + "could not get suspend clock\n"); + } } ret = reset_control_deassert(dwc->reset); From d8bfe5091d6cc4b8b8395e4666979ae72a6069ca Mon Sep 17 00:00:00 2001 From: Evan Green Date: Fri, 8 Apr 2022 11:42:50 -0700 Subject: [PATCH 199/803] xhci: Enable runtime PM on second Alderlake controller Alderlake has two XHCI controllers with PCI IDs 0x461e and 0x51ed. We had previously added the quirk to default enable runtime PM for 0x461e, now add it for 0x51ed as well. Signed-off-by: Evan Green Cc: stable Link: https://lore.kernel.org/r/20220408114225.1.Ibcff6b86ed4eacfe4c4bc89c90e18416f3900a3e@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 5c351970cdf1..d7e0e6ebf080 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -59,6 +59,7 @@ #define PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI 0x9a13 #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI 0x1138 #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI 0x461e +#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed #define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 @@ -266,7 +267,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_ICE_LAKE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI)) + pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI)) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (pdev->vendor == PCI_VENDOR_ID_ETRON && From 456244aeecd54249096362a173dfe06b82a5cafa Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Tue, 19 Apr 2022 16:12:45 +0800 Subject: [PATCH 200/803] usb: mtu3: fix USB 3.0 dual-role-switch from device to host Issue description: When an OTG port has been switched to device role and then switch back to host role again, the USB 3.0 Host (XHCI) will not be able to detect "plug in event of a connected USB 2.0/1.0 ((Highspeed and Fullspeed) devices until system reboot. Root cause and Solution: There is a condition checking flag "ssusb->otg_switch.is_u3_drd" in toggle_opstate(). At the end of role switch procedure, toggle_opstate() will be called to set DC_SESSION and SOFT_CONN bit. If "is_u3_drd" was set and switched the role to USB host 3.0, bit DC_SESSION and SOFT_CONN will be skipped hence caused the port cannot detect connected USB 2.0 (Highspeed and Fullspeed) devices. Simply remove the condition check to solve this issue. Fixes: d0ed062a8b75 ("usb: mtu3: dual-role mode support") Cc: stable@vger.kernel.org Tested-by: Fabien Parent Reviewed-by: Chunfeng Yun Signed-off-by: Macpaul Lin Signed-off-by: Tainping Fang Link: https://lore.kernel.org/r/20220419081245.21015-1-macpaul.lin@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/mtu3_dr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/mtu3/mtu3_dr.c b/drivers/usb/mtu3/mtu3_dr.c index a6b04831b20b..9b8aded3d95e 100644 --- a/drivers/usb/mtu3/mtu3_dr.c +++ b/drivers/usb/mtu3/mtu3_dr.c @@ -21,10 +21,8 @@ static inline struct ssusb_mtk *otg_sx_to_ssusb(struct otg_switch_mtk *otg_sx) static void toggle_opstate(struct ssusb_mtk *ssusb) { - if (!ssusb->otg_switch.is_u3_drd) { - mtu3_setbits(ssusb->mac_base, U3D_DEVICE_CONTROL, DC_SESSION); - mtu3_setbits(ssusb->mac_base, U3D_POWER_MANAGEMENT, SOFT_CONN); - } + mtu3_setbits(ssusb->mac_base, U3D_DEVICE_CONTROL, DC_SESSION); + mtu3_setbits(ssusb->mac_base, U3D_POWER_MANAGEMENT, SOFT_CONN); } /* only port0 supports dual-role mode */ From f28ad9069363dec7deb88032b70612755eed9ee6 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 11 Apr 2022 18:33:47 -0700 Subject: [PATCH 201/803] usb: dwc3: core: Fix tx/rx threshold settings The current driver logic checks against 0 to determine whether the periodic tx/rx threshold settings are set, but we may get bogus values from uninitialized variables if no device property is set. Properly default these variables to 0. Fixes: 938a5ad1d305 ("usb: dwc3: Check for ESS TX/RX threshold config") Cc: Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/cccfce990b11b730b0dae42f9d217dc6fb988c90.1649727139.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 5bfd3e88af35..1ca9dae57855 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1377,10 +1377,10 @@ static void dwc3_get_properties(struct dwc3 *dwc) u8 lpm_nyet_threshold; u8 tx_de_emphasis; u8 hird_threshold; - u8 rx_thr_num_pkt_prd; - u8 rx_max_burst_prd; - u8 tx_thr_num_pkt_prd; - u8 tx_max_burst_prd; + u8 rx_thr_num_pkt_prd = 0; + u8 rx_max_burst_prd = 0; + u8 tx_thr_num_pkt_prd = 0; + u8 tx_max_burst_prd = 0; u8 tx_fifo_resize_max_num; const char *usb_psy_name; int ret; From ab7aa2866d295438dc60522f85c5421c6b4f1507 Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Mon, 11 Apr 2022 17:53:00 +0200 Subject: [PATCH 202/803] usb: dwc3: Try usb-role-switch first in dwc3_drd_init If the PHY controller node has a "port" dwc3 tries to find an extcon device even when "usb-role-switch" is present. This happens because dwc3_get_extcon() sees that "port" node and then calls extcon_find_edev_by_node() which will always return EPROBE_DEFER in that case. On the other hand, even if an extcon was present and dwc3_get_extcon() was successful it would still be ignored in favor of "usb-role-switch". Let's just first check if "usb-role-switch" is configured in the device tree and directly use it instead and only try to look for an extcon device otherwise. Fixes: 8a0a13799744 ("usb: dwc3: Registering a role switch in the DRD code.") Cc: stable Signed-off-by: Sven Peter Link: https://lore.kernel.org/r/20220411155300.9766-1-sven@svenpeter.dev Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/drd.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c index b60b5f7b6dff..8cad9e7d3368 100644 --- a/drivers/usb/dwc3/drd.c +++ b/drivers/usb/dwc3/drd.c @@ -584,16 +584,15 @@ int dwc3_drd_init(struct dwc3 *dwc) { int ret, irq; + if (ROLE_SWITCH && + device_property_read_bool(dwc->dev, "usb-role-switch")) + return dwc3_setup_role_switch(dwc); + dwc->edev = dwc3_get_extcon(dwc); if (IS_ERR(dwc->edev)) return PTR_ERR(dwc->edev); - if (ROLE_SWITCH && - device_property_read_bool(dwc->dev, "usb-role-switch")) { - ret = dwc3_setup_role_switch(dwc); - if (ret < 0) - return ret; - } else if (dwc->edev) { + if (dwc->edev) { dwc->edev_nb.notifier_call = dwc3_drd_notifier; ret = extcon_register_notifier(dwc->edev, EXTCON_USB_HOST, &dwc->edev_nb); From 0543e4e8852ef5ff1809ae62f1ea963e2ab23b66 Mon Sep 17 00:00:00 2001 From: Tasos Sahanidis Date: Fri, 1 Apr 2022 00:47:00 +0300 Subject: [PATCH 203/803] usb: core: Don't hold the device lock while sleeping in do_proc_control() Since commit ae8709b296d8 ("USB: core: Make do_proc_control() and do_proc_bulk() killable") if a device has the USB_QUIRK_DELAY_CTRL_MSG quirk set, it will temporarily block all other URBs (e.g. interrupts) while sleeping due to a control. This results in noticeable delays when, for example, a userspace usbfs application is sending URB interrupts at a high rate to a keyboard and simultaneously updates the lock indicators using controls. Interrupts with direction set to IN are also affected by this, meaning that delivery of HID reports (containing scancodes) to the usbfs application is delayed as well. This patch fixes the regression by calling msleep() while the device mutex is unlocked, as was the case originally with usb_control_msg(). Fixes: ae8709b296d8 ("USB: core: Make do_proc_control() and do_proc_bulk() killable") Cc: stable Acked-by: Alan Stern Signed-off-by: Tasos Sahanidis Link: https://lore.kernel.org/r/3e299e2a-13b9-ddff-7fee-6845e868bc06@tasossah.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 6abb7294e919..b5b85bf80329 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1209,12 +1209,16 @@ static int do_proc_control(struct usb_dev_state *ps, usb_unlock_device(dev); i = usbfs_start_wait_urb(urb, tmo, &actlen); + + /* Linger a bit, prior to the next control message. */ + if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG) + msleep(200); usb_lock_device(dev); snoop_urb(dev, NULL, pipe, actlen, i, COMPLETE, tbuf, actlen); if (!i && actlen) { if (copy_to_user(ctrl->data, tbuf, actlen)) { ret = -EFAULT; - goto recv_fault; + goto done; } } } else { @@ -1231,6 +1235,10 @@ static int do_proc_control(struct usb_dev_state *ps, usb_unlock_device(dev); i = usbfs_start_wait_urb(urb, tmo, &actlen); + + /* Linger a bit, prior to the next control message. */ + if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG) + msleep(200); usb_lock_device(dev); snoop_urb(dev, NULL, pipe, actlen, i, COMPLETE, NULL, 0); } @@ -1242,10 +1250,6 @@ static int do_proc_control(struct usb_dev_state *ps, } ret = (i < 0 ? i : actlen); - recv_fault: - /* Linger a bit, prior to the next control message. */ - if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG) - msleep(200); done: kfree(dr); usb_free_urb(urb); From 929b22e669b15fc9a2ab110ea27f0c489ed92beb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 3 Apr 2022 11:59:15 +0200 Subject: [PATCH 204/803] usb: misc: eud: Fix an error handling path in eud_probe() It is odd to call devm_add_action_or_reset() before calling the function that should be undone. Either, the "_or_reset" part should be omitted, or the action should be recorded after the resources have been allocated. Switch the order of devm_add_action_or_reset() and usb_role_switch_get(). Fixes: 9a1bf58ccd44 ("usb: misc: eud: Add driver support for Embedded USB Debugger(EUD)") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/362908699275ecec078381b42d87c817c6965fc6.1648979948.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/qcom_eud.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/misc/qcom_eud.c b/drivers/usb/misc/qcom_eud.c index f929bffdc5d1..b7f13df00764 100644 --- a/drivers/usb/misc/qcom_eud.c +++ b/drivers/usb/misc/qcom_eud.c @@ -186,16 +186,16 @@ static int eud_probe(struct platform_device *pdev) chip->dev = &pdev->dev; - ret = devm_add_action_or_reset(chip->dev, eud_role_switch_release, chip); - if (ret) - return dev_err_probe(chip->dev, ret, - "failed to add role switch release action\n"); - chip->role_sw = usb_role_switch_get(&pdev->dev); if (IS_ERR(chip->role_sw)) return dev_err_probe(chip->dev, PTR_ERR(chip->role_sw), "failed to get role switch\n"); + ret = devm_add_action_or_reset(chip->dev, eud_role_switch_release, chip); + if (ret) + return dev_err_probe(chip->dev, ret, + "failed to add role switch release action\n"); + chip->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(chip->base)) return PTR_ERR(chip->base); From bf95c4d4630c7a2c16e7b424fdea5177d9ce0864 Mon Sep 17 00:00:00 2001 From: Vijayavardhan Vennapusa Date: Wed, 13 Apr 2022 16:10:38 -0500 Subject: [PATCH 205/803] usb: gadget: configfs: clear deactivation flag in configfs_composite_unbind() If any function like UVC is deactivating gadget as part of composition switch which results in not calling pullup enablement, it is not getting enabled after switch to new composition due to this deactivation flag not cleared. This results in USB enumeration not happening after switch to new USB composition. Hence clear deactivation flag inside gadget structure in configfs_composite_unbind() before switch to new USB composition. Signed-off-by: Vijayavardhan Vennapusa Signed-off-by: Dan Vacura Cc: stable Link: https://lore.kernel.org/r/20220413211038.72797-1-w36195@motorola.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/configfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 1fb837d9271e..84b73cb03f87 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1438,6 +1438,8 @@ static void configfs_composite_unbind(struct usb_gadget *gadget) usb_ep_autoconfig_reset(cdev->gadget); spin_lock_irqsave(&gi->spinlock, flags); cdev->gadget = NULL; + cdev->deactivations = 0; + gadget->deactivated = false; set_gadget_data(gadget, NULL); spin_unlock_irqrestore(&gi->spinlock, flags); } From eb5773201b1c5d603424bd21f161c8c2d1075b42 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 21 Apr 2022 11:23:28 -0500 Subject: [PATCH 206/803] ASoC: soc-ops: fix error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cppcheck throws the following warning: sound/soc/soc-ops.c:461:8: style: Variable 'ret' is assigned a value that is never used. [unreadVariable] ret = err; ^ This seems to be a missing change in the return value. Fixes: 7f3d90a351968 ("ASoC: ops: Fix stereo change notifications in snd_soc_put_volsw_sx()") Signed-off-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Rander Wang Reviewed-by: Péter Ujfalusi Link: https://lore.kernel.org/r/20220421162328.302017-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/soc-ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index a0ca58ba1627..58347eadd219 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -461,7 +461,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, ret = err; } } - return err; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_put_volsw_sx); From c26830b6c5c534d273ce007eb33d5a2d2ad4e969 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 21 Apr 2022 17:57:24 +0200 Subject: [PATCH 207/803] ASoC: meson: axg-tdm-interface: Fix formatters in trigger" This reverts commit bf5e4887eeddb48480568466536aa08ec7f179a5 because the following and required commit e138233e56e9829e65b6293887063a1a3ccb2d68 causes the following system crash when using audio: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:282 Fixes: bf5e4887eeddb4848056846 ("ASoC: meson: axg-tdm-interface: manage formatters in trigger") Reported-by: Dmitry Shmidt Signed-off-by: Neil Armstrong Acked-by: Jerome Brunet Link: https://lore.kernel.org/r/20220421155725.2589089-1-narmstrong@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/axg-tdm-interface.c | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c index 0c31934a9630..e076ced30025 100644 --- a/sound/soc/meson/axg-tdm-interface.c +++ b/sound/soc/meson/axg-tdm-interface.c @@ -351,29 +351,13 @@ static int axg_tdm_iface_hw_free(struct snd_pcm_substream *substream, return 0; } -static int axg_tdm_iface_trigger(struct snd_pcm_substream *substream, - int cmd, +static int axg_tdm_iface_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { - struct axg_tdm_stream *ts = - snd_soc_dai_get_dma_data(dai, substream); + struct axg_tdm_stream *ts = snd_soc_dai_get_dma_data(dai, substream); - switch (cmd) { - case SNDRV_PCM_TRIGGER_START: - case SNDRV_PCM_TRIGGER_RESUME: - case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - axg_tdm_stream_start(ts); - break; - case SNDRV_PCM_TRIGGER_SUSPEND: - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - case SNDRV_PCM_TRIGGER_STOP: - axg_tdm_stream_stop(ts); - break; - default: - return -EINVAL; - } - - return 0; + /* Force all attached formatters to update */ + return axg_tdm_stream_reset(ts); } static int axg_tdm_iface_remove_dai(struct snd_soc_dai *dai) @@ -413,8 +397,8 @@ static const struct snd_soc_dai_ops axg_tdm_iface_ops = { .set_fmt = axg_tdm_iface_set_fmt, .startup = axg_tdm_iface_startup, .hw_params = axg_tdm_iface_hw_params, + .prepare = axg_tdm_iface_prepare, .hw_free = axg_tdm_iface_hw_free, - .trigger = axg_tdm_iface_trigger, }; /* TDM Backend DAIs */ From 0c9b152c72e53016e96593bdbb8cffe2176694b9 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 21 Apr 2022 17:57:25 +0200 Subject: [PATCH 208/803] ASoC: meson: axg-card: Fix nonatomic links This commit e138233e56e9829e65b6293887063a1a3ccb2d68 causes the following system crash when using audio on G12A/G12B & SM1 systems: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:282 in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/0 preempt_count: 10001, expected: 0 RCU nest depth: 0, expected: 0 Preemption disabled at: schedule_preempt_disabled+0x20/0x2c mutex_lock+0x24/0x60 _snd_pcm_stream_lock_irqsave+0x20/0x3c snd_pcm_period_elapsed+0x24/0xa4 axg_fifo_pcm_irq_block+0x64/0xdc __handle_irq_event_percpu+0x104/0x264 handle_irq_event+0x48/0xb4 ... start_kernel+0x3f0/0x484 __primary_switched+0xc0/0xc8 Revert this commit until the crash is fixed. Fixes: e138233e56e9829e65b6 ("ASoC: meson: axg-card: make links nonatomic") Reported-by: Dmitry Shmidt Signed-off-by: Neil Armstrong Acked-by: Jerome Brunet Link: https://lore.kernel.org/r/20220421155725.2589089-2-narmstrong@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/axg-card.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/meson/axg-card.c b/sound/soc/meson/axg-card.c index cbbaa55d92a6..2b77010c2c5c 100644 --- a/sound/soc/meson/axg-card.c +++ b/sound/soc/meson/axg-card.c @@ -320,7 +320,6 @@ static int axg_card_add_link(struct snd_soc_card *card, struct device_node *np, dai_link->cpus = cpu; dai_link->num_cpus = 1; - dai_link->nonatomic = true; ret = meson_card_parse_dai(card, np, &dai_link->cpus->of_node, &dai_link->cpus->dai_name); From fc45e55ebc58dbf622cb89ddbf797589c7a5510b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 21 Apr 2022 16:36:34 +0300 Subject: [PATCH 209/803] ACPI: processor: idle: Avoid falling back to C3 type C-states MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "safe state" index is used by acpi_idle_enter_bm() to avoid entering a C-state that may require bus mastering to be disabled on entry in the cases when this is not going to happen. For this reason, it should not be set to point to C3 type of C-states, because they may require bus mastering to be disabled on entry in principle. This was broken by commit d6b88ce2eb9d ("ACPI: processor idle: Allow playing dead in C3 state") which inadvertently allowed the "safe state" index to point to C3 type of C-states. This results in a machine that won't boot past the point when it first enters C3. Restore the correct behaviour (either demote to C1/C2, or use C3 but also set ARB_DIS=1). I hit this on a Fujitsu Siemens Lifebook S6010 (P3) machine. Fixes: d6b88ce2eb9d ("ACPI: processor idle: Allow playing dead in C3 state") Cc: 5.16+ # 5.16+ Signed-off-by: Ville Syrjälä Tested-by: Woody Suwalski [ rjw: Subject and changelog adjustments ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 4556c86c3465..5f296e099bce 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -795,7 +795,8 @@ static int acpi_processor_setup_cstates(struct acpi_processor *pr) if (cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) { state->enter_dead = acpi_idle_play_dead; - drv->safe_state_index = count; + if (cx->type != ACPI_STATE_C3) + drv->safe_state_index = count; } /* * Halt-induced C1 is not good for ->enter_s2idle, because it From 20e582e16af24b074e583f9551fad557882a3c9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 20 Apr 2022 16:44:17 +0300 Subject: [PATCH 210/803] Revert "ACPI: processor: idle: fix lockup regression on 32-bit ThinkPad T40" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit bfe55a1f7fd6bfede16078bf04c6250fbca11588. This was presumably misdiagnosed as an inability to use C3 at all when I suspect the real problem is just misconfiguration of C3 vs. ARB_DIS. Signed-off-by: Ville Syrjälä Cc: 5.16+ # 5.16+ Tested-by: Woody Suwalski Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 5f296e099bce..eb95e188d62b 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -96,11 +96,6 @@ static const struct dmi_system_id processor_power_dmi_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), DMI_MATCH(DMI_PRODUCT_NAME,"L8400B series Notebook PC")}, (void *)1}, - /* T40 can not handle C3 idle state */ - { set_max_cstate, "IBM ThinkPad T40", { - DMI_MATCH(DMI_SYS_VENDOR, "IBM"), - DMI_MATCH(DMI_PRODUCT_NAME, "23737CU")}, - (void *)2}, {}, }; From d0f6cfb2bd165b0aa307750e07e03420859bd554 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 21 Apr 2022 09:55:04 -0700 Subject: [PATCH 211/803] thermal: int340x: Fix attr.show callback prototype Control Flow Integrity (CFI) instrumentation of the kernel noticed that the caller, dev_attr_show(), and the callback, odvp_show(), did not have matching function prototypes, which would cause a CFI exception to be raised. Correct the prototype by using struct device_attribute instead of struct kobj_attribute. Reported-and-tested-by: Joao Moreira Link: https://lore.kernel.org/lkml/067ce8bd4c3968054509831fa2347f4f@overdrivepizza.com/ Fixes: 006f006f1e5c ("thermal/int340x_thermal: Export OEM vendor variables") Cc: 5.8+ # 5.8+ Signed-off-by: Kees Cook Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/int340x_thermal/int3400_thermal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 4954800b9850..d97f496bab9b 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -68,7 +68,7 @@ static int evaluate_odvp(struct int3400_thermal_priv *priv); struct odvp_attr { int odvp; struct int3400_thermal_priv *priv; - struct kobj_attribute attr; + struct device_attribute attr; }; static ssize_t data_vault_read(struct file *file, struct kobject *kobj, @@ -311,7 +311,7 @@ end: return result; } -static ssize_t odvp_show(struct kobject *kobj, struct kobj_attribute *attr, +static ssize_t odvp_show(struct device *dev, struct device_attribute *attr, char *buf) { struct odvp_attr *odvp_attr; From 4d8ec91208196e0e19195f1e7d6be9de5873f242 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 21 Apr 2022 16:47:02 -0700 Subject: [PATCH 212/803] f2fs: should not truncate blocks during roll-forward recovery If the file preallocated blocks and fsync'ed, we should not truncate them during roll-forward recovery which will recover i_size correctly back. Fixes: d4dd19ec1ea0 ("f2fs: do not expose unwritten blocks to user by DIO") Cc: # 5.17+ Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 71f232dcf3c2..83639238a1fe 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -550,7 +550,8 @@ make_now: } f2fs_set_inode_flags(inode); - if (file_should_truncate(inode)) { + if (file_should_truncate(inode) && + !is_sbi_flag_set(sbi, SBI_POR_DOING)) { ret = f2fs_truncate(inode); if (ret) goto bad_inode; From 37843d0f6e7a23af19a6cbe68b9503d318fe1a29 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 11 Apr 2022 08:23:41 +0100 Subject: [PATCH 213/803] clk: microchip: mpfs: don't reset disabled peripherals The current clock driver for PolarFire SoC puts the hardware behind "periph" clocks into reset if their clock is disabled. CONFIG_PM was recently added to the riscv defconfig and exposed issues caused by this behaviour, where the Cadence GEM was being put into reset between its bringup & the PHY bringup: https://lore.kernel.org/linux-riscv/9f4b057d-1985-5fd3-65c0-f944161c7792@microchip.com/ Fix this (for now) by removing the reset from mpfs_periph_clk_disable. Fixes: 635e5e73370e ("clk: microchip: Add driver for Microchip PolarFire SoC") Reviewed-by: Daire McNamara Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20220411072340.740981-1-conor.dooley@microchip.com Signed-off-by: Stephen Boyd --- drivers/clk/microchip/clk-mpfs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/clk/microchip/clk-mpfs.c b/drivers/clk/microchip/clk-mpfs.c index aa1561b773d6..744ef2ba2a0c 100644 --- a/drivers/clk/microchip/clk-mpfs.c +++ b/drivers/clk/microchip/clk-mpfs.c @@ -200,10 +200,6 @@ static void mpfs_periph_clk_disable(struct clk_hw *hw) spin_lock_irqsave(&mpfs_clk_lock, flags); - reg = readl_relaxed(base_addr + REG_SUBBLK_RESET_CR); - val = reg | (1u << periph->shift); - writel_relaxed(val, base_addr + REG_SUBBLK_RESET_CR); - reg = readl_relaxed(base_addr + REG_SUBBLK_CLOCK_CR); val = reg & ~(1u << periph->shift); writel_relaxed(val, base_addr + REG_SUBBLK_CLOCK_CR); From d968fda3de91ec2f250ba27149cb1b5e9516415f Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 20 Apr 2022 02:54:47 +0300 Subject: [PATCH 214/803] clk: qcom: clk-rcg2: fix gfx3d frequency calculation Since the commit 948fb0969eae ("clk: Always clamp the rounded rate"), the clk_core_determine_round_nolock() would clamp the requested rate between min and max rates from the rate request. Normally these fields would be filled by clk_core_get_boundaries() called from clk_round_rate(). However clk_gfx3d_determine_rate() uses a manually crafted rate request, which did not have these fields filled. Thus the requested frequency would be clamped to 0, resulting in weird frequencies being requested from the hardware. Fix this by filling min_rate and max_rate to the values valid for the respective PLLs (0 and ULONG_MAX). Fixes: 948fb0969eae ("clk: Always clamp the rounded rate") Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220419235447.1586192-1-dmitry.baryshkov@linaro.org Reviewed-by: Bjorn Andersson Reported-by: Rob Clark Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-rcg2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index f675fd969c4d..e9c357309fd9 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -818,7 +818,7 @@ EXPORT_SYMBOL_GPL(clk_pixel_ops); static int clk_gfx3d_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { - struct clk_rate_request parent_req = { }; + struct clk_rate_request parent_req = { .min_rate = 0, .max_rate = ULONG_MAX }; struct clk_rcg2_gfx3d *cgfx = to_clk_rcg2_gfx3d(hw); struct clk_hw *xo, *p0, *p1, *p2; unsigned long p0_rate; From 60cc5468daaefc18ffc081dc484bdaa1bd270561 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Thu, 21 Apr 2022 14:32:54 -0300 Subject: [PATCH 215/803] =?UTF-8?q?futex:=20MAINTAINERS,=20.mailmap:=20Upd?= =?UTF-8?q?ate=20Andr=C3=A9's=20email=20address?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update futex entry to use my new professional email address. Signed-off-by: André Almeida Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220421173254.29855-1-andrealmeid@igalia.com --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 93458154ce7d..ea1ba4a9a77e 100644 --- a/.mailmap +++ b/.mailmap @@ -45,6 +45,7 @@ Andrey Konovalov Andrey Ryabinin Andrey Ryabinin Andrzej Hajda +André Almeida Andy Adamson Antoine Tenart Antoine Tenart diff --git a/MAINTAINERS b/MAINTAINERS index 40fa1955ca3f..35dea3d12981 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8109,7 +8109,7 @@ M: Ingo Molnar R: Peter Zijlstra R: Darren Hart R: Davidlohr Bueso -R: André Almeida +R: André Almeida L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core From 214cab6f8020a9ad4a5e9862a4e68088d5a79f08 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 18 Apr 2022 21:20:16 +0000 Subject: [PATCH 216/803] MAINTAINERS: Update email address for John Stultz I've switched jobs, so update my email address in MAINTAINERS Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Acked-by: Sumit Semwal Link: https://lore.kernel.org/r/20220418212016.2669086-1-jstultz@google.com --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 40fa1955ca3f..c2e1b7202449 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5914,7 +5914,7 @@ R: Benjamin Gaignard R: Liam Mark R: Laura Abbott R: Brian Starkey -R: John Stultz +R: John Stultz L: linux-media@vger.kernel.org L: dri-devel@lists.freedesktop.org L: linaro-mm-sig@lists.linaro.org (moderated for non-subscribers) @@ -6584,7 +6584,7 @@ F: drivers/gpu/drm/gma500/ DRM DRIVERS FOR HISILICON M: Xinliang Liu M: Tian Tao -R: John Stultz +R: John Stultz R: Xinwei Kong R: Chen Feng L: dri-devel@lists.freedesktop.org @@ -8845,7 +8845,7 @@ F: Documentation/devicetree/bindings/net/hisilicon*.txt F: drivers/net/ethernet/hisilicon/ HIKEY960 ONBOARD USB GPIO HUB DRIVER -M: John Stultz +M: John Stultz L: linux-kernel@vger.kernel.org S: Maintained F: drivers/misc/hisi_hikey_usb.c @@ -19784,7 +19784,7 @@ F: drivers/net/wireless/ti/ F: include/linux/wl12xx.h TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER -M: John Stultz +M: John Stultz M: Thomas Gleixner R: Stephen Boyd L: linux-kernel@vger.kernel.org From a6ac60b36dade525c13c5bb0838589619533efb7 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 22 Apr 2022 15:39:37 +0800 Subject: [PATCH 217/803] ALSA: hda/realtek: Fix mute led issue on thinkpad with cs35l41 s-codec The quirk ALC287_FIXUP_CS35L41_I2C_2 needs to chain the quirk ALC269_FIXUP_THINKPAD_ACPI, otherwise the mute led will not work if a thinkpad machine applies that quirk. And it will be safe if non-thinkpad machines apply that quirk since hda_fixup_thinkpad_acpi() will check and return in this case. Fixes: ae7abe36e352e ("ALSA: hda/realtek: Add CS35L41 support for Thinkpad laptops") Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20220422073937.10073-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4c0c593f3c0a..f9c3b2c9ca12 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8769,6 +8769,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC287_FIXUP_CS35L41_I2C_2] = { .type = HDA_FIXUP_FUNC, .v.func = cs35l41_fixup_i2c_two, + .chained = true, + .chain_id = ALC269_FIXUP_THINKPAD_ACPI, }, [ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED] = { .type = HDA_FIXUP_FUNC, From 5f5d8890789c90470d9571a283f0b789acd594af Mon Sep 17 00:00:00 2001 From: Andy Chi Date: Fri, 22 Apr 2022 17:08:43 +0800 Subject: [PATCH 218/803] ALSA: hda/realtek: Enable mute/micmute LEDs support for HP Laptops On HP Laptops, requires the same ALC285_FIXUP_HP_GPIO_LED quirk to make its audio LEDs work. So apply the quirk, and make it the last one since it's an LED quirk. Signed-off-by: Andy Chi Fixes: 07bcab93946c ("ALSA: hda/realtek: Add support for HP Laptops") Link: https://lore.kernel.org/r/20220422090845.230071-1-andy.chi@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f9c3b2c9ca12..c65d3dbc6cc9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9025,12 +9025,12 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED), - SND_PCI_QUIRK(0x103c, 0x896e, "HP EliteBook x360 830 G9", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x103c, 0x8971, "HP EliteBook 830 G9", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x103c, 0x8972, "HP EliteBook 840 G9", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x103c, 0x896e, "HP EliteBook x360 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8971, "HP EliteBook 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8972, "HP EliteBook 840 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4), SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), From d48fea8401cfa942c67cc3a522bf379143dbb576 Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Mon, 18 Apr 2022 10:58:34 +0000 Subject: [PATCH 219/803] net: cosa: fix error check return value of register_chrdev() If major equal 0, register_chrdev() returns error code when it fails. This function dynamically allocate a major and return its number on success, so we should use "< 0" to check it instead of "!". Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Acked-By: Jan "Yenya" Kasprzak Signed-off-by: David S. Miller --- drivers/net/wan/cosa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 23d2954d9747..1e5672019922 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -349,7 +349,7 @@ static int __init cosa_init(void) } } else { cosa_major = register_chrdev(0, "cosa", &cosa_fops); - if (!cosa_major) { + if (cosa_major < 0) { pr_warn("unable to register chardev\n"); err = -EIO; goto out; From c087c6e7b551b7f208c0b852304f044954cf2bb3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 17 Apr 2022 17:03:40 +0200 Subject: [PATCH 220/803] objtool: Fix type of reloc::addend Elf{32,64}_Rela::r_addend is of type: Elf{32,64}_Sword, that means that our reloc::addend needs to be long or face tuncation issues when we do elf_rebuild_reloc_section(): - 107: 48 b8 00 00 00 00 00 00 00 00 movabs $0x0,%rax 109: R_X86_64_64 level4_kernel_pgt+0x80000067 + 107: 48 b8 00 00 00 00 00 00 00 00 movabs $0x0,%rax 109: R_X86_64_64 level4_kernel_pgt-0x7fffff99 Fixes: 627fce14809b ("objtool: Add ORC unwind table generation") Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20220419203807.596871927@infradead.org --- tools/objtool/check.c | 8 ++++---- tools/objtool/elf.c | 2 +- tools/objtool/include/objtool/elf.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5f10653eb5c2..3f6785415894 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -559,12 +559,12 @@ static int add_dead_ends(struct objtool_file *file) else if (reloc->addend == reloc->sym->sec->sh.sh_size) { insn = find_last_insn(file, reloc->sym->sec); if (!insn) { - WARN("can't find unreachable insn at %s+0x%x", + WARN("can't find unreachable insn at %s+0x%lx", reloc->sym->sec->name, reloc->addend); return -1; } } else { - WARN("can't find unreachable insn at %s+0x%x", + WARN("can't find unreachable insn at %s+0x%lx", reloc->sym->sec->name, reloc->addend); return -1; } @@ -594,12 +594,12 @@ reachable: else if (reloc->addend == reloc->sym->sec->sh.sh_size) { insn = find_last_insn(file, reloc->sym->sec); if (!insn) { - WARN("can't find reachable insn at %s+0x%x", + WARN("can't find reachable insn at %s+0x%lx", reloc->sym->sec->name, reloc->addend); return -1; } } else { - WARN("can't find reachable insn at %s+0x%x", + WARN("can't find reachable insn at %s+0x%lx", reloc->sym->sec->name, reloc->addend); return -1; } diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index d7b99a737496..0cfe84ac4cdb 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -546,7 +546,7 @@ static struct section *elf_create_reloc_section(struct elf *elf, int reltype); int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, - unsigned int type, struct symbol *sym, int addend) + unsigned int type, struct symbol *sym, long addend) { struct reloc *reloc; diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 22ba7e2b816e..9b36802ed86f 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -73,7 +73,7 @@ struct reloc { struct symbol *sym; unsigned long offset; unsigned int type; - int addend; + long addend; int idx; bool jump_table_start; }; @@ -135,7 +135,7 @@ struct elf *elf_open_read(const char *name, int flags); struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, - unsigned int type, struct symbol *sym, int addend); + unsigned int type, struct symbol *sym, long addend); int elf_add_reloc_to_insn(struct elf *elf, struct section *sec, unsigned long offset, unsigned int type, struct section *insn_sec, unsigned long insn_off); From 4abff6d48dbcea8200c7ea35ba70c242d128ebf3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 17 Apr 2022 17:03:36 +0200 Subject: [PATCH 221/803] objtool: Fix code relocs vs weak symbols Occasionally objtool driven code patching (think .static_call_sites .retpoline_sites etc..) goes sideways and it tries to patch an instruction that doesn't match. Much head-scatching and cursing later the problem is as outlined below and affects every section that objtool generates for us, very much including the ORC data. The below uses .static_call_sites because it's convenient for demonstration purposes, but as mentioned the ORC sections, .retpoline_sites and __mount_loc are all similarly affected. Consider: foo-weak.c: extern void __SCT__foo(void); __attribute__((weak)) void foo(void) { return __SCT__foo(); } foo.c: extern void __SCT__foo(void); extern void my_foo(void); void foo(void) { my_foo(); return __SCT__foo(); } These generate the obvious code (gcc -O2 -fcf-protection=none -fno-asynchronous-unwind-tables -c foo*.c): foo-weak.o: 0000000000000000 : 0: e9 00 00 00 00 jmpq 5 1: R_X86_64_PLT32 __SCT__foo-0x4 foo.o: 0000000000000000 : 0: 48 83 ec 08 sub $0x8,%rsp 4: e8 00 00 00 00 callq 9 5: R_X86_64_PLT32 my_foo-0x4 9: 48 83 c4 08 add $0x8,%rsp d: e9 00 00 00 00 jmpq 12 e: R_X86_64_PLT32 __SCT__foo-0x4 Now, when we link these two files together, you get something like (ld -r -o foos.o foo-weak.o foo.o): foos.o: 0000000000000000 : 0: e9 00 00 00 00 jmpq 5 1: R_X86_64_PLT32 __SCT__foo-0x4 5: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:0x0(%rax,%rax,1) f: 90 nop 0000000000000010 : 10: 48 83 ec 08 sub $0x8,%rsp 14: e8 00 00 00 00 callq 19 15: R_X86_64_PLT32 my_foo-0x4 19: 48 83 c4 08 add $0x8,%rsp 1d: e9 00 00 00 00 jmpq 22 1e: R_X86_64_PLT32 __SCT__foo-0x4 Noting that ld preserves the weak function text, but strips the symbol off of it (hence objdump doing that funny negative offset thing). This does lead to 'interesting' unused code issues with objtool when ran on linked objects, but that seems to be working (fingers crossed). So far so good.. Now lets consider the objtool static_call output section (readelf output, old binutils): foo-weak.o: Relocation section '.rela.static_call_sites' at offset 0x2c8 contains 1 entry: Offset Info Type Symbol's Value Symbol's Name + Addend 0000000000000000 0000000200000002 R_X86_64_PC32 0000000000000000 .text + 0 0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1 foo.o: Relocation section '.rela.static_call_sites' at offset 0x310 contains 2 entries: Offset Info Type Symbol's Value Symbol's Name + Addend 0000000000000000 0000000200000002 R_X86_64_PC32 0000000000000000 .text + d 0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1 foos.o: Relocation section '.rela.static_call_sites' at offset 0x430 contains 4 entries: Offset Info Type Symbol's Value Symbol's Name + Addend 0000000000000000 0000000100000002 R_X86_64_PC32 0000000000000000 .text + 0 0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1 0000000000000008 0000000100000002 R_X86_64_PC32 0000000000000000 .text + 1d 000000000000000c 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1 So we have two patch sites, one in the dead code of the weak foo and one in the real foo. All is well. *HOWEVER*, when the toolchain strips unused section symbols it generates things like this (using new enough binutils): foo-weak.o: Relocation section '.rela.static_call_sites' at offset 0x2c8 contains 1 entry: Offset Info Type Symbol's Value Symbol's Name + Addend 0000000000000000 0000000200000002 R_X86_64_PC32 0000000000000000 foo + 0 0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1 foo.o: Relocation section '.rela.static_call_sites' at offset 0x310 contains 2 entries: Offset Info Type Symbol's Value Symbol's Name + Addend 0000000000000000 0000000200000002 R_X86_64_PC32 0000000000000000 foo + d 0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1 foos.o: Relocation section '.rela.static_call_sites' at offset 0x430 contains 4 entries: Offset Info Type Symbol's Value Symbol's Name + Addend 0000000000000000 0000000100000002 R_X86_64_PC32 0000000000000000 foo + 0 0000000000000004 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1 0000000000000008 0000000100000002 R_X86_64_PC32 0000000000000000 foo + d 000000000000000c 0000000d00000002 R_X86_64_PC32 0000000000000000 __SCT__foo + 1 And now we can see how that foos.o .static_call_sites goes side-ways, we now have _two_ patch sites in foo. One for the weak symbol at foo+0 (which is no longer a static_call site!) and one at foo+d which is in fact the right location. This seems to happen when objtool cannot find a section symbol, in which case it falls back to any other symbol to key off of, however in this case that goes terribly wrong! As such, teach objtool to create a section symbol when there isn't one. Fixes: 44f6a7c0755d ("objtool: Fix seg fault with Clang non-section symbols") Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20220419203807.655552918@infradead.org --- tools/objtool/elf.c | 187 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 165 insertions(+), 22 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 0cfe84ac4cdb..ebf2ba5755c1 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -575,37 +575,180 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, return 0; } +/* + * Ensure that any reloc section containing references to @sym is marked + * changed such that it will get re-generated in elf_rebuild_reloc_sections() + * with the new symbol index. + */ +static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym) +{ + struct section *sec; + + list_for_each_entry(sec, &elf->sections, list) { + struct reloc *reloc; + + if (sec->changed) + continue; + + list_for_each_entry(reloc, &sec->reloc_list, list) { + if (reloc->sym == sym) { + sec->changed = true; + break; + } + } + } +} + +/* + * Move the first global symbol, as per sh_info, into a new, higher symbol + * index. This fees up the shndx for a new local symbol. + */ +static int elf_move_global_symbol(struct elf *elf, struct section *symtab, + struct section *symtab_shndx) +{ + Elf_Data *data, *shndx_data = NULL; + Elf32_Word first_non_local; + struct symbol *sym; + Elf_Scn *s; + + first_non_local = symtab->sh.sh_info; + + sym = find_symbol_by_index(elf, first_non_local); + if (!sym) { + WARN("no non-local symbols !?"); + return first_non_local; + } + + s = elf_getscn(elf->elf, symtab->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return -1; + } + + data = elf_newdata(s); + if (!data) { + WARN_ELF("elf_newdata"); + return -1; + } + + data->d_buf = &sym->sym; + data->d_size = sizeof(sym->sym); + data->d_align = 1; + data->d_type = ELF_T_SYM; + + sym->idx = symtab->sh.sh_size / sizeof(sym->sym); + elf_dirty_reloc_sym(elf, sym); + + symtab->sh.sh_info += 1; + symtab->sh.sh_size += data->d_size; + symtab->changed = true; + + if (symtab_shndx) { + s = elf_getscn(elf->elf, symtab_shndx->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return -1; + } + + shndx_data = elf_newdata(s); + if (!shndx_data) { + WARN_ELF("elf_newshndx_data"); + return -1; + } + + shndx_data->d_buf = &sym->sec->idx; + shndx_data->d_size = sizeof(Elf32_Word); + shndx_data->d_align = 4; + shndx_data->d_type = ELF_T_WORD; + + symtab_shndx->sh.sh_size += 4; + symtab_shndx->changed = true; + } + + return first_non_local; +} + +static struct symbol * +elf_create_section_symbol(struct elf *elf, struct section *sec) +{ + struct section *symtab, *symtab_shndx; + Elf_Data *shndx_data = NULL; + struct symbol *sym; + Elf32_Word shndx; + + symtab = find_section_by_name(elf, ".symtab"); + if (symtab) { + symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); + if (symtab_shndx) + shndx_data = symtab_shndx->data; + } else { + WARN("no .symtab"); + return NULL; + } + + sym = malloc(sizeof(*sym)); + if (!sym) { + perror("malloc"); + return NULL; + } + memset(sym, 0, sizeof(*sym)); + + sym->idx = elf_move_global_symbol(elf, symtab, symtab_shndx); + if (sym->idx < 0) { + WARN("elf_move_global_symbol"); + return NULL; + } + + sym->name = sec->name; + sym->sec = sec; + + // st_name 0 + sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION); + // st_other 0 + // st_value 0 + // st_size 0 + shndx = sec->idx; + if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) { + sym->sym.st_shndx = shndx; + if (!shndx_data) + shndx = 0; + } else { + sym->sym.st_shndx = SHN_XINDEX; + if (!shndx_data) { + WARN("no .symtab_shndx"); + return NULL; + } + } + + if (!gelf_update_symshndx(symtab->data, shndx_data, sym->idx, &sym->sym, shndx)) { + WARN_ELF("gelf_update_symshndx"); + return NULL; + } + + elf_add_symbol(elf, sym); + + return sym; +} + int elf_add_reloc_to_insn(struct elf *elf, struct section *sec, unsigned long offset, unsigned int type, struct section *insn_sec, unsigned long insn_off) { - struct symbol *sym; - int addend; + struct symbol *sym = insn_sec->sym; + int addend = insn_off; - if (insn_sec->sym) { - sym = insn_sec->sym; - addend = insn_off; - - } else { + if (!sym) { /* - * The Clang assembler strips section symbols, so we have to - * reference the function symbol instead: + * Due to how weak functions work, we must use section based + * relocations. Symbol based relocations would result in the + * weak and non-weak function annotations being overlaid on the + * non-weak function after linking. */ - sym = find_symbol_containing(insn_sec, insn_off); - if (!sym) { - /* - * Hack alert. This happens when we need to reference - * the NOP pad insn immediately after the function. - */ - sym = find_symbol_containing(insn_sec, insn_off - 1); - } - - if (!sym) { - WARN("can't find symbol containing %s+0x%lx", insn_sec->name, insn_off); + sym = elf_create_section_symbol(elf, insn_sec); + if (!sym) return -1; - } - addend = insn_off - sym->offset; + insn_sec->sym = sym; } return elf_add_reloc(elf, sec, offset, type, sym, addend); From 4a25f2ea0e030b2fc852c4059a50181bfc5b2f57 Mon Sep 17 00:00:00 2001 From: Ashish Mhetre Date: Thu, 21 Apr 2022 13:45:04 +0530 Subject: [PATCH 222/803] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu Tegra194 and Tegra234 SoCs have the erratum that causes walk cache entries to not be invalidated correctly. The problem is that the walk cache index generated for IOVA is not same across translation and invalidation requests. This is leading to page faults when PMD entry is released during unmap and populated with new PTE table during subsequent map request. Disabling large page mappings avoids the release of PMD entry and avoid translations seeing stale PMD entry in walk cache. Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and Tegra234 devices. This is recommended fix from Tegra hardware design team. Acked-by: Robin Murphy Reviewed-by: Krishna Reddy Co-developed-by: Pritesh Raithatha Signed-off-by: Pritesh Raithatha Signed-off-by: Ashish Mhetre Link: https://lore.kernel.org/r/20220421081504.24678-1-amhetre@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 30 ++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c index 01e9b50b10a1..87bf522b9d2e 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c @@ -258,6 +258,34 @@ static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct devi dev_name(dev), err); } +static int nvidia_smmu_init_context(struct arm_smmu_domain *smmu_domain, + struct io_pgtable_cfg *pgtbl_cfg, + struct device *dev) +{ + struct arm_smmu_device *smmu = smmu_domain->smmu; + const struct device_node *np = smmu->dev->of_node; + + /* + * Tegra194 and Tegra234 SoCs have the erratum that causes walk cache + * entries to not be invalidated correctly. The problem is that the walk + * cache index generated for IOVA is not same across translation and + * invalidation requests. This is leading to page faults when PMD entry + * is released during unmap and populated with new PTE table during + * subsequent map request. Disabling large page mappings avoids the + * release of PMD entry and avoid translations seeing stale PMD entry in + * walk cache. + * Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and + * Tegra234. + */ + if (of_device_is_compatible(np, "nvidia,tegra234-smmu") || + of_device_is_compatible(np, "nvidia,tegra194-smmu")) { + smmu->pgsize_bitmap = PAGE_SIZE; + pgtbl_cfg->pgsize_bitmap = smmu->pgsize_bitmap; + } + + return 0; +} + static const struct arm_smmu_impl nvidia_smmu_impl = { .read_reg = nvidia_smmu_read_reg, .write_reg = nvidia_smmu_write_reg, @@ -268,10 +296,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = { .global_fault = nvidia_smmu_global_fault, .context_fault = nvidia_smmu_context_fault, .probe_finalize = nvidia_smmu_probe_finalize, + .init_context = nvidia_smmu_init_context, }; static const struct arm_smmu_impl nvidia_smmu_single_impl = { .probe_finalize = nvidia_smmu_probe_finalize, + .init_context = nvidia_smmu_init_context, }; struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu) From 87c18514bb8477563a61f50b4285da156296edc4 Mon Sep 17 00:00:00 2001 From: ChiYuan Huang Date: Fri, 22 Apr 2022 14:26:50 +0800 Subject: [PATCH 223/803] ASoC: rt9120: Correct the reg 0x09 size to one byte Correct the reg 0x09 size to one byte. Signed-off-by: ChiYuan Huang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1650608810-3829-1-git-send-email-u0084500@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt9120.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/rt9120.c b/sound/soc/codecs/rt9120.c index 7aa1772a915f..6e0d7cf0c8c9 100644 --- a/sound/soc/codecs/rt9120.c +++ b/sound/soc/codecs/rt9120.c @@ -341,7 +341,6 @@ static int rt9120_get_reg_size(unsigned int reg) { switch (reg) { case 0x00: - case 0x09: case 0x20 ... 0x27: return 2; case 0x30 ... 0x3D: From dfd2b37edf7ef469574ef7f36e3a1905ac9ead62 Mon Sep 17 00:00:00 2001 From: ChiYuan Huang Date: Fri, 22 Apr 2022 14:50:55 +0800 Subject: [PATCH 224/803] regulator: dt-bindings: Revise the rt5190a buck/ldo description Revise the rt5190a bucks and ldo property description. Signed-off-by: ChiYuan Huang Link: https://lore.kernel.org/r/1650610255-6180-1-git-send-email-u0084500@gmail.com Signed-off-by: Mark Brown --- .../bindings/regulator/richtek,rt5190a-regulator.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml b/Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml index 28725c5467fc..edb411be0390 100644 --- a/Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml @@ -58,7 +58,7 @@ properties: type: object $ref: regulator.yaml# description: | - regulator description for buck1 and buck4. + regulator description for buck1 to buck4, and ldo. properties: regulator-allowed-modes: From fc06b2867f4cea543505acfb194c2be4ebf0c7d3 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 20 Apr 2022 19:04:08 +0800 Subject: [PATCH 225/803] net: dsa: Add missing of_node_put() in dsa_port_link_register_of The device_node pointer is returned by of_parse_phandle() with refcount incremented. We should use of_node_put() on it when done. of_node_put() will check for NULL value. Fixes: a20f997010c4 ("net: dsa: Don't instantiate phylink for CPU/DSA ports unless needed") Signed-off-by: Miaoqian Lin Signed-off-by: David S. Miller --- net/dsa/port.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/dsa/port.c b/net/dsa/port.c index 32d472a82241..cdc56ba11f52 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1620,8 +1620,10 @@ int dsa_port_link_register_of(struct dsa_port *dp) if (ds->ops->phylink_mac_link_down) ds->ops->phylink_mac_link_down(ds, port, MLO_AN_FIXED, PHY_INTERFACE_MODE_NA); + of_node_put(phy_np); return dsa_port_phylink_register(dp); } + of_node_put(phy_np); return 0; } From f4fd84ae0765a80494b28c43b756a95100351a94 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 21 Apr 2022 19:33:56 -0700 Subject: [PATCH 226/803] usb: dwc3: core: Only handle soft-reset in DCTL Make sure not to set run_stop bit or link state change request while initiating soft-reset. Register read-modify-write operation may unintentionally start the controller before the initialization completes with its previous DCTL value, which can cause initialization failure. Fixes: f59dcab17629 ("usb: dwc3: core: improve reset sequence") Cc: Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/6aecbd78328f102003d40ccf18ceeebd411d3703.1650594792.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 1ca9dae57855..d28cd1a6709b 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -274,7 +274,8 @@ int dwc3_core_soft_reset(struct dwc3 *dwc) reg = dwc3_readl(dwc->regs, DWC3_DCTL); reg |= DWC3_DCTL_CSFTRST; - dwc3_writel(dwc->regs, DWC3_DCTL, reg); + reg &= ~DWC3_DCTL_RUN_STOP; + dwc3_gadget_dctl_write_safe(dwc, reg); /* * For DWC_usb31 controller 1.90a and later, the DCTL.CSFRST bit From babc3dc9524f0bcb5a0ec61f3c3639b11508fad6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 18 Apr 2022 12:21:05 +0200 Subject: [PATCH 227/803] netfilter: nft_set_rbtree: overlap detection with element re-addition after deletion This patch fixes spurious EEXIST errors. Extend d2df92e98a34 ("netfilter: nft_set_rbtree: handle element re-addition after deletion") to deal with elements with same end flags in the same transation. Reset the overlap flag as described by 7c84d41416d8 ("netfilter: nft_set_rbtree: Detect partial overlaps on insertion"). Fixes: 7c84d41416d8 ("netfilter: nft_set_rbtree: Detect partial overlaps on insertion") Fixes: d2df92e98a34 ("netfilter: nft_set_rbtree: handle element re-addition after deletion") Signed-off-by: Pablo Neira Ayuso Reviewed-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index d600a566da32..7325bee7d144 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -349,7 +349,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, *ext = &rbe->ext; return -EEXIST; } else { - p = &parent->rb_left; + overlap = false; + if (nft_rbtree_interval_end(rbe)) + p = &parent->rb_left; + else + p = &parent->rb_right; } } From 41c606879f89623dd5269eaffea640b915e9e17c Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 18 Apr 2022 17:43:39 +0800 Subject: [PATCH 228/803] Revert "serial: sc16is7xx: Clear RS485 bits in the shutdown" This reverts commit 927728a34f11b5a27f4610bdb7068317d6fdc72a. Once the uart_port->rs485->flag is set to SER_RS485_ENABLED, the port should always work in RS485 mode. If users want the port to leave RS485 mode, they need to call ioctl() to clear SER_RS485_ENABLED. So here we shouldn't clear the RS485 bits in the shutdown(). Fixes: 927728a34f11 ("serial: sc16is7xx: Clear RS485 bits in the shutdown") Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20220418094339.678144-1-hui.wang@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index e857fb61efbf..5fb201c1b563 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1238,12 +1238,10 @@ static void sc16is7xx_shutdown(struct uart_port *port) /* Disable all interrupts */ sc16is7xx_port_write(port, SC16IS7XX_IER_REG, 0); - /* Disable TX/RX, clear auto RS485 and RTS invert */ + /* Disable TX/RX */ sc16is7xx_port_update(port, SC16IS7XX_EFCR_REG, SC16IS7XX_EFCR_RXDISABLE_BIT | - SC16IS7XX_EFCR_TXDISABLE_BIT | - SC16IS7XX_EFCR_AUTO_RS485_BIT | - SC16IS7XX_EFCR_RTS_INVERT_BIT, + SC16IS7XX_EFCR_TXDISABLE_BIT, SC16IS7XX_EFCR_RXDISABLE_BIT | SC16IS7XX_EFCR_TXDISABLE_BIT); From a8c5b8255f8a9acd58a4b15ff1c14cd6effd114b Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Fri, 22 Apr 2022 00:10:23 -0700 Subject: [PATCH 229/803] tty: n_gsm: fix broken virtual tty handling Dynamic virtual tty registration was introduced to allow the user to handle these cases with uevent rules. The following commits relate to this: Commit 5b87686e3203 ("tty: n_gsm: Modify gsmtty driver register method when config requester") Commit 0b91b5332368 ("tty: n_gsm: Save dlci address open status when config requester") Commit 46292622ad73 ("tty: n_gsm: clean up indenting in gsm_queue()") However, the following behavior can be seen with this implementation: - n_gsm ldisc is activated via ioctl - all configuration parameters are set to their default value (initiator=0) - the mux gets activated and attached and gsmtty0 is being registered in in gsm_dlci_open() after DLCI 0 was established (DLCI 0 is the control channel) - the user configures n_gsm via ioctl GSMIOC_SETCONF as initiator - this re-attaches the n_gsm mux - no new gsmtty devices are registered in gsmld_attach_gsm() because the mux is already active - the initiator side registered only the control channel as gsmtty0 (which should never happen) and no user channel tty The commits above make it impossible to operate the initiator side as no user channel tty is or will be available. On the other hand, this behavior will make it also impossible to allow DLCI parameter negotiation on responder side in the future. The responder side first needs to provide a device for the application before the application can set its parameters of the associated DLCI via ioctl. Note that the user application is still able to detect a link establishment without relaying to uevent by waiting for DTR open on responder side. This is the same behavior as on a physical serial interface. And on initiator side a tty hangup can be detected if a link establishment request failed. Revert the commits above completely to always register all user channels and no control channel after mux attachment. No other changes are made. Fixes: 5b87686e3203 ("tty: n_gsm: Modify gsmtty driver register method when config requester") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220422071025.5490-1-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 87 ++++++++------------------------------------- 1 file changed, 15 insertions(+), 72 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 979dc9151383..99fe54247a87 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -272,10 +272,6 @@ static DEFINE_SPINLOCK(gsm_mux_lock); static struct tty_driver *gsm_tty_driver; -/* Save dlci open address */ -static int addr_open[256] = { 0 }; -/* Save dlci open count */ -static int addr_cnt; /* * This section of the driver logic implements the GSM encodings * both the basic and the 'advanced'. Reliable transport is not @@ -1185,7 +1181,6 @@ static void gsm_control_rls(struct gsm_mux *gsm, const u8 *data, int clen) } static void gsm_dlci_begin_close(struct gsm_dlci *dlci); -static void gsm_dlci_close(struct gsm_dlci *dlci); /** * gsm_control_message - DLCI 0 control processing @@ -1204,28 +1199,15 @@ static void gsm_control_message(struct gsm_mux *gsm, unsigned int command, { u8 buf[1]; unsigned long flags; - struct gsm_dlci *dlci; - int i; - int address; switch (command) { case CMD_CLD: { - if (addr_cnt > 0) { - for (i = 0; i < addr_cnt; i++) { - address = addr_open[i]; - dlci = gsm->dlci[address]; - gsm_dlci_close(dlci); - addr_open[i] = 0; - } - } + struct gsm_dlci *dlci = gsm->dlci[0]; /* Modem wishes to close down */ - dlci = gsm->dlci[0]; if (dlci) { dlci->dead = true; gsm->dead = true; - gsm_dlci_close(dlci); - addr_cnt = 0; - gsm_response(gsm, 0, UA|PF); + gsm_dlci_begin_close(dlci); } } break; @@ -1459,8 +1441,6 @@ static void gsm_dlci_close(struct gsm_dlci *dlci) wake_up_interruptible(&dlci->port.open_wait); } else dlci->gsm->dead = true; - /* Unregister gsmtty driver,report gsmtty dev remove uevent for user */ - tty_unregister_device(gsm_tty_driver, dlci->addr); wake_up(&dlci->gsm->event); /* A DLCI 0 close is a MUX termination so we need to kick that back to userspace somehow */ @@ -1482,8 +1462,6 @@ static void gsm_dlci_open(struct gsm_dlci *dlci) dlci->state = DLCI_OPEN; if (debug & 8) pr_debug("DLCI %d goes open.\n", dlci->addr); - /* Register gsmtty driver,report gsmtty dev add uevent for user */ - tty_register_device(gsm_tty_driver, dlci->addr, NULL); /* Send current modem state */ if (dlci->addr) gsmtty_modem_update(dlci, 0); @@ -1794,7 +1772,6 @@ static void gsm_queue(struct gsm_mux *gsm) struct gsm_dlci *dlci; u8 cr; int address; - int i, j, k, address_tmp; if (gsm->fcs != GOOD_FCS) { gsm->bad_fcs++; @@ -1826,11 +1803,6 @@ static void gsm_queue(struct gsm_mux *gsm) else { gsm_response(gsm, address, UA|PF); gsm_dlci_open(dlci); - /* Save dlci open address */ - if (address) { - addr_open[addr_cnt] = address; - addr_cnt++; - } } break; case DISC|PF: @@ -1841,33 +1813,8 @@ static void gsm_queue(struct gsm_mux *gsm) return; } /* Real close complete */ - if (!address) { - if (addr_cnt > 0) { - for (i = 0; i < addr_cnt; i++) { - address = addr_open[i]; - dlci = gsm->dlci[address]; - gsm_dlci_close(dlci); - addr_open[i] = 0; - } - } - dlci = gsm->dlci[0]; - gsm_dlci_close(dlci); - addr_cnt = 0; - gsm_response(gsm, 0, UA|PF); - } else { - gsm_response(gsm, address, UA|PF); - gsm_dlci_close(dlci); - /* clear dlci address */ - for (j = 0; j < addr_cnt; j++) { - address_tmp = addr_open[j]; - if (address_tmp == address) { - for (k = j; k < addr_cnt; k++) - addr_open[k] = addr_open[k+1]; - addr_cnt--; - break; - } - } - } + gsm_response(gsm, address, UA|PF); + gsm_dlci_close(dlci); break; case UA|PF: if (cr == 0 || dlci == NULL) @@ -2451,19 +2398,17 @@ static int gsmld_attach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) else { /* Don't register device 0 - this is the control channel and not a usable tty interface */ - if (gsm->initiator) { - base = mux_num_to_base(gsm); /* Base for this MUX */ - for (i = 1; i < NUM_DLCI; i++) { - struct device *dev; + base = mux_num_to_base(gsm); /* Base for this MUX */ + for (i = 1; i < NUM_DLCI; i++) { + struct device *dev; - dev = tty_register_device(gsm_tty_driver, + dev = tty_register_device(gsm_tty_driver, base + i, NULL); - if (IS_ERR(dev)) { - for (i--; i >= 1; i--) - tty_unregister_device(gsm_tty_driver, - base + i); - return PTR_ERR(dev); - } + if (IS_ERR(dev)) { + for (i--; i >= 1; i--) + tty_unregister_device(gsm_tty_driver, + base + i); + return PTR_ERR(dev); } } } @@ -2485,10 +2430,8 @@ static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) int i; WARN_ON(tty != gsm->tty); - if (gsm->initiator) { - for (i = 1; i < NUM_DLCI; i++) - tty_unregister_device(gsm_tty_driver, base + i); - } + for (i = 1; i < NUM_DLCI; i++) + tty_unregister_device(gsm_tty_driver, base + i); tty_kref_put(gsm->tty); gsm->tty = NULL; } From c19ffe00fed6bb423d81406d2a7e5793074c7d83 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Fri, 22 Apr 2022 00:10:24 -0700 Subject: [PATCH 230/803] tty: n_gsm: fix invalid use of MSC in advanced option n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to the newer 27.010 here. Chapter 5.4.6.3.7 states that the Modem Status Command (MSC) shall only be used if the basic option was chosen. The current implementation uses MSC frames even if advanced option was chosen to inform the peer about modem line state updates. A standard conform peer may choose to discard these frames in advanced option mode. Furthermore, gsmtty_modem_update() is not part of the 'tty_operations' functions despite its name. Rename gsmtty_modem_update() to gsm_modem_update() to clarify this. Split its function into gsm_modem_upd_via_data() and gsm_modem_upd_via_msc() depending on the encoding and adaption. Introduce gsm_dlci_modem_output() as adaption of gsm_dlci_data_output() to encode and queue empty frames in advanced option mode. Use it in gsm_modem_upd_via_data(). gsm_modem_upd_via_msc() is based on the initial gsmtty_modem_update() function which used only MSC frames to update modem states. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220422071025.5490-2-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 125 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 117 insertions(+), 8 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 99fe54247a87..570f0b8b7576 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -366,7 +366,7 @@ static const u8 gsm_fcs8[256] = { #define GOOD_FCS 0xCF static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len); -static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk); +static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk); /** * gsm_fcs_add - update FCS @@ -914,6 +914,63 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, return size; } +/** + * gsm_dlci_modem_output - try and push modem status out of a DLCI + * @gsm: mux + * @dlci: the DLCI to pull modem status from + * @brk: break signal + * + * Push an empty frame in to the transmit queue to update the modem status + * bits and to transmit an optional break. + * + * Caller must hold the tx_lock of the mux. + */ + +static int gsm_dlci_modem_output(struct gsm_mux *gsm, struct gsm_dlci *dlci, + u8 brk) +{ + u8 *dp = NULL; + struct gsm_msg *msg; + int size; + + /* for modem bits without break data */ + if (dlci->adaption == 1) { + size = 0; + } else if (dlci->adaption == 2) { + size = 1; + if (brk > 0) + size++; + } else { + pr_err("%s: unsupported adaption %d\n", __func__, + dlci->adaption); + } + + msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype); + if (!msg) { + pr_err("%s: gsm_data_alloc error", __func__); + return -ENOMEM; + } + dp = msg->data; + switch (dlci->adaption) { + case 1: /* Unstructured */ + break; + case 2: /* Unstructured with modem bits. */ + if (brk == 0) { + *dp++ = (gsm_encode_modem(dlci) << 1) | EA; + } else { + *dp++ = gsm_encode_modem(dlci) << 1; + *dp++ = (brk << 4) | 2 | EA; /* Length, Break, EA */ + } + break; + default: + /* Handled above */ + break; + } + + __gsm_data_queue(dlci, msg); + return size; +} + /** * gsm_dlci_data_sweep - look for data to send * @gsm: the GSM mux @@ -1464,7 +1521,7 @@ static void gsm_dlci_open(struct gsm_dlci *dlci) pr_debug("DLCI %d goes open.\n", dlci->addr); /* Send current modem state */ if (dlci->addr) - gsmtty_modem_update(dlci, 0); + gsm_modem_update(dlci, 0); wake_up(&dlci->gsm->event); } @@ -2897,12 +2954,43 @@ static struct tty_ldisc_ops tty_ldisc_packet = { #define TX_SIZE 512 -static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk) +/** + * gsm_modem_upd_via_data - send modem bits via convergence layer + * @dlci: channel + * @brk: break signal + * + * Send an empty frame to signal mobile state changes and to transmit the + * break signal for adaption 2. + */ + +static void gsm_modem_upd_via_data(struct gsm_dlci *dlci, u8 brk) +{ + struct gsm_mux *gsm = dlci->gsm; + unsigned long flags; + + if (dlci->state != DLCI_OPEN || dlci->adaption != 2) + return; + + spin_lock_irqsave(&gsm->tx_lock, flags); + gsm_dlci_modem_output(gsm, dlci, brk); + spin_unlock_irqrestore(&gsm->tx_lock, flags); +} + +/** + * gsm_modem_upd_via_msc - send modem bits via control frame + * @dlci: channel + * @brk: break signal + */ + +static int gsm_modem_upd_via_msc(struct gsm_dlci *dlci, u8 brk) { u8 modembits[3]; struct gsm_control *ctrl; int len = 2; + if (dlci->gsm->encoding != 0) + return 0; + modembits[0] = (dlci->addr << 2) | 2 | EA; /* DLCI, Valid, EA */ if (!brk) { modembits[1] = (gsm_encode_modem(dlci) << 1) | EA; @@ -2917,6 +3005,27 @@ static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk) return gsm_control_wait(dlci->gsm, ctrl); } +/** + * gsm_modem_update - send modem status line state + * @dlci: channel + * @brk: break signal + */ + +static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk) +{ + if (dlci->adaption == 2) { + /* Send convergence layer type 2 empty data frame. */ + gsm_modem_upd_via_data(dlci, brk); + return 0; + } else if (dlci->gsm->encoding == 0) { + /* Send as MSC control message. */ + return gsm_modem_upd_via_msc(dlci, brk); + } + + /* Modem status lines are not supported. */ + return -EPROTONOSUPPORT; +} + static int gsm_carrier_raised(struct tty_port *port) { struct gsm_dlci *dlci = container_of(port, struct gsm_dlci, port); @@ -2949,7 +3058,7 @@ static void gsm_dtr_rts(struct tty_port *port, int onoff) modem_tx &= ~(TIOCM_DTR | TIOCM_RTS); if (modem_tx != dlci->modem_tx) { dlci->modem_tx = modem_tx; - gsmtty_modem_update(dlci, 0); + gsm_modem_update(dlci, 0); } } @@ -3140,7 +3249,7 @@ static int gsmtty_tiocmset(struct tty_struct *tty, if (modem_tx != dlci->modem_tx) { dlci->modem_tx = modem_tx; - return gsmtty_modem_update(dlci, 0); + return gsm_modem_update(dlci, 0); } return 0; } @@ -3201,7 +3310,7 @@ static void gsmtty_throttle(struct tty_struct *tty) dlci->modem_tx &= ~TIOCM_RTS; dlci->throttled = true; /* Send an MSC with RTS cleared */ - gsmtty_modem_update(dlci, 0); + gsm_modem_update(dlci, 0); } static void gsmtty_unthrottle(struct tty_struct *tty) @@ -3213,7 +3322,7 @@ static void gsmtty_unthrottle(struct tty_struct *tty) dlci->modem_tx |= TIOCM_RTS; dlci->throttled = false; /* Send an MSC with RTS set */ - gsmtty_modem_update(dlci, 0); + gsm_modem_update(dlci, 0); } static int gsmtty_break_ctl(struct tty_struct *tty, int state) @@ -3231,7 +3340,7 @@ static int gsmtty_break_ctl(struct tty_struct *tty, int state) if (encode > 0x0F) encode = 0x0F; /* Best effort */ } - return gsmtty_modem_update(dlci, encode); + return gsm_modem_update(dlci, encode); } static void gsmtty_cleanup(struct tty_struct *tty) From f4f7d63287217ba25e5c80f5faae5e4f7118790e Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Fri, 22 Apr 2022 00:10:25 -0700 Subject: [PATCH 231/803] tty: n_gsm: fix software flow control handling n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to the newer 27.010 here. Chapter 5.4.8.1 states that XON/XOFF characters shall be used instead of Fcon/Fcoff command in advanced option mode to handle flow control. Chapter 5.4.8.2 describes how XON/XOFF characters shall be handled. Basic option mode only used Fcon/Fcoff commands and no XON/XOFF characters. These are treated as data bytes here. The current implementation uses the gsm_mux field 'constipated' to handle flow control from the remote peer and the gsm_dlci field 'constipated' to handle flow control from each DLCI. The later is unrelated to this patch. The gsm_mux field is correctly set for Fcon/Fcoff commands in gsm_control_message(). However, the same is not true for XON/XOFF characters in gsm1_receive(). Disable software flow control handling in the tty to allow explicit handling by n_gsm. Add the missing handling in advanced option mode for gsm_mux in gsm1_receive() to comply with the standard. This patch depends on the following commit: Commit 8838b2af23ca ("tty: n_gsm: fix SW flow control encoding/handling") Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220422071025.5490-3-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 570f0b8b7576..8652308c187f 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -232,6 +232,7 @@ struct gsm_mux { int initiator; /* Did we initiate connection */ bool dead; /* Has the mux been shut down */ struct gsm_dlci *dlci[NUM_DLCI]; + int old_c_iflag; /* termios c_iflag value before attach */ bool constipated; /* Asked by remote to shut up */ spinlock_t tx_lock; @@ -2022,6 +2023,16 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c) static void gsm1_receive(struct gsm_mux *gsm, unsigned char c) { + /* handle XON/XOFF */ + if ((c & ISO_IEC_646_MASK) == XON) { + gsm->constipated = true; + return; + } else if ((c & ISO_IEC_646_MASK) == XOFF) { + gsm->constipated = false; + /* Kick the link in case it is idling */ + gsm_data_kick(gsm, NULL); + return; + } if (c == GSM1_SOF) { /* EOF is only valid in frame if we have got to the data state */ if (gsm->state == GSM_DATA) { @@ -2449,6 +2460,9 @@ static int gsmld_attach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) int ret, i; gsm->tty = tty_kref_get(tty); + /* Turn off tty XON/XOFF handling to handle it explicitly. */ + gsm->old_c_iflag = tty->termios.c_iflag; + tty->termios.c_iflag &= (IXON | IXOFF); ret = gsm_activate_mux(gsm); if (ret != 0) tty_kref_put(gsm->tty); @@ -2489,6 +2503,8 @@ static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) WARN_ON(tty != gsm->tty); for (i = 1; i < NUM_DLCI; i++) tty_unregister_device(gsm_tty_driver, base + i); + /* Restore tty XON/XOFF handling. */ + gsm->tty->termios.c_iflag = gsm->old_c_iflag; tty_kref_put(gsm->tty); gsm->tty = NULL; } From 6e6eebdf5e2455f089ccd000754a0deaeb79af82 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 18 Apr 2022 16:27:10 +0100 Subject: [PATCH 232/803] serial: 8250: Also set sticky MCR bits in console restoration Sticky MCR bits are lost in console restoration if console suspending has been disabled. This currently affects the AFE bit, which works in combination with RTS which we set, so we want to make sure the UART retains control of its FIFO where previously requested. Also specific drivers may need other bits in the future. Signed-off-by: Maciej W. Rozycki Fixes: 4516d50aabed ("serial: 8250: Use canary to restart console after suspend") Cc: stable@vger.kernel.org # v4.0+ Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/alpine.DEB.2.21.2204181518490.9383@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 26f9330094bc..1fbd5bf264be 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -3329,7 +3329,7 @@ static void serial8250_console_restore(struct uart_8250_port *up) serial8250_set_divisor(port, baud, quot, frac); serial_port_out(port, UART_LCR, up->lcr); - serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS); + serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); } /* From 637674fa40059cddcc3ad2212728965072f62ea3 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 18 Apr 2022 16:27:16 +0100 Subject: [PATCH 233/803] serial: 8250: Correct the clock for EndRun PTP/1588 PCIe device The EndRun PTP/1588 dual serial port device is based on the Oxford Semiconductor OXPCIe952 UART device with the PCI vendor:device ID set for EndRun Technologies and is therefore driven by a fixed 62.5MHz clock input derived from the 100MHz PCI Express clock. The clock rate is divided by the oversampling rate of 16 as it is supplied to the baud rate generator, yielding the baud base of 3906250. Replace the incorrect baud base of 4000000 with the right value of 3906250 then, complementing commit 6cbe45d8ac93 ("serial: 8250: Correct the clock for OxSemi PCIe devices"). Signed-off-by: Maciej W. Rozycki Cc: stable Fixes: 1bc8cde46a159 ("8250_pci: Added driver for Endrun Technologies PTP PCIe card.") Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/alpine.DEB.2.21.2204181515270.9383@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index e17e97ea86fa..a293e9f107d0 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -2667,7 +2667,7 @@ enum pci_board_num_t { pbn_panacom2, pbn_panacom4, pbn_plx_romulus, - pbn_endrun_2_4000000, + pbn_endrun_2_3906250, pbn_oxsemi, pbn_oxsemi_1_3906250, pbn_oxsemi_2_3906250, @@ -3195,10 +3195,10 @@ static struct pciserial_board pci_boards[] = { * signal now many ports are available * 2 port 952 Uart support */ - [pbn_endrun_2_4000000] = { + [pbn_endrun_2_3906250] = { .flags = FL_BASE0, .num_ports = 2, - .base_baud = 4000000, + .base_baud = 3906250, .uart_offset = 0x200, .first_offset = 0x1000, }, @@ -4115,7 +4115,7 @@ static const struct pci_device_id serial_pci_tbl[] = { */ { PCI_VENDOR_ID_ENDRUN, PCI_DEVICE_ID_ENDRUN_1588, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_endrun_2_4000000 }, + pbn_endrun_2_3906250 }, /* * Quatech cards. These actually have configurable clocks but for * now we just use the default. From 5a42ac43d0c900ade2a5c0337b2ea52d994bdec8 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 22 Apr 2022 16:10:28 +0200 Subject: [PATCH 234/803] Revert "thermal/core: Deprecate changing cooling device state from userspace" This reverts commit a67a46af4ad6342378e332b7420c1d1a2818c53f. It has been reported the warning is annoying as the cooling device state is still needed on some production system. Meanwhile we provide a way to consolidate the thermal framework to prevent multiple actors acting on the cooling devices with conflicting decisions, let's revert this warning. Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_sysfs.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index f154bada2906..1c4aac8464a7 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -610,9 +610,6 @@ cur_state_store(struct device *dev, struct device_attribute *attr, unsigned long state; int result; - dev_warn_once(&cdev->device, - "Setting cooling device state is deprecated\n"); - if (sscanf(buf, "%ld\n", &state) != 1) return -EINVAL; From fa1ef24ae251f7916e70b6fac94c7db3bb837426 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 22 Apr 2022 16:10:29 +0200 Subject: [PATCH 235/803] thermal/governor: Remove deprecated information The userspace governor is still in use on production systems and the deprecating warning is scary. Even if we want to get rid of the userspace governor, it is too soon yet as the alternatives are not yet adopted. Change the deprecated warning by an information message suggesting to switch to the netlink thermal events. Fixes: 0275c9fb0eff ("thermal/core: Make the userspace governor deprecated") Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/thermal/Kconfig | 6 ++++-- drivers/thermal/gov_user_space.c | 3 +-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index e37691e0bf20..0e5cc948373c 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -113,8 +113,10 @@ config THERMAL_DEFAULT_GOV_USER_SPACE bool "user_space" select THERMAL_GOV_USER_SPACE help - Select this if you want to let the user space manage the - platform thermals. + The Userspace governor allows to get trip point crossed + notification from the kernel via uevents. It is recommended + to use the netlink interface instead which gives richer + information about the thermal framework events. config THERMAL_DEFAULT_GOV_POWER_ALLOCATOR bool "power_allocator" diff --git a/drivers/thermal/gov_user_space.c b/drivers/thermal/gov_user_space.c index 64a18e354a20..a62a4e90bd3f 100644 --- a/drivers/thermal/gov_user_space.c +++ b/drivers/thermal/gov_user_space.c @@ -17,8 +17,7 @@ static int user_space_bind(struct thermal_zone_device *tz) { - pr_warn_once("Userspace governor deprecated: use thermal netlink " \ - "notification instead\n"); + pr_info_once("Consider using thermal netlink events interface\n"); return 0; } From e13433b4416fa31a24e621cbbbb39227a3d651dd Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 21 Apr 2022 10:32:34 -0400 Subject: [PATCH 236/803] SUNRPC release the transport of a relocated task with an assigned transport A relocated task must release its previous transport. Fixes: 82ee41b85cef1 ("SUNRPC don't resend a task on an offlined transport") Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index af0174d7ce5a..98133aa54f19 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1065,10 +1065,13 @@ rpc_task_get_next_xprt(struct rpc_clnt *clnt) static void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) { - if (task->tk_xprt && - !(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) && - (task->tk_flags & RPC_TASK_MOVEABLE))) - return; + if (task->tk_xprt) { + if (!(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) && + (task->tk_flags & RPC_TASK_MOVEABLE))) + return; + xprt_release(task); + xprt_put(task->tk_xprt); + } if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) task->tk_xprt = rpc_task_get_first_xprt(clnt); else From 2d1746e3fda0c3612143d7c06f8e1d1830c13e23 Mon Sep 17 00:00:00 2001 From: Alessandro Astone Date: Fri, 15 Apr 2022 14:00:15 +0200 Subject: [PATCH 237/803] binder: Address corner cases in deferred copy and fixup When handling BINDER_TYPE_FDA object we are pushing a parent fixup with a certain skip_size but no scatter-gather copy object, since the copy is handled standalone. If BINDER_TYPE_FDA is the last children the scatter-gather copy loop will never stop to skip it, thus we are left with an item in the parent fixup list. This will trigger the BUG_ON(). This is reproducible in android when playing a video. We receive a transaction that looks like this: obj[0] BINDER_TYPE_PTR, parent obj[1] BINDER_TYPE_PTR, child obj[2] BINDER_TYPE_PTR, child obj[3] BINDER_TYPE_FDA, child Fixes: 09184ae9b575 ("binder: defer copies of pre-patched txn data") Acked-by: Todd Kjos Cc: stable Signed-off-by: Alessandro Astone Link: https://lore.kernel.org/r/20220415120015.52684-2-ales.astone@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 8351c5638880..31176edb1069 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2295,6 +2295,7 @@ static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, { int ret = 0; struct binder_sg_copy *sgc, *tmpsgc; + struct binder_ptr_fixup *tmppf; struct binder_ptr_fixup *pf = list_first_entry_or_null(pf_head, struct binder_ptr_fixup, node); @@ -2349,7 +2350,11 @@ static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, list_del(&sgc->node); kfree(sgc); } - BUG_ON(!list_empty(pf_head)); + list_for_each_entry_safe(pf, tmppf, pf_head, node) { + BUG_ON(pf->skip_size == 0); + list_del(&pf->node); + kfree(pf); + } BUG_ON(!list_empty(sgc_head)); return ret > 0 ? -EINVAL : ret; From ef38de9217a04c9077629a24652689d8fdb4c6c6 Mon Sep 17 00:00:00 2001 From: Alessandro Astone Date: Fri, 15 Apr 2022 14:00:14 +0200 Subject: [PATCH 238/803] binder: Gracefully handle BINDER_TYPE_FDA objects with num_fds=0 Some android userspace is sending BINDER_TYPE_FDA objects with num_fds=0. Like the previous patch, this is reproducible when playing a video. Before commit 09184ae9b575 BINDER_TYPE_FDA objects with num_fds=0 were 'correctly handled', as in no fixup was performed. After commit 09184ae9b575 we aggregate fixup and skip regions in binder_ptr_fixup structs and distinguish between the two by using the skip_size field: if it's 0, then it's a fixup, otherwise skip. When processing BINDER_TYPE_FDA objects with num_fds=0 we add a skip region of skip_size=0, and this causes issues because now binder_do_deferred_txn_copies will think this was a fixup region. To address that, return early from binder_translate_fd_array to avoid adding an empty skip region. Fixes: 09184ae9b575 ("binder: defer copies of pre-patched txn data") Acked-by: Todd Kjos Cc: stable Signed-off-by: Alessandro Astone Link: https://lore.kernel.org/r/20220415120015.52684-1-ales.astone@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 31176edb1069..f3b639e89dd8 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2491,6 +2491,9 @@ static int binder_translate_fd_array(struct list_head *pf_head, struct binder_proc *proc = thread->proc; int ret; + if (fda->num_fds == 0) + return 0; + fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { binder_user_error("%d:%d got transaction with invalid number of fds (%lld)\n", From 8ec1442953c66a1d8462cccd8c20b7ba561f5915 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 6 Apr 2022 22:16:49 +0800 Subject: [PATCH 239/803] riscv: patch_text: Fixup last cpu should be master These patch_text implementations are using stop_machine_cpuslocked infrastructure with atomic cpu_count. The original idea: When the master CPU patch_text, the others should wait for it. But current implementation is using the first CPU as master, which couldn't guarantee the remaining CPUs are waiting. This patch changes the last CPU as the master to solve the potential risk. Signed-off-by: Guo Ren Signed-off-by: Guo Ren Acked-by: Palmer Dabbelt Reviewed-by: Masami Hiramatsu Fixes: 043cb41a85de ("riscv: introduce interfaces to patch kernel code") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/patch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 0b552873a577..765004b60513 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -104,7 +104,7 @@ static int patch_text_cb(void *data) struct patch_insn *patch = data; int ret = 0; - if (atomic_inc_return(&patch->cpu_count) == 1) { + if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) { ret = patch_text_nosync(patch->addr, &patch->insn, GET_INSN_LENGTH(patch->insn)); From b02d196c44ead1a5949729be9ff08fe781c3e48a Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Wed, 20 Apr 2022 19:52:19 +0300 Subject: [PATCH 240/803] bpf, lwt: Fix crash when using bpf_skb_set_tunnel_key() from bpf_xmit lwt hook xmit_check_hhlen() observes the dst for getting the device hard header length to make sure a modified packet can fit. When a helper which changes the dst - such as bpf_skb_set_tunnel_key() - is called as part of the xmit program the accessed dst is no longer valid. This leads to the following splat: BUG: kernel NULL pointer dereference, address: 00000000000000de #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 0 PID: 798 Comm: ping Not tainted 5.18.0-rc2+ #103 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 RIP: 0010:bpf_xmit+0xfb/0x17f Code: c6 c0 4d cd 8e 48 c7 c7 7d 33 f0 8e e8 42 09 fb ff 48 8b 45 58 48 8b 95 c8 00 00 00 48 2b 95 c0 00 00 00 48 83 e0 fe 48 8b 00 <0f> b7 80 de 00 00 00 39 c2 73 22 29 d0 b9 20 0a 00 00 31 d2 48 89 RSP: 0018:ffffb148c0bc7b98 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000240008 RCX: 0000000000000000 RDX: 0000000000000010 RSI: 00000000ffffffea RDI: 00000000ffffffff RBP: ffff922a828a4e00 R08: ffffffff8f1350e8 R09: 00000000ffffdfff R10: ffffffff8f055100 R11: ffffffff8f105100 R12: 0000000000000000 R13: ffff922a828a4e00 R14: 0000000000000040 R15: 0000000000000000 FS: 00007f414e8f0080(0000) GS:ffff922afdc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000de CR3: 0000000002d80006 CR4: 0000000000370ef0 Call Trace: lwtunnel_xmit.cold+0x71/0xc8 ip_finish_output2+0x279/0x520 ? __ip_finish_output.part.0+0x21/0x130 Fix by fetching the device hard header length before running the BPF code. Fixes: 3a0af8fd61f9 ("bpf: BPF for lightweight tunnel infrastructure") Signed-off-by: Eyal Birger Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220420165219.1755407-1-eyal.birger@gmail.com --- net/core/lwt_bpf.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 349480ef68a5..8b6b5e72b217 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -159,10 +159,8 @@ static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb) return dst->lwtstate->orig_output(net, sk, skb); } -static int xmit_check_hhlen(struct sk_buff *skb) +static int xmit_check_hhlen(struct sk_buff *skb, int hh_len) { - int hh_len = skb_dst(skb)->dev->hard_header_len; - if (skb_headroom(skb) < hh_len) { int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); @@ -274,6 +272,7 @@ static int bpf_xmit(struct sk_buff *skb) bpf = bpf_lwt_lwtunnel(dst->lwtstate); if (bpf->xmit.prog) { + int hh_len = dst->dev->hard_header_len; __be16 proto = skb->protocol; int ret; @@ -291,7 +290,7 @@ static int bpf_xmit(struct sk_buff *skb) /* If the header was expanded, headroom might be too * small for L2 header to come, expand as needed. */ - ret = xmit_check_hhlen(skb); + ret = xmit_check_hhlen(skb, hh_len); if (unlikely(ret)) return ret; From 1f3e25a068832f8892a5ff71467622d012f5bc9f Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 20 Apr 2022 16:24:31 +0200 Subject: [PATCH 241/803] pinctrl: rockchip: fix RK3308 pinmux bits Some of the pinmuxing bits described in rk3308_mux_recalced_data are wrong, pointing to non-existing registers. Fix the entire table. Also add a comment in front of each entry with the same string that appears in the datasheet to make the table easier to compare with the docs. This fix has been tested on real hardware for the gpio3b3_sel entry. Fixes: 7825aeb7b208 ("pinctrl: rockchip: add rk3308 SoC support") Signed-off-by: Luca Ceresoli Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20220420142432.248565-1-luca.ceresoli@bootlin.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-rockchip.c | 45 ++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index a1b598b86aa9..65fa305b5f59 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -457,95 +457,110 @@ static struct rockchip_mux_recalced_data rk3128_mux_recalced_data[] = { static struct rockchip_mux_recalced_data rk3308_mux_recalced_data[] = { { + /* gpio1b6_sel */ .num = 1, .pin = 14, .reg = 0x28, .bit = 12, .mask = 0xf }, { + /* gpio1b7_sel */ .num = 1, .pin = 15, .reg = 0x2c, .bit = 0, .mask = 0x3 }, { + /* gpio1c2_sel */ .num = 1, .pin = 18, .reg = 0x30, .bit = 4, .mask = 0xf }, { + /* gpio1c3_sel */ .num = 1, .pin = 19, .reg = 0x30, .bit = 8, .mask = 0xf }, { + /* gpio1c4_sel */ .num = 1, .pin = 20, .reg = 0x30, .bit = 12, .mask = 0xf }, { + /* gpio1c5_sel */ .num = 1, .pin = 21, .reg = 0x34, .bit = 0, .mask = 0xf }, { + /* gpio1c6_sel */ .num = 1, .pin = 22, .reg = 0x34, .bit = 4, .mask = 0xf }, { + /* gpio1c7_sel */ .num = 1, .pin = 23, .reg = 0x34, .bit = 8, .mask = 0xf }, { + /* gpio3b4_sel */ .num = 3, .pin = 12, .reg = 0x68, .bit = 8, .mask = 0xf }, { + /* gpio3b5_sel */ .num = 3, .pin = 13, .reg = 0x68, .bit = 12, .mask = 0xf }, { + /* gpio2a2_sel */ .num = 2, .pin = 2, - .reg = 0x608, - .bit = 0, - .mask = 0x7 + .reg = 0x40, + .bit = 4, + .mask = 0x3 }, { + /* gpio2a3_sel */ .num = 2, .pin = 3, - .reg = 0x608, - .bit = 4, - .mask = 0x7 + .reg = 0x40, + .bit = 6, + .mask = 0x3 }, { + /* gpio2c0_sel */ .num = 2, .pin = 16, - .reg = 0x610, - .bit = 8, - .mask = 0x7 + .reg = 0x50, + .bit = 0, + .mask = 0x3 }, { + /* gpio3b2_sel */ .num = 3, .pin = 10, - .reg = 0x610, - .bit = 0, - .mask = 0x7 + .reg = 0x68, + .bit = 4, + .mask = 0x3 }, { + /* gpio3b3_sel */ .num = 3, .pin = 11, - .reg = 0x610, - .bit = 4, - .mask = 0x7 + .reg = 0x68, + .bit = 6, + .mask = 0x3 }, }; From 7c4cffc5d473e87ae2eaa50aed8cb27d17bcd1ec Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 20 Apr 2022 16:24:32 +0200 Subject: [PATCH 242/803] pinctrl: rockchip: sort the rk3308_mux_recalced_data entries All the entries are sorted according to num/pin except for two entries. Sort them too. Signed-off-by: Luca Ceresoli Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20220420142432.248565-2-luca.ceresoli@bootlin.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-rockchip.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index 65fa305b5f59..2cb79e649fcf 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -512,20 +512,6 @@ static struct rockchip_mux_recalced_data rk3308_mux_recalced_data[] = { .reg = 0x34, .bit = 8, .mask = 0xf - }, { - /* gpio3b4_sel */ - .num = 3, - .pin = 12, - .reg = 0x68, - .bit = 8, - .mask = 0xf - }, { - /* gpio3b5_sel */ - .num = 3, - .pin = 13, - .reg = 0x68, - .bit = 12, - .mask = 0xf }, { /* gpio2a2_sel */ .num = 2, @@ -561,6 +547,20 @@ static struct rockchip_mux_recalced_data rk3308_mux_recalced_data[] = { .reg = 0x68, .bit = 6, .mask = 0x3 + }, { + /* gpio3b4_sel */ + .num = 3, + .pin = 12, + .reg = 0x68, + .bit = 8, + .mask = 0xf + }, { + /* gpio3b5_sel */ + .num = 3, + .pin = 13, + .reg = 0x68, + .bit = 12, + .mask = 0xf }, }; From 5b0b9e4c2c895227c8852488b3f09839233bba54 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Wed, 20 Apr 2022 17:50:26 -0700 Subject: [PATCH 243/803] tcp: md5: incorrect tcp_header_len for incoming connections In tcp_create_openreq_child we adjust tcp_header_len for md5 using the remote address in newsk. But that address is still 0 in newsk at this point, and it is only set later by the callers (tcp_v[46]_syn_recv_sock). Use the address from the request socket instead. Fixes: cfb6eeb4c860 ("[TCP]: MD5 Signature Option (RFC2385) support.") Signed-off-by: Francesco Ruggeri Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20220421005026.686A45EC01F2@us226.sjc.aristanetworks.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_minisocks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6366df7aaf2a..6854bb1fb32b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -531,7 +531,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ - if (newtp->af_specific->md5_lookup(sk, newsk)) + if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req))) newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; #endif if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) From 05d8af449d93e04547b4c6b328e39c890bc803f4 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 21 Apr 2022 16:08:27 +0200 Subject: [PATCH 244/803] pinctrl: stm32: Keep pinctrl block clock enabled when LEVEL IRQ requested The current EOI handler for LEVEL triggered interrupts calls clk_enable(), register IO, clk_disable(). The clock manipulation requires locking which happens with IRQs disabled in clk_enable_lock(). Instead of turning the clock on and off all the time, enable the clock in case LEVEL interrupt is requested and keep the clock enabled until all LEVEL interrupts are freed. The LEVEL interrupts are an exception on this platform and seldom used, so this does not affect the common case. This simplifies the LEVEL interrupt handling considerably and also fixes the following splat found when using preempt-rt: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at kernel/locking/rtmutex.c:2040 __rt_mutex_trylock+0x37/0x62 Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.10.109-rt65-stable-standard-00068-g6a5afc4b1217 #85 Hardware name: STM32 (Device Tree Support) [] (unwind_backtrace) from [] (show_stack+0xb/0xc) [] (show_stack) from [] (dump_stack+0x6f/0x84) [] (dump_stack) from [] (__warn+0x7f/0xa4) [] (__warn) from [] (warn_slowpath_fmt+0x3b/0x74) [] (warn_slowpath_fmt) from [] (__rt_mutex_trylock+0x37/0x62) [] (__rt_mutex_trylock) from [] (rt_spin_trylock+0x7/0x16) [] (rt_spin_trylock) from [] (clk_enable_lock+0xb/0x80) [] (clk_enable_lock) from [] (clk_core_enable_lock+0x9/0x18) [] (clk_core_enable_lock) from [] (stm32_gpio_get+0x11/0x24) [] (stm32_gpio_get) from [] (stm32_gpio_irq_trigger+0x1f/0x48) [] (stm32_gpio_irq_trigger) from [] (handle_fasteoi_irq+0x71/0xa8) [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x19/0x22) [] (generic_handle_irq) from [] (__handle_domain_irq+0x55/0x64) [] (__handle_domain_irq) from [] (gic_handle_irq+0x53/0x64) [] (gic_handle_irq) from [] (__irq_svc+0x65/0xc0) Exception stack(0xc0e01f18 to 0xc0e01f60) 1f00: 0000300c 00000000 1f20: 0000300c c010ff01 00000000 00000000 c0e00000 c0e07714 00000001 c0e01f78 1f40: c0e07758 00000000 ef7cd0ff c0e01f68 c010554b c0105542 40000033 ffffffff [] (__irq_svc) from [] (arch_cpu_idle+0xc/0x1e) [] (arch_cpu_idle) from [] (default_idle_call+0x21/0x3c) [] (default_idle_call) from [] (do_idle+0xe3/0x1e4) [] (do_idle) from [] (cpu_startup_entry+0x13/0x14) [] (cpu_startup_entry) from [] (start_kernel+0x397/0x3d4) [] (start_kernel) from [<00000000>] (0x0) ---[ end trace 0000000000000002 ]--- Power consumption measured on STM32MP157C DHCOM SoM is not increased or is below noise threshold. Fixes: 47beed513a85b ("pinctrl: stm32: Add level interrupt support to gpio irq chip") Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Fabien Dessenne Cc: Linus Walleij Cc: Marc Zyngier Cc: linux-stm32@st-md-mailman.stormreply.com Cc: linux-arm-kernel@lists.infradead.org To: linux-gpio@vger.kernel.org Reviewed-by: Fabien Dessenne Link: https://lore.kernel.org/r/20220421140827.214088-1-marex@denx.de Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/pinctrl-stm32.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index df1d6b466fb7..f7c9459f6628 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -225,6 +225,13 @@ static void stm32_gpio_free(struct gpio_chip *chip, unsigned offset) pinctrl_gpio_free(chip->base + offset); } +static int stm32_gpio_get_noclk(struct gpio_chip *chip, unsigned int offset) +{ + struct stm32_gpio_bank *bank = gpiochip_get_data(chip); + + return !!(readl_relaxed(bank->base + STM32_GPIO_IDR) & BIT(offset)); +} + static int stm32_gpio_get(struct gpio_chip *chip, unsigned offset) { struct stm32_gpio_bank *bank = gpiochip_get_data(chip); @@ -232,7 +239,7 @@ static int stm32_gpio_get(struct gpio_chip *chip, unsigned offset) clk_enable(bank->clk); - ret = !!(readl_relaxed(bank->base + STM32_GPIO_IDR) & BIT(offset)); + ret = stm32_gpio_get_noclk(chip, offset); clk_disable(bank->clk); @@ -316,7 +323,7 @@ static void stm32_gpio_irq_trigger(struct irq_data *d) return; /* If level interrupt type then retrig */ - level = stm32_gpio_get(&bank->gpio_chip, d->hwirq); + level = stm32_gpio_get_noclk(&bank->gpio_chip, d->hwirq); if ((level == 0 && bank->irq_type[d->hwirq] == IRQ_TYPE_LEVEL_LOW) || (level == 1 && bank->irq_type[d->hwirq] == IRQ_TYPE_LEVEL_HIGH)) irq_chip_retrigger_hierarchy(d); @@ -358,6 +365,7 @@ static int stm32_gpio_irq_request_resources(struct irq_data *irq_data) { struct stm32_gpio_bank *bank = irq_data->domain->host_data; struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent); + unsigned long flags; int ret; ret = stm32_gpio_direction_input(&bank->gpio_chip, irq_data->hwirq); @@ -371,6 +379,10 @@ static int stm32_gpio_irq_request_resources(struct irq_data *irq_data) return ret; } + flags = irqd_get_trigger_type(irq_data); + if (flags & IRQ_TYPE_LEVEL_MASK) + clk_enable(bank->clk); + return 0; } @@ -378,6 +390,9 @@ static void stm32_gpio_irq_release_resources(struct irq_data *irq_data) { struct stm32_gpio_bank *bank = irq_data->domain->host_data; + if (bank->irq_type[irq_data->hwirq] & IRQ_TYPE_LEVEL_MASK) + clk_disable(bank->clk); + gpiochip_unlock_as_irq(&bank->gpio_chip, irq_data->hwirq); } From 7f40ea2145d926510b27b785562d2c92df1b0d91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Thu, 21 Apr 2022 12:12:47 +0200 Subject: [PATCH 245/803] net: bridge: switchdev: check br_vlan_group() return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit br_vlan_group() can return NULL and thus return value must be checked to avoid dereferencing a NULL pointer. Fixes: 6284c723d9b9 ("net: bridge: mst: Notify switchdev drivers of VLAN MSTI migrations") Signed-off-by: Clément Léger Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20220421101247.121896-1-clement.leger@bootlin.com Signed-off-by: Jakub Kicinski --- net/bridge/br_switchdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 8cc44c367231..18affda2b522 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -353,6 +353,8 @@ static int br_switchdev_vlan_attr_replay(struct net_device *br_dev, attr.orig_dev = br_dev; vg = br_vlan_group(br); + if (!vg) + return 0; list_for_each_entry(v, &vg->vlan_list, vlist) { if (v->msti) { From b391719191c1b1f5d89330b00c98f21775e5fd8c Mon Sep 17 00:00:00 2001 From: Luiz Angelo Daros de Luca Date: Mon, 18 Apr 2022 20:35:57 -0300 Subject: [PATCH 246/803] dt-bindings: net: dsa: realtek: cleanup compatible strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compatible strings are used to help the driver find the chip ID/version register for each chip family. After that, the driver can setup the switch accordingly. Keep only the first supported model for each family as a compatible string and reference other chip models in the description. The removed compatible strings have never been used in a released kernel. Link: https://lore.kernel.org/netdev/20220414014055.m4wbmr7tdz6hsa3m@bang-olufsen.dk/ Signed-off-by: Luiz Angelo Daros de Luca Reviewed-by: Andrew Lunn Acked-by: Arınç ÜNAL Reviewed-by: Alvin Šipraga Link: https://lore.kernel.org/r/20220418233558.13541-1-luizluca@gmail.com Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/dsa/realtek.yaml | 35 ++++++++----------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/Documentation/devicetree/bindings/net/dsa/realtek.yaml b/Documentation/devicetree/bindings/net/dsa/realtek.yaml index 8756060895a8..99ee4b5b9346 100644 --- a/Documentation/devicetree/bindings/net/dsa/realtek.yaml +++ b/Documentation/devicetree/bindings/net/dsa/realtek.yaml @@ -27,32 +27,25 @@ description: The realtek-mdio driver is an MDIO driver and it must be inserted inside an MDIO node. + The compatible string is only used to identify which (silicon) family the + switch belongs to. Roughly speaking, a family is any set of Realtek switches + whose chip identification register(s) have a common location and semantics. + The different models in a given family can be automatically disambiguated by + parsing the chip identification register(s) according to the given family, + avoiding the need for a unique compatible string for each model. + properties: compatible: enum: - realtek,rtl8365mb - - realtek,rtl8366 - realtek,rtl8366rb - - realtek,rtl8366s - - realtek,rtl8367 - - realtek,rtl8367b - - realtek,rtl8367rb - - realtek,rtl8367s - - realtek,rtl8368s - - realtek,rtl8369 - - realtek,rtl8370 description: | - realtek,rtl8365mb: 4+1 ports - realtek,rtl8366: 5+1 ports - realtek,rtl8366rb: 5+1 ports - realtek,rtl8366s: 5+1 ports - realtek,rtl8367: - realtek,rtl8367b: - realtek,rtl8367rb: 5+2 ports - realtek,rtl8367s: 5+2 ports - realtek,rtl8368s: 8 ports - realtek,rtl8369: 8+1 ports - realtek,rtl8370: 8+2 ports + realtek,rtl8365mb: + Use with models RTL8363NB, RTL8363NB-VB, RTL8363SC, RTL8363SC-VB, + RTL8364NB, RTL8364NB-VB, RTL8365MB, RTL8366SC, RTL8367RB-VB, RTL8367S, + RTL8367SB, RTL8370MB, RTL8310SR + realtek,rtl8366rb: + Use with models RTL8366RB, RTL8366S mdc-gpios: description: GPIO line for the MDC clock line. @@ -335,7 +328,7 @@ examples: #size-cells = <0>; switch@29 { - compatible = "realtek,rtl8367s"; + compatible = "realtek,rtl8365mb"; reg = <29>; reset-gpios = <&gpio2 20 GPIO_ACTIVE_LOW>; From b107a6392b4bdd0e10e155e6b66d75af9e44d85a Mon Sep 17 00:00:00 2001 From: Luiz Angelo Daros de Luca Date: Mon, 18 Apr 2022 20:35:58 -0300 Subject: [PATCH 247/803] net: dsa: realtek: remove realtek,rtl8367s string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to add new compatible strings for each new supported chip version. The compatible string is used only to select the subdriver (rtl8365mb.c or rtl8366rb.c). Once in the subdriver, it will detect the chip model by itself, ignoring which compatible string was used. Link: https://lore.kernel.org/netdev/20220414014055.m4wbmr7tdz6hsa3m@bang-olufsen.dk/ Signed-off-by: Luiz Angelo Daros de Luca Reviewed-by: Alvin Šipraga Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Acked-by: Arınç ÜNAL Link: https://lore.kernel.org/r/20220418233558.13541-2-luizluca@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/realtek/realtek-mdio.c | 1 - drivers/net/dsa/realtek/realtek-smi.c | 4 ---- 2 files changed, 5 deletions(-) diff --git a/drivers/net/dsa/realtek/realtek-mdio.c b/drivers/net/dsa/realtek/realtek-mdio.c index 31e1f100e48e..c58f49d558d2 100644 --- a/drivers/net/dsa/realtek/realtek-mdio.c +++ b/drivers/net/dsa/realtek/realtek-mdio.c @@ -267,7 +267,6 @@ static const struct of_device_id realtek_mdio_of_match[] = { #endif #if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8365MB) { .compatible = "realtek,rtl8365mb", .data = &rtl8365mb_variant, }, - { .compatible = "realtek,rtl8367s", .data = &rtl8365mb_variant, }, #endif { /* sentinel */ }, }; diff --git a/drivers/net/dsa/realtek/realtek-smi.c b/drivers/net/dsa/realtek/realtek-smi.c index 6cec559c90ce..45992f79ec8d 100644 --- a/drivers/net/dsa/realtek/realtek-smi.c +++ b/drivers/net/dsa/realtek/realtek-smi.c @@ -551,10 +551,6 @@ static const struct of_device_id realtek_smi_of_match[] = { .compatible = "realtek,rtl8365mb", .data = &rtl8365mb_variant, }, - { - .compatible = "realtek,rtl8367s", - .data = &rtl8365mb_variant, - }, #endif { /* sentinel */ }, }; From b253a0680ceadc5d7b4acca7aa2d870326cad8ad Mon Sep 17 00:00:00 2001 From: Pengcheng Yang Date: Wed, 20 Apr 2022 10:34:41 +0800 Subject: [PATCH 248/803] tcp: ensure to use the most recently sent skb when filling the rate sample If an ACK (s)acks multiple skbs, we favor the information from the most recently sent skb by choosing the skb with the highest prior_delivered count. But in the interval between receiving ACKs, we send multiple skbs with the same prior_delivered, because the tp->delivered only changes when we receive an ACK. We used RACK's solution, copying tcp_rack_sent_after() as tcp_skb_sent_after() helper to determine "which packet was sent last?". Later, we will use tcp_skb_sent_after() instead in RACK. Fixes: b9f64820fb22 ("tcp: track data delivery rate for a TCP connection") Signed-off-by: Pengcheng Yang Cc: Paolo Abeni Acked-by: Neal Cardwell Tested-by: Neal Cardwell Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/1650422081-22153-1-git-send-email-yangpc@wangsu.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 6 ++++++ net/ipv4/tcp_rate.c | 11 ++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 70ca4a5e330a..be712fb9ddd7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1042,6 +1042,7 @@ struct rate_sample { int losses; /* number of packets marked lost upon ACK */ u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ u32 prior_in_flight; /* in flight before this ACK */ + u32 last_end_seq; /* end_seq of most recently ACKed packet */ bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ bool is_ack_delayed; /* is this (likely) a delayed ACK? */ @@ -1164,6 +1165,11 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, bool is_sack_reneg, struct rate_sample *rs); void tcp_rate_check_app_limited(struct sock *sk); +static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) +{ + return t1 > t2 || (t1 == t2 && after(seq1, seq2)); +} + /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary * between different flows. diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index fbab921670cc..9a8e014d9b5b 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -74,27 +74,32 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) * * If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is * called multiple times. We favor the information from the most recently - * sent skb, i.e., the skb with the highest prior_delivered count. + * sent skb, i.e., the skb with the most recently sent time and the highest + * sequence. */ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + u64 tx_tstamp; if (!scb->tx.delivered_mstamp) return; + tx_tstamp = tcp_skb_timestamp_us(skb); if (!rs->prior_delivered || - after(scb->tx.delivered, rs->prior_delivered)) { + tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp, + scb->end_seq, rs->last_end_seq)) { rs->prior_delivered_ce = scb->tx.delivered_ce; rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; + rs->last_end_seq = scb->end_seq; /* Record send time of most recently ACKed packet: */ - tp->first_tx_mstamp = tcp_skb_timestamp_us(skb); + tp->first_tx_mstamp = tx_tstamp; /* Find the duration of the "send phase" of this window: */ rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, scb->tx.first_tx_mstamp); From 00f3d2ed9dac8fc8674a021765a0772f74c6127b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 21 Apr 2022 15:48:04 +0200 Subject: [PATCH 249/803] wireguard: selftests: enable ACPI for SMP It turns out that by having CONFIG_ACPI=n, we've been failing to boot additional CPUs, and so these systems were functionally UP. The code bloat is unfortunate for build times, but I don't see an alternative. So this commit sets CONFIG_ACPI=y for x86_64 and i686 configs. Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- tools/testing/selftests/wireguard/qemu/arch/i686.config | 1 + tools/testing/selftests/wireguard/qemu/arch/x86_64.config | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config index a85025d7206e..a9b4fe795048 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/i686.config +++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config @@ -1,3 +1,4 @@ +CONFIG_ACPI=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config index 00a1ef4869d5..45dd53a0d760 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config +++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config @@ -1,3 +1,4 @@ +CONFIG_ACPI=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y From 45ac774c33d834fe9d4de06ab5f1022fe8cd2071 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 21 Apr 2022 15:48:05 +0200 Subject: [PATCH 250/803] wireguard: device: check for metadata_dst with skb_valid_dst() When we try to transmit an skb with md_dst attached through wireguard we hit a null pointer dereference in wg_xmit() due to the use of dst_mtu() which calls into dst_blackhole_mtu() which in turn tries to dereference dst->dev. Since wireguard doesn't use md_dsts we should use skb_valid_dst(), which checks for DST_METADATA flag, and if it's set, then falls back to wireguard's device mtu. That gives us the best chance of transmitting the packet; otherwise if the blackhole netdev is used we'd get ETH_MIN_MTU. [ 263.693506] BUG: kernel NULL pointer dereference, address: 00000000000000e0 [ 263.693908] #PF: supervisor read access in kernel mode [ 263.694174] #PF: error_code(0x0000) - not-present page [ 263.694424] PGD 0 P4D 0 [ 263.694653] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 263.694876] CPU: 5 PID: 951 Comm: mausezahn Kdump: loaded Not tainted 5.18.0-rc1+ #522 [ 263.695190] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1.fc35 04/01/2014 [ 263.695529] RIP: 0010:dst_blackhole_mtu+0x17/0x20 [ 263.695770] Code: 00 00 00 0f 1f 44 00 00 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 8b 47 10 48 83 e0 fc 8b 40 04 85 c0 75 09 48 8b 07 <8b> 80 e0 00 00 00 c3 66 90 0f 1f 44 00 00 48 89 d7 be 01 00 00 00 [ 263.696339] RSP: 0018:ffffa4a4422fbb28 EFLAGS: 00010246 [ 263.696600] RAX: 0000000000000000 RBX: ffff8ac9c3553000 RCX: 0000000000000000 [ 263.696891] RDX: 0000000000000401 RSI: 00000000fffffe01 RDI: ffffc4a43fb48900 [ 263.697178] RBP: ffffa4a4422fbb90 R08: ffffffff9622635e R09: 0000000000000002 [ 263.697469] R10: ffffffff9b69a6c0 R11: ffffa4a4422fbd0c R12: ffff8ac9d18b1a00 [ 263.697766] R13: ffff8ac9d0ce1840 R14: ffff8ac9d18b1a00 R15: ffff8ac9c3553000 [ 263.698054] FS: 00007f3704c337c0(0000) GS:ffff8acaebf40000(0000) knlGS:0000000000000000 [ 263.698470] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 263.698826] CR2: 00000000000000e0 CR3: 0000000117a5c000 CR4: 00000000000006e0 [ 263.699214] Call Trace: [ 263.699505] [ 263.699759] wg_xmit+0x411/0x450 [ 263.700059] ? bpf_skb_set_tunnel_key+0x46/0x2d0 [ 263.700382] ? dev_queue_xmit_nit+0x31/0x2b0 [ 263.700719] dev_hard_start_xmit+0xd9/0x220 [ 263.701047] __dev_queue_xmit+0x8b9/0xd30 [ 263.701344] __bpf_redirect+0x1a4/0x380 [ 263.701664] __dev_queue_xmit+0x83b/0xd30 [ 263.701961] ? packet_parse_headers+0xb4/0xf0 [ 263.702275] packet_sendmsg+0x9a8/0x16a0 [ 263.702596] ? _raw_spin_unlock_irqrestore+0x23/0x40 [ 263.702933] sock_sendmsg+0x5e/0x60 [ 263.703239] __sys_sendto+0xf0/0x160 [ 263.703549] __x64_sys_sendto+0x20/0x30 [ 263.703853] do_syscall_64+0x3b/0x90 [ 263.704162] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 263.704494] RIP: 0033:0x7f3704d50506 [ 263.704789] Code: 48 c7 c0 ff ff ff ff eb b7 66 2e 0f 1f 84 00 00 00 00 00 90 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 11 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 72 c3 90 55 48 83 ec 30 44 89 4c 24 2c 4c 89 [ 263.705652] RSP: 002b:00007ffe954b0b88 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 263.706141] RAX: ffffffffffffffda RBX: 0000558bb259b490 RCX: 00007f3704d50506 [ 263.706544] RDX: 000000000000004a RSI: 0000558bb259b7b2 RDI: 0000000000000003 [ 263.706952] RBP: 0000000000000000 R08: 00007ffe954b0b90 R09: 0000000000000014 [ 263.707339] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffe954b0b90 [ 263.707735] R13: 000000000000004a R14: 0000558bb259b7b2 R15: 0000000000000001 [ 263.708132] [ 263.708398] Modules linked in: bridge netconsole bonding [last unloaded: bridge] [ 263.708942] CR2: 00000000000000e0 Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Link: https://github.com/cilium/cilium/issues/19428 Reported-by: Martynas Pumputis Signed-off-by: Nikolay Aleksandrov Acked-by: Daniel Borkmann Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 0fad1331303c..aa9a7a5970fd 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -167,7 +168,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) goto err_peer; } - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; __skb_queue_head_init(&packets); if (!skb_is_gso(skb)) { From 5fd1fe4807f91ea0cca043114d929faa11bd4190 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Wed, 20 Apr 2022 10:23:45 -0500 Subject: [PATCH 251/803] net: ethernet: stmmac: fix write to sgmii_adapter_base I made a mistake with the commit a6aaa0032424 ("net: ethernet: stmmac: fix altr_tse_pcs function when using a fixed-link"). I should have tested against both scenario of having a SGMII interface and one without. Without the SGMII PCS TSE adpater, the sgmii_adapter_base address is NULL, thus a write to this address will fail. Cc: stable@vger.kernel.org Fixes: a6aaa0032424 ("net: ethernet: stmmac: fix altr_tse_pcs function when using a fixed-link") Signed-off-by: Dinh Nguyen Link: https://lore.kernel.org/r/20220420152345.27415-1-dinguyen@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index ac9e6c7a33b5..6b447d8f0bd8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -65,8 +65,9 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) struct phy_device *phy_dev = ndev->phydev; u32 val; - writew(SGMII_ADAPTER_DISABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); + if (sgmii_adapter_base) + writew(SGMII_ADAPTER_DISABLE, + sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); if (splitter_base) { val = readl(splitter_base + EMAC_SPLITTER_CTRL_REG); @@ -88,10 +89,11 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG); } - writew(SGMII_ADAPTER_ENABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); - if (phy_dev) + if (phy_dev && sgmii_adapter_base) { + writew(SGMII_ADAPTER_ENABLE, + sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); tse_pcs_fix_mac_speed(&dwmac->pcs, phy_dev, speed); + } } static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *dev) From 8f9fb2abe22ece8cac47a8cef3e716441d4ba169 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 13 Apr 2022 08:58:28 +0100 Subject: [PATCH 252/803] clk: microchip: mpfs: fix parents for FIC clocks The fabric interconnects are on the AXI bus not AHB. Update their parent clocks to fix this. Fixes: 635e5e73370e ("clk: microchip: Add driver for Microchip PolarFire SoC") Reviewed-by: Daire McNamara Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20220413075835.3354193-2-conor.dooley@microchip.com Acked-by: Palmer Dabbelt Signed-off-by: Stephen Boyd --- drivers/clk/microchip/clk-mpfs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/clk/microchip/clk-mpfs.c b/drivers/clk/microchip/clk-mpfs.c index 744ef2ba2a0c..8c433b37d6c6 100644 --- a/drivers/clk/microchip/clk-mpfs.c +++ b/drivers/clk/microchip/clk-mpfs.c @@ -273,11 +273,11 @@ static struct mpfs_periph_hw_clock mpfs_periph_clks[] = { CLK_PERIPH(CLK_GPIO1, "clk_periph_gpio1", PARENT_CLK(AHB), 21, 0), CLK_PERIPH(CLK_GPIO2, "clk_periph_gpio2", PARENT_CLK(AHB), 22, 0), CLK_PERIPH(CLK_DDRC, "clk_periph_ddrc", PARENT_CLK(AHB), 23, CLK_IS_CRITICAL), - CLK_PERIPH(CLK_FIC0, "clk_periph_fic0", PARENT_CLK(AHB), 24, CLK_IS_CRITICAL), - CLK_PERIPH(CLK_FIC1, "clk_periph_fic1", PARENT_CLK(AHB), 25, CLK_IS_CRITICAL), - CLK_PERIPH(CLK_FIC2, "clk_periph_fic2", PARENT_CLK(AHB), 26, CLK_IS_CRITICAL), - CLK_PERIPH(CLK_FIC3, "clk_periph_fic3", PARENT_CLK(AHB), 27, CLK_IS_CRITICAL), - CLK_PERIPH(CLK_ATHENA, "clk_periph_athena", PARENT_CLK(AHB), 28, 0), + CLK_PERIPH(CLK_FIC0, "clk_periph_fic0", PARENT_CLK(AXI), 24, CLK_IS_CRITICAL), + CLK_PERIPH(CLK_FIC1, "clk_periph_fic1", PARENT_CLK(AXI), 25, CLK_IS_CRITICAL), + CLK_PERIPH(CLK_FIC2, "clk_periph_fic2", PARENT_CLK(AXI), 26, CLK_IS_CRITICAL), + CLK_PERIPH(CLK_FIC3, "clk_periph_fic3", PARENT_CLK(AXI), 27, CLK_IS_CRITICAL), + CLK_PERIPH(CLK_ATHENA, "clk_periph_athena", PARENT_CLK(AXI), 28, 0), CLK_PERIPH(CLK_CFM, "clk_periph_cfm", PARENT_CLK(AHB), 29, 0), }; From a2438f82366eaeb4dc122c021884ea8deea5b215 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 13 Apr 2022 08:58:29 +0100 Subject: [PATCH 253/803] clk: microchip: mpfs: mark CLK_ATHENA as critical CLK_ATHENA is another fabric interconnect and should be marked as critical as with FIC0-3, since disabling it will cause part of the fabric to go into reset. Fixes: 635e5e73370e ("clk: microchip: Add driver for Microchip PolarFire SoC") Reviewed-by: Daire McNamara Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20220413075835.3354193-3-conor.dooley@microchip.com Acked-by: Palmer Dabbelt Signed-off-by: Stephen Boyd --- drivers/clk/microchip/clk-mpfs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/clk/microchip/clk-mpfs.c b/drivers/clk/microchip/clk-mpfs.c index 8c433b37d6c6..7056d6d5b92b 100644 --- a/drivers/clk/microchip/clk-mpfs.c +++ b/drivers/clk/microchip/clk-mpfs.c @@ -245,8 +245,10 @@ static const struct clk_ops mpfs_periph_clk_ops = { * trap handler * - CLK_MMUART0: reserved by the hss * - CLK_DDRC: provides clock to the ddr subsystem - * - CLK_FICx: these provide clocks for sections of the fpga fabric, disabling them would - * cause the fabric to go into reset + * - CLK_FICx: these provide the processor side clocks to the "FIC" (Fabric InterConnect) + * clock domain crossers which provide the interface to the FPGA fabric. Disabling them + * causes the FPGA fabric to go into reset. + * - CLK_ATHENA: The athena clock is FIC4, which is reserved for the Athena TeraFire. */ static struct mpfs_periph_hw_clock mpfs_periph_clks[] = { @@ -277,7 +279,7 @@ static struct mpfs_periph_hw_clock mpfs_periph_clks[] = { CLK_PERIPH(CLK_FIC1, "clk_periph_fic1", PARENT_CLK(AXI), 25, CLK_IS_CRITICAL), CLK_PERIPH(CLK_FIC2, "clk_periph_fic2", PARENT_CLK(AXI), 26, CLK_IS_CRITICAL), CLK_PERIPH(CLK_FIC3, "clk_periph_fic3", PARENT_CLK(AXI), 27, CLK_IS_CRITICAL), - CLK_PERIPH(CLK_ATHENA, "clk_periph_athena", PARENT_CLK(AXI), 28, 0), + CLK_PERIPH(CLK_ATHENA, "clk_periph_athena", PARENT_CLK(AXI), 28, CLK_IS_CRITICAL), CLK_PERIPH(CLK_CFM, "clk_periph_cfm", PARENT_CLK(AHB), 29, 0), }; From 2b6190c804238cbdca4e4fbe20304151203a3837 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 13 Apr 2022 08:58:30 +0100 Subject: [PATCH 254/803] riscv: dts: microchip: fix usage of fic clocks on mpfs The fic clocks passed to the pcie controller and other peripherals in the device tree are not the clocks they actually run on. The fics are actually clock domain crossers & the clock config blocks output is the mss/cpu side input to the interconnect. The peripherals are actually clocked by fixed frequency clocks embedded in the fpga fabric. Fix the device tree so that these peripherals use the correct clocks. The fabric side FIC0 & FIC1 inputs both use the same 125 MHz, so only one clock is created for them. Fixes: 528a5b1f2556 ("riscv: dts: microchip: add new peripherals to icicle kit device tree") Reviewed-by: Daire McNamara Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20220413075835.3354193-4-conor.dooley@microchip.com Acked-by: Palmer Dabbelt Signed-off-by: Stephen Boyd --- .../dts/microchip/microchip-mpfs-fabric.dtsi | 16 ++++++++++++++-- .../riscv/boot/dts/microchip/microchip-mpfs.dtsi | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi index 854320e17b28..ccaac3371cf9 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi @@ -7,7 +7,7 @@ reg = <0x0 0x41000000 0x0 0xF0>; microchip,sync-update-mask = /bits/ 32 <0>; #pwm-cells = <2>; - clocks = <&clkcfg CLK_FIC3>; + clocks = <&fabric_clk3>; status = "disabled"; }; @@ -16,10 +16,22 @@ reg = <0x0 0x44000000 0x0 0x1000>; #address-cells = <1>; #size-cells = <0>; - clocks = <&clkcfg CLK_FIC3>; + clocks = <&fabric_clk3>; interrupt-parent = <&plic>; interrupts = <122>; clock-frequency = <100000>; status = "disabled"; }; + + fabric_clk3: fabric-clk3 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <62500000>; + }; + + fabric_clk1: fabric-clk1 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <125000000>; + }; }; diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi index c5c9d1360de0..3b48b7f35410 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi @@ -424,7 +424,7 @@ <0 0 0 3 &pcie_intc 2>, <0 0 0 4 &pcie_intc 3>; interrupt-map-mask = <0 0 0 7>; - clocks = <&clkcfg CLK_FIC0>, <&clkcfg CLK_FIC1>, <&clkcfg CLK_FIC3>; + clocks = <&fabric_clk1>, <&fabric_clk1>, <&fabric_clk3>; clock-names = "fic0", "fic1", "fic3"; ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>; msi-parent = <&pcie>; From 3ebb9fdf466a246bb17164b70039dce584a0b959 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 13 Apr 2022 08:58:31 +0100 Subject: [PATCH 255/803] dt-bindings: clk: mpfs document msspll dri registers As there are two sections of registers that are responsible for clock configuration on the PolarFire SoC: add the dynamic reconfiguration interface section to the binding & describe what each of the sections are used for. Fixes: 2145bb687e3f ("dt-bindings: clk: microchip: Add Microchip PolarFire host binding") Reviewed-by: Daire McNamara Acked-by: Krzysztof Kozlowski Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20220413075835.3354193-5-conor.dooley@microchip.com Acked-by: Palmer Dabbelt Signed-off-by: Stephen Boyd --- .../devicetree/bindings/clock/microchip,mpfs.yaml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml b/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml index 0c15afa2214c..016a4f378b9b 100644 --- a/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml +++ b/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml @@ -22,7 +22,16 @@ properties: const: microchip,mpfs-clkcfg reg: - maxItems: 1 + items: + - description: | + clock config registers: + These registers contain enable, reset & divider tables for the, cpu, + axi, ahb and rtc/mtimer reference clocks as well as enable and reset + for the peripheral clocks. + - description: | + mss pll dri registers: + Block of registers responsible for dynamic reconfiguration of the mss + pll clocks: maxItems: 1 @@ -51,7 +60,7 @@ examples: #size-cells = <2>; clkcfg: clock-controller@20002000 { compatible = "microchip,mpfs-clkcfg"; - reg = <0x0 0x20002000 0x0 0x1000>; + reg = <0x0 0x20002000 0x0 0x1000>, <0x0 0x3E001000 0x0 0x1000>; clocks = <&ref>; #clock-cells = <1>; }; From 8be99c7b8bb17f2b82af4a0a6798b795f4d74436 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 13 Apr 2022 08:58:32 +0100 Subject: [PATCH 256/803] dt-bindings: clk: mpfs: add defines for two new clocks The RTC reference and MSSPLL were previously not documented or defined, as they were unused. Add their defines to the PolarFire SoC header. Fixes: 2145bb687e3f ("dt-bindings: clk: microchip: Add Microchip PolarFire host binding") Reviewed-by: Daire McNamara Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20220413075835.3354193-6-conor.dooley@microchip.com Acked-by: Krzysztof Kozlowski Acked-by: Palmer Dabbelt Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/microchip,mpfs-clock.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/dt-bindings/clock/microchip,mpfs-clock.h b/include/dt-bindings/clock/microchip,mpfs-clock.h index 73f2a9324857..4048669bf756 100644 --- a/include/dt-bindings/clock/microchip,mpfs-clock.h +++ b/include/dt-bindings/clock/microchip,mpfs-clock.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* * Daire McNamara, - * Copyright (C) 2020 Microchip Technology Inc. All rights reserved. + * Copyright (C) 2020-2022 Microchip Technology Inc. All rights reserved. */ #ifndef _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_ @@ -42,4 +42,7 @@ #define CLK_ATHENA 31 #define CLK_CFM 32 +#define CLK_RTCREF 33 +#define CLK_MSSPLL 34 + #endif /* _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_ */ From 8e8fbab4f1e659f9955bc946a2fc71b8c3ba17e0 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 13 Apr 2022 08:58:33 +0100 Subject: [PATCH 257/803] dt-bindings: rtc: add refclk to mpfs-rtc The rtc on PolarFire SoC does not use the AHB clock as its reference frequency, but rather a 1 MHz refclk that it shares with MTIMER. Add this second clock to the binding as a required property. Fixes: 4cbcc0d7b397 ("dt-bindings: rtc: add bindings for microchip mpfs rtc") Reviewed-by: Daire McNamara Acked-by: Krzysztof Kozlowski Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20220413075835.3354193-7-conor.dooley@microchip.com Acked-by: Palmer Dabbelt Signed-off-by: Stephen Boyd --- .../bindings/rtc/microchip,mfps-rtc.yaml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml b/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml index a2e984ea3553..500c62becd6b 100644 --- a/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml @@ -31,11 +31,19 @@ properties: to that of the RTC's count register. clocks: - maxItems: 1 + items: + - description: | + AHB clock + - description: | + Reference clock: divided by the prescaler to create a time-based + strobe (typically 1 Hz) for the calendar counter. By default, the rtc + on the PolarFire SoC shares it's reference with MTIMER so this will + be a 1 MHz clock. clock-names: items: - const: rtc + - const: rtcref required: - compatible @@ -48,11 +56,12 @@ additionalProperties: false examples: - | + #include "dt-bindings/clock/microchip,mpfs-clock.h" rtc@20124000 { compatible = "microchip,mpfs-rtc"; reg = <0x20124000 0x1000>; - clocks = <&clkcfg 21>; - clock-names = "rtc"; + clocks = <&clkcfg CLK_RTC>, <&clkcfg CLK_RTCREF>; + clock-names = "rtc", "rtcref"; interrupts = <80>, <81>; }; ... From 445c2da89747e2583062d988c98726cb2744b357 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 13 Apr 2022 08:58:34 +0100 Subject: [PATCH 258/803] clk: microchip: mpfs: re-parent the configurable clocks Currently the mpfs clock driver uses a reference clock called the "msspll", set in the device tree, as the parent for the cpu/axi/ahb (config) clocks. The frequency of the msspll is determined by the FPGA bitstream & the bootloader configures the clock to match the bitstream. The real reference is provided by a 100 or 125 MHz off chip oscillator. However, the msspll clock is not actually the parent of all clocks on the system - the reference clock for the rtc/mtimer actually has the off chip oscillator as its parent. In order to fix this, add support for reading the configuration of the msspll & reparent the "config" clocks so that they are derived from this clock rather than the reference in the device tree. Fixes: 635e5e73370e ("clk: microchip: Add driver for Microchip PolarFire SoC") Reviewed-by: Daire McNamara Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20220413075835.3354193-8-conor.dooley@microchip.com Acked-by: Palmer Dabbelt Signed-off-by: Stephen Boyd --- drivers/clk/microchip/clk-mpfs.c | 151 +++++++++++++++++++++++++++---- 1 file changed, 132 insertions(+), 19 deletions(-) diff --git a/drivers/clk/microchip/clk-mpfs.c b/drivers/clk/microchip/clk-mpfs.c index 7056d6d5b92b..8338a4c15404 100644 --- a/drivers/clk/microchip/clk-mpfs.c +++ b/drivers/clk/microchip/clk-mpfs.c @@ -11,20 +11,47 @@ #include /* address offset of control registers */ +#define REG_MSSPLL_REF_CR 0x08u +#define REG_MSSPLL_POSTDIV_CR 0x10u +#define REG_MSSPLL_SSCG_2_CR 0x2Cu #define REG_CLOCK_CONFIG_CR 0x08u #define REG_SUBBLK_CLOCK_CR 0x84u #define REG_SUBBLK_RESET_CR 0x88u +#define MSSPLL_FBDIV_SHIFT 0x00u +#define MSSPLL_FBDIV_WIDTH 0x0Cu +#define MSSPLL_REFDIV_SHIFT 0x08u +#define MSSPLL_REFDIV_WIDTH 0x06u +#define MSSPLL_POSTDIV_SHIFT 0x08u +#define MSSPLL_POSTDIV_WIDTH 0x07u +#define MSSPLL_FIXED_DIV 4u + struct mpfs_clock_data { void __iomem *base; + void __iomem *msspll_base; struct clk_hw_onecell_data hw_data; }; +struct mpfs_msspll_hw_clock { + void __iomem *base; + unsigned int id; + u32 reg_offset; + u32 shift; + u32 width; + u32 flags; + struct clk_hw hw; + struct clk_init_data init; +}; + +#define to_mpfs_msspll_clk(_hw) container_of(_hw, struct mpfs_msspll_hw_clock, hw) + struct mpfs_cfg_clock { const struct clk_div_table *table; unsigned int id; + u32 reg_offset; u8 shift; u8 width; + u8 flags; }; struct mpfs_cfg_hw_clock { @@ -55,7 +82,7 @@ struct mpfs_periph_hw_clock { */ static DEFINE_SPINLOCK(mpfs_clk_lock); -static const struct clk_parent_data mpfs_cfg_parent[] = { +static const struct clk_parent_data mpfs_ext_ref[] = { { .index = 0 }, }; @@ -69,6 +96,75 @@ static const struct clk_div_table mpfs_div_ahb_table[] = { { 0, 0 } }; +static unsigned long mpfs_clk_msspll_recalc_rate(struct clk_hw *hw, unsigned long prate) +{ + struct mpfs_msspll_hw_clock *msspll_hw = to_mpfs_msspll_clk(hw); + void __iomem *mult_addr = msspll_hw->base + msspll_hw->reg_offset; + void __iomem *ref_div_addr = msspll_hw->base + REG_MSSPLL_REF_CR; + void __iomem *postdiv_addr = msspll_hw->base + REG_MSSPLL_POSTDIV_CR; + u32 mult, ref_div, postdiv; + + mult = readl_relaxed(mult_addr) >> MSSPLL_FBDIV_SHIFT; + mult &= clk_div_mask(MSSPLL_FBDIV_WIDTH); + ref_div = readl_relaxed(ref_div_addr) >> MSSPLL_REFDIV_SHIFT; + ref_div &= clk_div_mask(MSSPLL_REFDIV_WIDTH); + postdiv = readl_relaxed(postdiv_addr) >> MSSPLL_POSTDIV_SHIFT; + postdiv &= clk_div_mask(MSSPLL_POSTDIV_WIDTH); + + return prate * mult / (ref_div * MSSPLL_FIXED_DIV * postdiv); +} + +static const struct clk_ops mpfs_clk_msspll_ops = { + .recalc_rate = mpfs_clk_msspll_recalc_rate, +}; + +#define CLK_PLL(_id, _name, _parent, _shift, _width, _flags, _offset) { \ + .id = _id, \ + .shift = _shift, \ + .width = _width, \ + .reg_offset = _offset, \ + .flags = _flags, \ + .hw.init = CLK_HW_INIT_PARENTS_DATA(_name, _parent, &mpfs_clk_msspll_ops, 0), \ +} + +static struct mpfs_msspll_hw_clock mpfs_msspll_clks[] = { + CLK_PLL(CLK_MSSPLL, "clk_msspll", mpfs_ext_ref, MSSPLL_FBDIV_SHIFT, + MSSPLL_FBDIV_WIDTH, 0, REG_MSSPLL_SSCG_2_CR), +}; + +static int mpfs_clk_register_msspll(struct device *dev, struct mpfs_msspll_hw_clock *msspll_hw, + void __iomem *base) +{ + msspll_hw->base = base; + + return devm_clk_hw_register(dev, &msspll_hw->hw); +} + +static int mpfs_clk_register_mssplls(struct device *dev, struct mpfs_msspll_hw_clock *msspll_hws, + unsigned int num_clks, struct mpfs_clock_data *data) +{ + void __iomem *base = data->msspll_base; + unsigned int i; + int ret; + + for (i = 0; i < num_clks; i++) { + struct mpfs_msspll_hw_clock *msspll_hw = &msspll_hws[i]; + + ret = mpfs_clk_register_msspll(dev, msspll_hw, base); + if (ret) + return dev_err_probe(dev, ret, "failed to register msspll id: %d\n", + CLK_MSSPLL); + + data->hw_data.hws[msspll_hw->id] = &msspll_hw->hw; + } + + return 0; +} + +/* + * "CFG" clocks + */ + static unsigned long mpfs_cfg_clk_recalc_rate(struct clk_hw *hw, unsigned long prate) { struct mpfs_cfg_hw_clock *cfg_hw = to_mpfs_cfg_clk(hw); @@ -76,10 +172,10 @@ static unsigned long mpfs_cfg_clk_recalc_rate(struct clk_hw *hw, unsigned long p void __iomem *base_addr = cfg_hw->sys_base; u32 val; - val = readl_relaxed(base_addr + REG_CLOCK_CONFIG_CR) >> cfg->shift; + val = readl_relaxed(base_addr + cfg->reg_offset) >> cfg->shift; val &= clk_div_mask(cfg->width); - return prate / (1u << val); + return divider_recalc_rate(hw, prate, val, cfg->table, cfg->flags, cfg->width); } static long mpfs_cfg_clk_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *prate) @@ -105,11 +201,10 @@ static int mpfs_cfg_clk_set_rate(struct clk_hw *hw, unsigned long rate, unsigned return divider_setting; spin_lock_irqsave(&mpfs_clk_lock, flags); - - val = readl_relaxed(base_addr + REG_CLOCK_CONFIG_CR); + val = readl_relaxed(base_addr + cfg->reg_offset); val &= ~(clk_div_mask(cfg->width) << cfg_hw->cfg.shift); val |= divider_setting << cfg->shift; - writel_relaxed(val, base_addr + REG_CLOCK_CONFIG_CR); + writel_relaxed(val, base_addr + cfg->reg_offset); spin_unlock_irqrestore(&mpfs_clk_lock, flags); @@ -122,19 +217,23 @@ static const struct clk_ops mpfs_clk_cfg_ops = { .set_rate = mpfs_cfg_clk_set_rate, }; -#define CLK_CFG(_id, _name, _parent, _shift, _width, _table, _flags) { \ - .cfg.id = _id, \ - .cfg.shift = _shift, \ - .cfg.width = _width, \ - .cfg.table = _table, \ - .hw.init = CLK_HW_INIT_PARENTS_DATA(_name, _parent, &mpfs_clk_cfg_ops, \ - _flags), \ +#define CLK_CFG(_id, _name, _parent, _shift, _width, _table, _flags, _offset) { \ + .cfg.id = _id, \ + .cfg.shift = _shift, \ + .cfg.width = _width, \ + .cfg.table = _table, \ + .cfg.reg_offset = _offset, \ + .cfg.flags = _flags, \ + .hw.init = CLK_HW_INIT(_name, _parent, &mpfs_clk_cfg_ops, 0), \ } static struct mpfs_cfg_hw_clock mpfs_cfg_clks[] = { - CLK_CFG(CLK_CPU, "clk_cpu", mpfs_cfg_parent, 0, 2, mpfs_div_cpu_axi_table, 0), - CLK_CFG(CLK_AXI, "clk_axi", mpfs_cfg_parent, 2, 2, mpfs_div_cpu_axi_table, 0), - CLK_CFG(CLK_AHB, "clk_ahb", mpfs_cfg_parent, 4, 2, mpfs_div_ahb_table, 0), + CLK_CFG(CLK_CPU, "clk_cpu", "clk_msspll", 0, 2, mpfs_div_cpu_axi_table, 0, + REG_CLOCK_CONFIG_CR), + CLK_CFG(CLK_AXI, "clk_axi", "clk_msspll", 2, 2, mpfs_div_cpu_axi_table, 0, + REG_CLOCK_CONFIG_CR), + CLK_CFG(CLK_AHB, "clk_ahb", "clk_msspll", 4, 2, mpfs_div_ahb_table, 0, + REG_CLOCK_CONFIG_CR), }; static int mpfs_clk_register_cfg(struct device *dev, struct mpfs_cfg_hw_clock *cfg_hw, @@ -160,13 +259,17 @@ static int mpfs_clk_register_cfgs(struct device *dev, struct mpfs_cfg_hw_clock * return dev_err_probe(dev, ret, "failed to register clock id: %d\n", cfg_hw->cfg.id); - id = cfg_hws[i].cfg.id; + id = cfg_hw->cfg.id; data->hw_data.hws[id] = &cfg_hw->hw; } return 0; } +/* + * peripheral clocks - devices connected to axi or ahb buses. + */ + static int mpfs_periph_clk_enable(struct clk_hw *hw) { struct mpfs_periph_hw_clock *periph_hw = to_mpfs_periph_clk(hw); @@ -320,8 +423,9 @@ static int mpfs_clk_probe(struct platform_device *pdev) unsigned int num_clks; int ret; - /* CLK_RESERVED is not part of cfg_clks nor periph_clks, so add 1 */ - num_clks = ARRAY_SIZE(mpfs_cfg_clks) + ARRAY_SIZE(mpfs_periph_clks) + 1; + /* CLK_RESERVED is not part of clock arrays, so add 1 */ + num_clks = ARRAY_SIZE(mpfs_msspll_clks) + ARRAY_SIZE(mpfs_cfg_clks) + + ARRAY_SIZE(mpfs_periph_clks) + 1; clk_data = devm_kzalloc(dev, struct_size(clk_data, hw_data.hws, num_clks), GFP_KERNEL); if (!clk_data) @@ -331,8 +435,17 @@ static int mpfs_clk_probe(struct platform_device *pdev) if (IS_ERR(clk_data->base)) return PTR_ERR(clk_data->base); + clk_data->msspll_base = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(clk_data->msspll_base)) + return PTR_ERR(clk_data->msspll_base); + clk_data->hw_data.num = num_clks; + ret = mpfs_clk_register_mssplls(dev, mpfs_msspll_clks, ARRAY_SIZE(mpfs_msspll_clks), + clk_data); + if (ret) + return ret; + ret = mpfs_clk_register_cfgs(dev, mpfs_cfg_clks, ARRAY_SIZE(mpfs_cfg_clks), clk_data); if (ret) return ret; From 1c6a7ea32b8cfb1725ef4def26eb9f5bc6e00303 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 13 Apr 2022 08:58:35 +0100 Subject: [PATCH 259/803] clk: microchip: mpfs: add RTCREF clock control The reference clock used by the PolarFire SoC's onboard rtc was missing from the clock driver. Add this clock at the "config" clock level, with the external reference clock as its parent. Fixes: 635e5e73370e ("clk: microchip: Add driver for Microchip PolarFire SoC") Reviewed-by: Daire McNamara Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20220413075835.3354193-9-conor.dooley@microchip.com Acked-by: Palmer Dabbelt Signed-off-by: Stephen Boyd --- drivers/clk/microchip/clk-mpfs.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/clk/microchip/clk-mpfs.c b/drivers/clk/microchip/clk-mpfs.c index 8338a4c15404..070c3b896559 100644 --- a/drivers/clk/microchip/clk-mpfs.c +++ b/drivers/clk/microchip/clk-mpfs.c @@ -15,6 +15,7 @@ #define REG_MSSPLL_POSTDIV_CR 0x10u #define REG_MSSPLL_SSCG_2_CR 0x2Cu #define REG_CLOCK_CONFIG_CR 0x08u +#define REG_RTC_CLOCK_CR 0x0Cu #define REG_SUBBLK_CLOCK_CR 0x84u #define REG_SUBBLK_RESET_CR 0x88u @@ -96,6 +97,17 @@ static const struct clk_div_table mpfs_div_ahb_table[] = { { 0, 0 } }; +/* + * The only two supported reference clock frequencies for the PolarFire SoC are + * 100 and 125 MHz, as the rtc reference is required to be 1 MHz. + * It therefore only needs to have divider table entries corresponding to + * divide by 100 and 125. + */ +static const struct clk_div_table mpfs_div_rtcref_table[] = { + { 100, 100 }, { 125, 125 }, + { 0, 0 } +}; + static unsigned long mpfs_clk_msspll_recalc_rate(struct clk_hw *hw, unsigned long prate) { struct mpfs_msspll_hw_clock *msspll_hw = to_mpfs_msspll_clk(hw); @@ -234,6 +246,16 @@ static struct mpfs_cfg_hw_clock mpfs_cfg_clks[] = { REG_CLOCK_CONFIG_CR), CLK_CFG(CLK_AHB, "clk_ahb", "clk_msspll", 4, 2, mpfs_div_ahb_table, 0, REG_CLOCK_CONFIG_CR), + { + .cfg.id = CLK_RTCREF, + .cfg.shift = 0, + .cfg.width = 12, + .cfg.table = mpfs_div_rtcref_table, + .cfg.reg_offset = REG_RTC_CLOCK_CR, + .cfg.flags = CLK_DIVIDER_ONE_BASED, + .hw.init = + CLK_HW_INIT_PARENTS_DATA("clk_rtcref", mpfs_ext_ref, &mpfs_clk_cfg_ops, 0), + } }; static int mpfs_clk_register_cfg(struct device *dev, struct mpfs_cfg_hw_clock *cfg_hw, @@ -359,7 +381,7 @@ static struct mpfs_periph_hw_clock mpfs_periph_clks[] = { CLK_PERIPH(CLK_MAC0, "clk_periph_mac0", PARENT_CLK(AHB), 1, 0), CLK_PERIPH(CLK_MAC1, "clk_periph_mac1", PARENT_CLK(AHB), 2, 0), CLK_PERIPH(CLK_MMC, "clk_periph_mmc", PARENT_CLK(AHB), 3, 0), - CLK_PERIPH(CLK_TIMER, "clk_periph_timer", PARENT_CLK(AHB), 4, 0), + CLK_PERIPH(CLK_TIMER, "clk_periph_timer", PARENT_CLK(RTCREF), 4, 0), CLK_PERIPH(CLK_MMUART0, "clk_periph_mmuart0", PARENT_CLK(AHB), 5, CLK_IS_CRITICAL), CLK_PERIPH(CLK_MMUART1, "clk_periph_mmuart1", PARENT_CLK(AHB), 6, 0), CLK_PERIPH(CLK_MMUART2, "clk_periph_mmuart2", PARENT_CLK(AHB), 7, 0), From 6deb9bf4580d53fea191fa0689a4446c8937398d Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 13 Apr 2022 08:58:36 +0100 Subject: [PATCH 260/803] riscv: dts: microchip: reparent mpfs clocks The 600M clock in the fabric is not the real reference, replace it with a 125M clock which is the correct value for the icicle kit. Rename the msspllclk node to mssrefclk since this is now the input to, not the output of, the msspll clock. Control of the msspll clock has been moved into the clock configurator, so add the register range for it to the clk configurator. Finally, add a new output of the clock config block which will provide the 1M reference clock for the MTIMER and the rtc. Fixes: 528a5b1f2556 ("riscv: dts: microchip: add new peripherals to icicle kit device tree") Fixes: 0fa6107eca41 ("RISC-V: Initial DTS for Microchip ICICLE board") Reviewed-by: Daire McNamara Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20220413075835.3354193-10-conor.dooley@microchip.com Acked-by: Palmer Dabbelt Signed-off-by: Stephen Boyd --- .../boot/dts/microchip/microchip-mpfs-icicle-kit.dts | 2 +- arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts index cd2fe80fa81a..3392153dd0f1 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts @@ -45,7 +45,7 @@ }; &refclk { - clock-frequency = <600000000>; + clock-frequency = <125000000>; }; &mmuart1 { diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi index 3b48b7f35410..746c4d4e7686 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi @@ -141,7 +141,7 @@ }; }; - refclk: msspllclk { + refclk: mssrefclk { compatible = "fixed-clock"; #clock-cells = <0>; }; @@ -190,7 +190,7 @@ clkcfg: clkcfg@20002000 { compatible = "microchip,mpfs-clkcfg"; - reg = <0x0 0x20002000 0x0 0x1000>; + reg = <0x0 0x20002000 0x0 0x1000>, <0x0 0x3E001000 0x0 0x1000>; clocks = <&refclk>; #clock-cells = <1>; }; @@ -393,8 +393,8 @@ reg = <0x0 0x20124000 0x0 0x1000>; interrupt-parent = <&plic>; interrupts = <80>, <81>; - clocks = <&clkcfg CLK_RTC>; - clock-names = "rtc"; + clocks = <&clkcfg CLK_RTC>, <&clkcfg CLK_RTCREF>; + clock-names = "rtc", "rtcref"; status = "disabled"; }; From 7635a1ad8d92dcc8247b53f949e37795154b5b6f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 11 Apr 2022 08:42:10 -0700 Subject: [PATCH 261/803] iwlwifi: iwl-dbg: Use del_timer_sync() before freeing In Chrome OS, a large number of crashes is observed due to corrupted timer lists. Steven Rostedt pointed out that this usually happens when a timer is freed while still active, and that the problem is often triggered by code calling del_timer() instead of del_timer_sync() just before freeing. Steven also identified the iwlwifi driver as one of the possible culprits since it does exactly that. Reported-by: Steven Rostedt Cc: Steven Rostedt Cc: Johannes Berg Cc: Gregory Greenman Fixes: 60e8abd9d3e91 ("iwlwifi: dbg_ini: add periodic trigger new API support") Signed-off-by: Guenter Roeck Acked-by: Gregory Greenman Tested-by: Sedat Dilek # Linux v5.17.3-rc1 and Debian LLVM-14 Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220411154210.1870008-1-linux@roeck-us.net --- drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index 866a33f49915..3237d4b528b5 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -371,7 +371,7 @@ void iwl_dbg_tlv_del_timers(struct iwl_trans *trans) struct iwl_dbg_tlv_timer_node *node, *tmp; list_for_each_entry_safe(node, tmp, timer_list, list) { - del_timer(&node->timer); + del_timer_sync(&node->timer); list_del(&node->list); kfree(node); } From 4dd4e6f659850f2df20b9612593f5a0f040549e1 Mon Sep 17 00:00:00 2001 From: Gregory Greenman Date: Tue, 12 Apr 2022 22:01:41 +0300 Subject: [PATCH 262/803] MAINTAINERS: update iwlwifi driver maintainer Set myself as a maintainer of iwlwifi driver as Luca is moving to a new role. Signed-off-by: Gregory Greenman Acked-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220412190141.4543-1-gregory.greenman@intel.com --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c8fe97a8d60e..6a7a839acfe3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10129,7 +10129,7 @@ S: Supported F: drivers/net/wireless/intel/iwlegacy/ INTEL WIRELESS WIFI LINK (iwlwifi) -M: Luca Coelho +M: Gregory Greenman L: linux-wireless@vger.kernel.org S: Supported W: https://wireless.wiki.kernel.org/en/users/drivers/iwlwifi From c95ce3a23dcda678f6f7811dd39b6d14eeb6f192 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 22 Apr 2022 08:26:53 +0200 Subject: [PATCH 263/803] topology: Fix up build warning in topology_is_visible() Commit aa63a74d4535 ("topology/sysfs: Hide PPIN on systems that do not support it.") caused a build warning on some configurations: drivers/base/topology.c: In function 'topology_is_visible': drivers/base/topology.c:158:24: warning: unused variable 'dev' [-Wunused-variable] 158 | struct device *dev = kobj_to_dev(kobj); Fix this up by getting rid of the variable entirely. Fixes: aa63a74d4535 ("topology/sysfs: Hide PPIN on systems that do not support it.") Cc: Tony Luck Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20220422062653.3899972-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/topology.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 706dbf8bf249..ac6ad9ab67f9 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -155,9 +155,7 @@ static struct attribute *default_attrs[] = { static umode_t topology_is_visible(struct kobject *kobj, struct attribute *attr, int unused) { - struct device *dev = kobj_to_dev(kobj); - - if (attr == &dev_attr_ppin.attr && !topology_ppin(dev->id)) + if (attr == &dev_attr_ppin.attr && !topology_ppin(kobj_to_dev(kobj)->id)) return 0; return attr->mode; From 165e3e17fe8fe6a8aab319bc6e631a2e23b9a857 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Apr 2022 16:52:41 -0400 Subject: [PATCH 264/803] sctp: check asoc strreset_chunk in sctp_generate_reconf_event A null pointer reference issue can be triggered when the response of a stream reconf request arrives after the timer is triggered, such as: send Incoming SSN Reset Request ---> CPU0: reconf timer is triggered, go to the handler code before hold sk lock <--- reply with Outgoing SSN Reset Request CPU1: process Outgoing SSN Reset Request, and set asoc->strreset_chunk to NULL CPU0: continue the handler code, hold sk lock, and try to hold asoc->strreset_chunk, crash! In Ying Xu's testing, the call trace is: [ ] BUG: kernel NULL pointer dereference, address: 0000000000000010 [ ] RIP: 0010:sctp_chunk_hold+0xe/0x40 [sctp] [ ] Call Trace: [ ] [ ] sctp_sf_send_reconf+0x2c/0x100 [sctp] [ ] sctp_do_sm+0xa4/0x220 [sctp] [ ] sctp_generate_reconf_event+0xbd/0xe0 [sctp] [ ] call_timer_fn+0x26/0x130 This patch is to fix it by returning from the timer handler if asoc strreset_chunk is already set to NULL. Fixes: 7b9438de0cd4 ("sctp: add stream reconf timer") Reported-by: Ying Xu Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index b3815b568e8e..463c4a58d2c3 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -458,6 +458,10 @@ void sctp_generate_reconf_event(struct timer_list *t) goto out_unlock; } + /* This happens when the response arrives after the timer is triggered. */ + if (!asoc->strreset_chunk) + goto out_unlock; + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_RECONF), asoc->state, asoc->ep, asoc, From 45974e4276a8d6653394f66666fc57d8ffa6de9a Mon Sep 17 00:00:00 2001 From: Max Krummenacher Date: Thu, 14 Apr 2022 10:50:54 +0200 Subject: [PATCH 265/803] ARM: dts: imx6ull-colibri: fix vqmmc regulator The correct spelling for the property is gpios. Otherwise, the regulator will neither reserve nor control any GPIOs. Thus, any SD/MMC card which can use UHS-I modes will fail. Fixes: c2e4987e0e02 ("ARM: dts: imx6ull: add Toradex Colibri iMX6ULL support") Signed-off-by: Max Krummenacher Signed-off-by: Denys Drozdov Signed-off-by: Marcel Ziswiler Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6ull-colibri.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6ull-colibri.dtsi b/arch/arm/boot/dts/imx6ull-colibri.dtsi index 7f35a06dff95..951a2a6c5a65 100644 --- a/arch/arm/boot/dts/imx6ull-colibri.dtsi +++ b/arch/arm/boot/dts/imx6ull-colibri.dtsi @@ -37,7 +37,7 @@ reg_sd1_vmmc: regulator-sd1-vmmc { compatible = "regulator-gpio"; - gpio = <&gpio5 9 GPIO_ACTIVE_HIGH>; + gpios = <&gpio5 9 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_snvs_reg_sd>; regulator-always-on; From 0310b5aa0656a94102344f1e9ae2892e342a665d Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 18 Apr 2022 14:47:31 -0300 Subject: [PATCH 266/803] arm64: dts: imx8mn-ddr4-evk: Describe the 32.768 kHz PMIC clock The ROHM BD71847 PMIC has a 32.768 kHz clock. Describe the PMIC clock to fix the following boot errors: bd718xx-clk bd71847-clk.1.auto: No parent clk found bd718xx-clk: probe of bd71847-clk.1.auto failed with error -22 Based on the same fix done for imx8mm-evk as per commit a6a355ede574 ("arm64: dts: imx8mm-evk: Add 32.768 kHz clock to PMIC") Fixes: 3e44dd09736d ("arm64: dts: imx8mn-ddr4-evk: Add rohm,bd71847 PMIC support") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts index 7dfee715a2c4..d8ce217c6016 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts @@ -59,6 +59,10 @@ interrupts = <3 IRQ_TYPE_LEVEL_LOW>; rohm,reset-snvs-powered; + #clock-cells = <0>; + clocks = <&osc_32k 0>; + clock-output-names = "clk-32k-out"; + regulators { buck1_reg: BUCK1 { regulator-name = "buck1"; From 0c9843a74a85224a89daa81fa66891dae2f930e1 Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Sun, 24 Apr 2022 03:14:30 +0000 Subject: [PATCH 267/803] pinctrl: pistachio: fix use of irq_of_parse_and_map() The irq_of_parse_and_map() function returns 0 on failure, and does not return an negative value. Fixes: cefc03e5995e ("pinctrl: Add Pistachio SoC pin control driver") Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Link: https://lore.kernel.org/r/20220424031430.3170759-1-lv.ruyi@zte.com.cn Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-pistachio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c index 8d271c6b0ca4..5de691c630b4 100644 --- a/drivers/pinctrl/pinctrl-pistachio.c +++ b/drivers/pinctrl/pinctrl-pistachio.c @@ -1374,10 +1374,10 @@ static int pistachio_gpio_register(struct pistachio_pinctrl *pctl) } irq = irq_of_parse_and_map(child, 0); - if (irq < 0) { - dev_err(pctl->dev, "No IRQ for bank %u: %d\n", i, irq); + if (!irq) { + dev_err(pctl->dev, "No IRQ for bank %u\n", i); of_node_put(child); - ret = irq; + ret = -EINVAL; goto err; } From 5b47b751b760ee1c74a51660fd096aa148a362cd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 23 Mar 2022 11:51:55 +0100 Subject: [PATCH 268/803] eeprom: at25: Use DMA safe buffers Reading EEPROM fails with following warning: [ 16.357496] ------------[ cut here ]------------ [ 16.357529] fsl_spi b01004c0.spi: rejecting DMA map of vmalloc memory [ 16.357698] WARNING: CPU: 0 PID: 371 at include/linux/dma-mapping.h:326 fsl_spi_cpm_bufs+0x2a0/0x2d8 [ 16.357775] CPU: 0 PID: 371 Comm: od Not tainted 5.16.11-s3k-dev-01743-g19beecbfe9d6-dirty #109 [ 16.357806] NIP: c03fbc9c LR: c03fbc9c CTR: 00000000 [ 16.357825] REGS: e68d9b20 TRAP: 0700 Not tainted (5.16.11-s3k-dev-01743-g19beecbfe9d6-dirty) [ 16.357849] MSR: 00029032 CR: 24002282 XER: 00000000 [ 16.357931] [ 16.357931] GPR00: c03fbc9c e68d9be0 c26d06a0 00000039 00000001 c0d36364 c0e96428 00000027 [ 16.357931] GPR08: 00000001 00000000 00000023 3fffc000 24002282 100d3dd6 100a2ffc 00000000 [ 16.357931] GPR16: 100cd280 100b0000 00000000 aff54f7e 100d0000 100d0000 00000001 100cf328 [ 16.357931] GPR24: 100cf328 00000000 00000003 e68d9e30 c156b410 e67ab4c0 e68d9d38 c24ab278 [ 16.358253] NIP [c03fbc9c] fsl_spi_cpm_bufs+0x2a0/0x2d8 [ 16.358292] LR [c03fbc9c] fsl_spi_cpm_bufs+0x2a0/0x2d8 [ 16.358325] Call Trace: [ 16.358336] [e68d9be0] [c03fbc9c] fsl_spi_cpm_bufs+0x2a0/0x2d8 (unreliable) [ 16.358388] [e68d9c00] [c03fcb44] fsl_spi_bufs.isra.0+0x94/0x1a0 [ 16.358436] [e68d9c20] [c03fd970] fsl_spi_do_one_msg+0x254/0x3dc [ 16.358483] [e68d9cb0] [c03f7e50] __spi_pump_messages+0x274/0x8a4 [ 16.358529] [e68d9ce0] [c03f9d30] __spi_sync+0x344/0x378 [ 16.358573] [e68d9d20] [c03fb52c] spi_sync+0x34/0x60 [ 16.358616] [e68d9d30] [c03b4dec] at25_ee_read+0x138/0x1a8 [ 16.358667] [e68d9e50] [c04a8fb8] bin_attr_nvmem_read+0x98/0x110 [ 16.358725] [e68d9e60] [c0204b14] kernfs_fop_read_iter+0xc0/0x1fc [ 16.358774] [e68d9e80] [c0168660] vfs_read+0x284/0x410 [ 16.358821] [e68d9f00] [c016925c] ksys_read+0x6c/0x11c [ 16.358863] [e68d9f30] [c00160e0] ret_from_syscall+0x0/0x28 ... [ 16.359608] ---[ end trace a4ce3e34afef0cb5 ]--- [ 16.359638] fsl_spi b01004c0.spi: unable to map tx dma This is due to the AT25 driver using buffers on stack, which is not possible with CONFIG_VMAP_STACK. As mentionned in kernel Documentation (Documentation/spi/spi-summary.rst): - Follow standard kernel rules, and provide DMA-safe buffers in your messages. That way controller drivers using DMA aren't forced to make extra copies unless the hardware requires it (e.g. working around hardware errata that force the use of bounce buffering). Modify the driver to use a buffer located in the at25 device structure which is allocated via kmalloc during probe. Protect writes in this new buffer with the driver's mutex. Fixes: b587b13a4f67 ("[PATCH] SPI eeprom driver") Cc: stable Signed-off-by: Christophe Leroy Link: https://lore.kernel.org/r/230a9486fc68ea0182df46255e42a51099403642.1648032613.git.christophe.leroy@csgroup.eu Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/at25.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c index 91f96abbb3f9..8d169a35cf13 100644 --- a/drivers/misc/eeprom/at25.c +++ b/drivers/misc/eeprom/at25.c @@ -31,6 +31,8 @@ */ #define FM25_SN_LEN 8 /* serial number length */ +#define EE_MAXADDRLEN 3 /* 24 bit addresses, up to 2 MBytes */ + struct at25_data { struct spi_eeprom chip; struct spi_device *spi; @@ -39,6 +41,7 @@ struct at25_data { struct nvmem_config nvmem_config; struct nvmem_device *nvmem; u8 sernum[FM25_SN_LEN]; + u8 command[EE_MAXADDRLEN + 1]; }; #define AT25_WREN 0x06 /* latch the write enable */ @@ -61,8 +64,6 @@ struct at25_data { #define FM25_ID_LEN 9 /* ID length */ -#define EE_MAXADDRLEN 3 /* 24 bit addresses, up to 2 MBytes */ - /* * Specs often allow 5ms for a page write, sometimes 20ms; * it's important to recover from write timeouts. @@ -78,7 +79,6 @@ static int at25_ee_read(void *priv, unsigned int offset, { struct at25_data *at25 = priv; char *buf = val; - u8 command[EE_MAXADDRLEN + 1]; u8 *cp; ssize_t status; struct spi_transfer t[2]; @@ -92,12 +92,15 @@ static int at25_ee_read(void *priv, unsigned int offset, if (unlikely(!count)) return -EINVAL; - cp = command; + cp = at25->command; instr = AT25_READ; if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR) if (offset >= BIT(at25->addrlen * 8)) instr |= AT25_INSTR_BIT3; + + mutex_lock(&at25->lock); + *cp++ = instr; /* 8/16/24-bit address is written MSB first */ @@ -116,7 +119,7 @@ static int at25_ee_read(void *priv, unsigned int offset, spi_message_init(&m); memset(t, 0, sizeof(t)); - t[0].tx_buf = command; + t[0].tx_buf = at25->command; t[0].len = at25->addrlen + 1; spi_message_add_tail(&t[0], &m); @@ -124,8 +127,6 @@ static int at25_ee_read(void *priv, unsigned int offset, t[1].len = count; spi_message_add_tail(&t[1], &m); - mutex_lock(&at25->lock); - /* * Read it all at once. * @@ -152,7 +153,7 @@ static int fm25_aux_read(struct at25_data *at25, u8 *buf, uint8_t command, spi_message_init(&m); memset(t, 0, sizeof(t)); - t[0].tx_buf = &command; + t[0].tx_buf = at25->command; t[0].len = 1; spi_message_add_tail(&t[0], &m); @@ -162,6 +163,8 @@ static int fm25_aux_read(struct at25_data *at25, u8 *buf, uint8_t command, mutex_lock(&at25->lock); + at25->command[0] = command; + status = spi_sync(at25->spi, &m); dev_dbg(&at25->spi->dev, "read %d aux bytes --> %d\n", len, status); From b4f3d5f06e29b7020f19cc788b2c2de750e888a1 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 11 Apr 2022 00:00:59 -0500 Subject: [PATCH 269/803] clk: sunxi-ng: sun6i-rtc: Mark rtc-32k as critical Because some newer hardware variants have multiple possible parents for the RTC's timekeeping clock, this driver models it as a "rtc-32k" clock. However, it does not add any consumer for this clock. This causes the common clock framework to disable it, preventing RTC time access. Since the RTC's timekeeping clock should always be enabled, regardless of which drivers are loaded, let's mark this clock as critical instead of adding a consumer in the RTC driver. Fixes: d91612d7f01a ("clk: sunxi-ng: Add support for the sun6i RTC clocks") Signed-off-by: Samuel Holland Acked-by: Jernej Skrabec Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20220411050100.40964-1-samuel@sholland.org --- drivers/clk/sunxi-ng/ccu-sun6i-rtc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c b/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c index ffb72d9a9c36..2f3ddc908ebd 100644 --- a/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c +++ b/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c @@ -241,6 +241,7 @@ static struct clk_init_data rtc_32k_init_data = { .ops = &ccu_mux_ops, .parent_hws = rtc_32k_parents, .num_parents = ARRAY_SIZE(rtc_32k_parents), /* updated during probe */ + .flags = CLK_IS_CRITICAL, }; static struct ccu_mux rtc_32k_clk = { From ed911c9f9dcb26849fa688225f002ef2f2c50cf4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 18 Apr 2022 17:09:36 +0200 Subject: [PATCH 270/803] drm/i915: Fix DISP_POS_Y and DISP_HEIGHT defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 428cb15d5b00 ("drm/i915: Clean up pre-skl primary plane registers") introduced DISP_POS_Y and DISP_HEIGHT defines but accidentally set these their masks to REG_GENMASK(31, 0) instead of REG_GENMASK(31, 16). This breaks the primary display pane on at least pineview machines, fix the mask to fix the primary display pane only showing black. Tested on an Acer One AO532h with an Intel N450 SoC. Fixes: 428cb15d5b00 ("drm/i915: Clean up pre-skl primary plane registers") Cc: José Roberto de Souza Cc: Ville Syrjälä Signed-off-by: Hans de Goede Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220418150936.5499-1-hdegoede@redhat.com Reviewed-by: José Roberto de Souza (cherry picked from commit 681f8a5c6e372dbfd2a313ace417e7749543de1d) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_reg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3c87d77d2cf6..7748f7f20b95 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4345,12 +4345,12 @@ #define _DSPAADDR 0x70184 #define _DSPASTRIDE 0x70188 #define _DSPAPOS 0x7018C /* reserved */ -#define DISP_POS_Y_MASK REG_GENMASK(31, 0) +#define DISP_POS_Y_MASK REG_GENMASK(31, 16) #define DISP_POS_Y(y) REG_FIELD_PREP(DISP_POS_Y_MASK, (y)) #define DISP_POS_X_MASK REG_GENMASK(15, 0) #define DISP_POS_X(x) REG_FIELD_PREP(DISP_POS_X_MASK, (x)) #define _DSPASIZE 0x70190 -#define DISP_HEIGHT_MASK REG_GENMASK(31, 0) +#define DISP_HEIGHT_MASK REG_GENMASK(31, 16) #define DISP_HEIGHT(h) REG_FIELD_PREP(DISP_HEIGHT_MASK, (h)) #define DISP_WIDTH_MASK REG_GENMASK(15, 0) #define DISP_WIDTH(w) REG_FIELD_PREP(DISP_WIDTH_MASK, (w)) From c05d8332f5d23fa3b521911cbe55a2b67fb21248 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Wed, 13 Apr 2022 11:28:26 +0300 Subject: [PATCH 271/803] drm/i915: Check EDID for HDR static metadata when choosing blc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have now seen panel (XMG Core 15 e21 laptop) advertizing support for Intel proprietary eDP backlight control via DPCD registers, but actually working only with legacy pwm control. This patch adds panel EDID check for possible HDR static metadata and Intel proprietary eDP backlight control is used only if that exists. Missing HDR static metadata is ignored if user specifically asks for Intel proprietary eDP backlight control via enable_dpcd_backlight parameter. v2 : - Ignore missing HDR static metadata if Intel proprietary eDP backlight control is forced via i915.enable_dpcd_backlight - Printout info message if panel is missing HDR static metadata and support for Intel proprietary eDP backlight control is detected Fixes: 4a8d79901d5b ("drm/i915/dp: Enable Intel's HDR backlight interface (only SDR for now)") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5284 Cc: Lyude Paul Cc: Mika Kahola Cc: Jani Nikula Cc: Filippo Falezza Cc: stable@vger.kernel.org Signed-off-by: Jouni Högander Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220413082826.120634-1-jouni.hogander@intel.com Reviewed-by: Lyude Paul (cherry picked from commit b4b157577cb1de13bee8bebc3576f1de6799a921) Signed-off-by: Joonas Lahtinen --- .../drm/i915/display/intel_dp_aux_backlight.c | 34 ++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 97cf3cac0105..fb6cf30ee628 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -97,6 +97,14 @@ #define INTEL_EDP_BRIGHTNESS_OPTIMIZATION_1 0x359 +enum intel_dp_aux_backlight_modparam { + INTEL_DP_AUX_BACKLIGHT_AUTO = -1, + INTEL_DP_AUX_BACKLIGHT_OFF = 0, + INTEL_DP_AUX_BACKLIGHT_ON = 1, + INTEL_DP_AUX_BACKLIGHT_FORCE_VESA = 2, + INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL = 3, +}; + /* Intel EDP backlight callbacks */ static bool intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) @@ -126,6 +134,24 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) return false; } + /* + * If we don't have HDR static metadata there is no way to + * runtime detect used range for nits based control. For now + * do not use Intel proprietary eDP backlight control if we + * don't have this data in panel EDID. In case we find panel + * which supports only nits based control, but doesn't provide + * HDR static metadata we need to start maintaining table of + * ranges for such panels. + */ + if (i915->params.enable_dpcd_backlight != INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL && + !(connector->base.hdr_sink_metadata.hdmi_type1.metadata_type & + BIT(HDMI_STATIC_METADATA_TYPE1))) { + drm_info(&i915->drm, + "Panel is missing HDR static metadata. Possible support for Intel HDR backlight interface is not used. If your backlight controls don't work try booting with i915.enable_dpcd_backlight=%d. needs this, please file a _new_ bug report on drm/i915, see " FDO_BUG_URL " for details.\n", + INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL); + return false; + } + panel->backlight.edp.intel.sdr_uses_aux = tcon_cap[2] & INTEL_EDP_SDR_TCON_BRIGHTNESS_AUX_CAP; @@ -413,14 +439,6 @@ static const struct intel_panel_bl_funcs intel_dp_vesa_bl_funcs = { .get = intel_dp_aux_vesa_get_backlight, }; -enum intel_dp_aux_backlight_modparam { - INTEL_DP_AUX_BACKLIGHT_AUTO = -1, - INTEL_DP_AUX_BACKLIGHT_OFF = 0, - INTEL_DP_AUX_BACKLIGHT_ON = 1, - INTEL_DP_AUX_BACKLIGHT_FORCE_VESA = 2, - INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL = 3, -}; - int intel_dp_aux_init_backlight_funcs(struct intel_connector *connector) { struct drm_device *dev = connector->base.dev; From 1aa24a8f3b5133dae4bc1e57427e345445f3e902 Mon Sep 17 00:00:00 2001 From: Xiaobing Luo Date: Sat, 23 Apr 2022 15:12:04 +0000 Subject: [PATCH 272/803] cpufreq: fix memory leak in sun50i_cpufreq_nvmem_probe -------------------------------------------- unreferenced object 0xffff000010742a00 (size 128): comm "swapper/0", pid 1, jiffies 4294902015 (age 1187.652s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000b4dfebaa>] __kmalloc+0x338/0x474 [<00000000d6e716db>] sun50i_cpufreq_nvmem_probe+0xc4/0x36c [<000000007d6082a0>] platform_probe+0x98/0x11c [<00000000c990f549>] really_probe+0x234/0x5a0 [<000000002d9fecc6>] __driver_probe_device+0x194/0x224 [<00000000cf0b94fa>] driver_probe_device+0x64/0x13c [<00000000f238e4cf>] __device_attach_driver+0xf8/0x180 [<000000006720e418>] bus_for_each_drv+0xf8/0x160 [<00000000df4f14f6>] __device_attach+0x174/0x29c [<00000000782002fb>] device_initial_probe+0x20/0x30 [<00000000c2681b06>] bus_probe_device+0xfc/0x110 [<00000000964cf3bd>] device_add+0x5f0/0xcd0 [<000000004b9264e3>] platform_device_add+0x198/0x390 [<00000000fa82a9d0>] platform_device_register_full+0x178/0x210 [<000000009a5daf13>] sun50i_cpufreq_init+0xf8/0x168 [<000000000377cc7c>] do_one_initcall+0xe4/0x570 -------------------------------------------- if sun50i_cpufreq_get_efuse failed, then opp_tables leak. Fixes: f328584f7bff ("cpufreq: Add sun50i nvmem based CPU scaling driver") Signed-off-by: Xiaobing Luo Reviewed-by: Samuel Holland Signed-off-by: Viresh Kumar --- drivers/cpufreq/sun50i-cpufreq-nvmem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c index 2deed8d8773f..75e1bf3a08f7 100644 --- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c +++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c @@ -98,8 +98,10 @@ static int sun50i_cpufreq_nvmem_probe(struct platform_device *pdev) return -ENOMEM; ret = sun50i_cpufreq_get_efuse(&speed); - if (ret) + if (ret) { + kfree(opp_tables); return ret; + } snprintf(name, MAX_NAME_LEN, "speed%d", speed); From b7c81f80246fac44077166f3e07103affe6db8ff Mon Sep 17 00:00:00 2001 From: Chengfeng Ye Date: Sat, 9 Apr 2022 13:12:41 +0900 Subject: [PATCH 273/803] firewire: fix potential uaf in outbound_phy_packet_callback() &e->event and e point to the same address, and &e->event could be freed in queue_event. So there is a potential uaf issue if we dereference e after calling queue_event(). Fix this by adding a temporary variable to maintain e->client in advance, this can avoid the potential uaf issue. Cc: Signed-off-by: Chengfeng Ye Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20220409041243.603210-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- drivers/firewire/core-cdev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 9f89c17730b1..708e417200f4 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1500,6 +1500,7 @@ static void outbound_phy_packet_callback(struct fw_packet *packet, { struct outbound_phy_packet_event *e = container_of(packet, struct outbound_phy_packet_event, p); + struct client *e_client; switch (status) { /* expected: */ @@ -1516,9 +1517,10 @@ static void outbound_phy_packet_callback(struct fw_packet *packet, } e->phy_packet.data[0] = packet->timestamp; + e_client = e->client; queue_event(e->client, &e->event, &e->phy_packet, sizeof(e->phy_packet) + e->phy_packet.length, NULL, 0); - client_put(e->client); + client_put(e_client); } static int ioctl_send_phy_packet(struct client *client, union ioctl_arg *arg) From 9423973869bd4632ffe669f950510c49296656e0 Mon Sep 17 00:00:00 2001 From: Jakob Koschel Date: Sat, 9 Apr 2022 13:12:42 +0900 Subject: [PATCH 274/803] firewire: remove check of list iterator against head past the loop body When list_for_each_entry() completes the iteration over the whole list without breaking the loop, the iterator value will be a bogus pointer computed based on the head element. While it is safe to use the pointer to determine if it was computed based on the head element, either with list_entry_is_head() or &pos->member == head, using the iterator variable after the loop should be avoided. In preparation to limit the scope of a list iterator to the list traversal loop, use a dedicated pointer to point to the found element [1]. Link: https://lore.kernel.org/all/CAHk-=wgRr_D8CB-D9Kg-c=EHreAsk5SqXPwr9Y7k9sA6cWXJ6w@mail.gmail.com/ [1] Cc: Signed-off-by: Jakob Koschel Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20220409041243.603210-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- drivers/firewire/core-transaction.c | 30 +++++++++++++++-------------- drivers/firewire/sbp2.c | 13 +++++++------ 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index ac487c96bb71..6c20815cc8d1 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -73,24 +73,25 @@ static int try_cancel_split_timeout(struct fw_transaction *t) static int close_transaction(struct fw_transaction *transaction, struct fw_card *card, int rcode) { - struct fw_transaction *t; + struct fw_transaction *t = NULL, *iter; unsigned long flags; spin_lock_irqsave(&card->lock, flags); - list_for_each_entry(t, &card->transaction_list, link) { - if (t == transaction) { - if (!try_cancel_split_timeout(t)) { + list_for_each_entry(iter, &card->transaction_list, link) { + if (iter == transaction) { + if (!try_cancel_split_timeout(iter)) { spin_unlock_irqrestore(&card->lock, flags); goto timed_out; } - list_del_init(&t->link); - card->tlabel_mask &= ~(1ULL << t->tlabel); + list_del_init(&iter->link); + card->tlabel_mask &= ~(1ULL << iter->tlabel); + t = iter; break; } } spin_unlock_irqrestore(&card->lock, flags); - if (&t->link != &card->transaction_list) { + if (t) { t->callback(card, rcode, NULL, 0, t->callback_data); return 0; } @@ -935,7 +936,7 @@ EXPORT_SYMBOL(fw_core_handle_request); void fw_core_handle_response(struct fw_card *card, struct fw_packet *p) { - struct fw_transaction *t; + struct fw_transaction *t = NULL, *iter; unsigned long flags; u32 *data; size_t data_length; @@ -947,20 +948,21 @@ void fw_core_handle_response(struct fw_card *card, struct fw_packet *p) rcode = HEADER_GET_RCODE(p->header[1]); spin_lock_irqsave(&card->lock, flags); - list_for_each_entry(t, &card->transaction_list, link) { - if (t->node_id == source && t->tlabel == tlabel) { - if (!try_cancel_split_timeout(t)) { + list_for_each_entry(iter, &card->transaction_list, link) { + if (iter->node_id == source && iter->tlabel == tlabel) { + if (!try_cancel_split_timeout(iter)) { spin_unlock_irqrestore(&card->lock, flags); goto timed_out; } - list_del_init(&t->link); - card->tlabel_mask &= ~(1ULL << t->tlabel); + list_del_init(&iter->link); + card->tlabel_mask &= ~(1ULL << iter->tlabel); + t = iter; break; } } spin_unlock_irqrestore(&card->lock, flags); - if (&t->link == &card->transaction_list) { + if (!t) { timed_out: fw_notice(card, "unsolicited response (source %x, tlabel %x)\n", source, tlabel); diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 85cd379fd383..60051c0cabea 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -408,7 +408,7 @@ static void sbp2_status_write(struct fw_card *card, struct fw_request *request, void *payload, size_t length, void *callback_data) { struct sbp2_logical_unit *lu = callback_data; - struct sbp2_orb *orb; + struct sbp2_orb *orb = NULL, *iter; struct sbp2_status status; unsigned long flags; @@ -433,17 +433,18 @@ static void sbp2_status_write(struct fw_card *card, struct fw_request *request, /* Lookup the orb corresponding to this status write. */ spin_lock_irqsave(&lu->tgt->lock, flags); - list_for_each_entry(orb, &lu->orb_list, link) { + list_for_each_entry(iter, &lu->orb_list, link) { if (STATUS_GET_ORB_HIGH(status) == 0 && - STATUS_GET_ORB_LOW(status) == orb->request_bus) { - orb->rcode = RCODE_COMPLETE; - list_del(&orb->link); + STATUS_GET_ORB_LOW(status) == iter->request_bus) { + iter->rcode = RCODE_COMPLETE; + list_del(&iter->link); + orb = iter; break; } } spin_unlock_irqrestore(&lu->tgt->lock, flags); - if (&orb->link != &lu->orb_list) { + if (orb) { orb->callback(orb, &status); kref_put(&orb->kref, free_orb); /* orb callback reference */ } else { From a7ecbe92b9243edbe94772f6f2c854e4142a3345 Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Sat, 9 Apr 2022 13:12:43 +0900 Subject: [PATCH 275/803] firewire: core: extend card->lock in fw_core_handle_bus_reset card->local_node and card->bm_retries are both always accessed under card->lock. fw_core_handle_bus_reset has a check whose condition depends on card->local_node and whose body writes to card->bm_retries. Both of these accesses are not under card->lock. Move the lock acquiring of card->lock to before this check such that these accesses do happen when card->lock is held. fw_destroy_nodes is called inside the check. Since fw_destroy_nodes already acquires card->lock inside its function body, move this out to the callsites of fw_destroy_nodes. Also add a comment to indicate which locking is necessary when calling fw_destroy_nodes. Cc: Signed-off-by: Niels Dossche Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20220409041243.603210-4-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- drivers/firewire/core-card.c | 3 +++ drivers/firewire/core-topology.c | 9 +++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 54be88167c60..f3b3953cac83 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -668,6 +668,7 @@ EXPORT_SYMBOL_GPL(fw_card_release); void fw_core_remove_card(struct fw_card *card) { struct fw_card_driver dummy_driver = dummy_driver_template; + unsigned long flags; card->driver->update_phy_reg(card, 4, PHY_LINK_ACTIVE | PHY_CONTENDER, 0); @@ -682,7 +683,9 @@ void fw_core_remove_card(struct fw_card *card) dummy_driver.stop_iso = card->driver->stop_iso; card->driver = &dummy_driver; + spin_lock_irqsave(&card->lock, flags); fw_destroy_nodes(card); + spin_unlock_irqrestore(&card->lock, flags); /* Wait for all users, especially device workqueue jobs, to finish. */ fw_card_put(card); diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c index b63d55f5ebd3..f40c81534381 100644 --- a/drivers/firewire/core-topology.c +++ b/drivers/firewire/core-topology.c @@ -375,16 +375,13 @@ static void report_found_node(struct fw_card *card, card->bm_retries = 0; } +/* Must be called with card->lock held */ void fw_destroy_nodes(struct fw_card *card) { - unsigned long flags; - - spin_lock_irqsave(&card->lock, flags); card->color++; if (card->local_node != NULL) for_each_fw_node(card, card->local_node, report_lost_node); card->local_node = NULL; - spin_unlock_irqrestore(&card->lock, flags); } static void move_tree(struct fw_node *node0, struct fw_node *node1, int port) @@ -510,6 +507,8 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation, struct fw_node *local_node; unsigned long flags; + spin_lock_irqsave(&card->lock, flags); + /* * If the selfID buffer is not the immediate successor of the * previously processed one, we cannot reliably compare the @@ -521,8 +520,6 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation, card->bm_retries = 0; } - spin_lock_irqsave(&card->lock, flags); - card->broadcast_channel_allocated = card->broadcast_channel_auto_allocated; card->node_id = node_id; /* From 3b79954fd00d540677c97a560622b73f3a1f4e28 Mon Sep 17 00:00:00 2001 From: Zihao Wang Date: Sun, 24 Apr 2022 16:41:20 +0800 Subject: [PATCH 276/803] ALSA: hda/realtek: Add quirk for Yoga Duet 7 13ITL6 speakers Lenovo Yoga Duet 7 13ITL6 has Realtek ALC287 and built-in speakers do not work out of the box. The fix developed for Yoga 7i 14ITL5 also enables speaker output for this model. Signed-off-by: Zihao Wang Cc: Link: https://lore.kernel.org/r/20220424084120.74125-1-wzhd@ustc.edu Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c65d3dbc6cc9..cf531c1efa13 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9247,6 +9247,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME), SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS), + SND_PCI_QUIRK(0x17aa, 0x3820, "Yoga Duet 7 13ITL6", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF), SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), From eb9d84b0ffe39893cb23b0b6712bbe3637fa25fa Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 24 Apr 2022 19:24:28 +0900 Subject: [PATCH 277/803] ALSA: fireworks: fix wrong return count shorter than expected by 4 bytes ALSA fireworks driver has a bug in its initial state to return count shorter than expected by 4 bytes to userspace applications when handling response frame for Echo Audio Fireworks transaction. It's due to missing addition of the size for the type of event in ALSA firewire stack. Fixes: 555e8a8f7f14 ("ALSA: fireworks: Add command/response functionality into hwdep interface") Cc: Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20220424102428.21109-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/fireworks/fireworks_hwdep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/firewire/fireworks/fireworks_hwdep.c b/sound/firewire/fireworks/fireworks_hwdep.c index 626c0c34b0b6..3a53914277d3 100644 --- a/sound/firewire/fireworks/fireworks_hwdep.c +++ b/sound/firewire/fireworks/fireworks_hwdep.c @@ -34,6 +34,7 @@ hwdep_read_resp_buf(struct snd_efw *efw, char __user *buf, long remained, type = SNDRV_FIREWIRE_EVENT_EFW_RESPONSE; if (copy_to_user(buf, &type, sizeof(type))) return -EFAULT; + count += sizeof(type); remained -= sizeof(type); buf += sizeof(type); From b9b1e0da5800a41a537f3bd1c294e492dad5cc9e Mon Sep 17 00:00:00 2001 From: Rongguang Wei Date: Wed, 20 Apr 2022 10:38:04 +0800 Subject: [PATCH 278/803] netfilter: flowtable: Remove the empty file CONFIG_NF_FLOW_TABLE_IPV4 is already removed and the real user is also removed(nf_flow_table_ipv4.c is empty). Fixes: c42ba4290b2147aa ("netfilter: flowtable: remove ipv4/ipv6 modules") Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_flow_table_ipv4.c | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 net/ipv4/netfilter/nf_flow_table_ipv4.c diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c deleted file mode 100644 index e69de29bb2d1..000000000000 From dc9b0dc4561dedd44b2bf4b8e5ef1a8a040b2424 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 12 Mar 2022 11:09:34 +0100 Subject: [PATCH 279/803] libceph: disambiguate cluster/pool full log message Signed-off-by: Ilya Dryomov --- net/ceph/osd_client.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 1c5815530e0d..83eb97c94e83 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2385,7 +2385,11 @@ again: if (ceph_test_opt(osdc->client, ABORT_ON_FULL)) { err = -ENOSPC; } else { - pr_warn_ratelimited("FULL or reached pool quota\n"); + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) + pr_warn_ratelimited("cluster is full (osdmap FULL)\n"); + else + pr_warn_ratelimited("pool %lld is full or reached quota\n", + req->r_t.base_oloc.pool); req->r_t.paused = true; maybe_request_map(osdc); } From 7f47f7f3b3c33fd2b4a662cd43cd1af96e1a297e Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Tue, 15 Mar 2022 16:29:47 +0100 Subject: [PATCH 280/803] ceph: get snap_rwsem read lock in handle_cap_export for ceph_add_cap ceph_add_cap says in its function documentation that the caller should hold the read lock on the session snap_rwsem. Furthermore, not only ceph_add_cap needs that lock, when it calls to ceph_lookup_snap_realm it eventually calls ceph_get_snap_realm which states via lockdep that snap_rwsem needs to be held. handle_cap_export calls ceph_add_cap without that mdsc->snap_rwsem held. Thus, since ceph_get_snap_realm and ceph_add_cap both need the lock, the common place to acquire that lock is inside handle_cap_export. Signed-off-by: Niels Dossche Reviewed-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index f1ad6884d4da..31204cdc2fbf 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3870,6 +3870,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, dout("handle_cap_export inode %p ci %p mds%d mseq %d target %d\n", inode, ci, mds, mseq, target); retry: + down_read(&mdsc->snap_rwsem); spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ci, mds); if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id)) @@ -3933,6 +3934,7 @@ retry: } spin_unlock(&ci->i_ceph_lock); + up_read(&mdsc->snap_rwsem); mutex_unlock(&session->s_mutex); /* open target session */ @@ -3958,6 +3960,7 @@ retry: out_unlock: spin_unlock(&ci->i_ceph_lock); + up_read(&mdsc->snap_rwsem); mutex_unlock(&session->s_mutex); if (tsession) { mutex_unlock(&tsession->s_mutex); From 396ea1681892211dd3fbc1a58bfc2c2c971433e3 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 22 Mar 2022 11:03:13 +0800 Subject: [PATCH 281/803] ceph: remove incorrect session state check Once the session is opened the s->s_ttl will be set, and when receiving a new mdsmap and the MDS map is changed, it will be possibly will close some sessions and open new ones. And then some sessions will be in CLOSING state evening without unmounting. URL: https://tracker.ceph.com/issues/54979 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index fa38c013126d..00c3de177dd6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4434,8 +4434,6 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc) bool check_session_state(struct ceph_mds_session *s) { - struct ceph_fs_client *fsc = s->s_mdsc->fsc; - switch (s->s_state) { case CEPH_MDS_SESSION_OPEN: if (s->s_ttl && time_after(jiffies, s->s_ttl)) { @@ -4444,10 +4442,6 @@ bool check_session_state(struct ceph_mds_session *s) } break; case CEPH_MDS_SESSION_CLOSING: - /* Should never reach this when not force unmounting */ - WARN_ON_ONCE(s->s_ttl && - READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN); - fallthrough; case CEPH_MDS_SESSION_NEW: case CEPH_MDS_SESSION_RESTARTING: case CEPH_MDS_SESSION_CLOSED: From 7acae6183cf37c48b8da48bbbdb78820fb3913f3 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 14 Apr 2022 09:07:21 +0800 Subject: [PATCH 282/803] ceph: fix possible NULL pointer dereference for req->r_session The request will be inserted into the ci->i_unsafe_dirops before assigning the req->r_session, so it's possible that we will hit NULL pointer dereference bug here. Cc: stable@vger.kernel.org URL: https://tracker.ceph.com/issues/55327 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Tested-by: Aaron Tomlin Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 31204cdc2fbf..5c14ef04e474 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2274,6 +2274,8 @@ retry: list_for_each_entry(req, &ci->i_unsafe_dirops, r_unsafe_dir_item) { s = req->r_session; + if (!s) + continue; if (unlikely(s->s_mds >= max_sessions)) { spin_unlock(&ci->i_unsafe_lock); for (i = 0; i < max_sessions; i++) { @@ -2294,6 +2296,8 @@ retry: list_for_each_entry(req, &ci->i_unsafe_iops, r_unsafe_target_item) { s = req->r_session; + if (!s) + continue; if (unlikely(s->s_mds >= max_sessions)) { spin_unlock(&ci->i_unsafe_lock); for (i = 0; i < max_sessions; i++) { From 8ddffdb9442a9d60b4a6e679ac48d7d21403a674 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 19 Apr 2022 15:47:00 +0200 Subject: [PATCH 283/803] netfilter: Update ip6_route_me_harder to consider L3 domain The commit referenced below fixed packet re-routing if Netfilter mangles a routing key property of a packet and the packet is routed in a VRF L3 domain. The fix, however, addressed IPv4 re-routing, only. This commit applies the same behavior for IPv6. While at it, untangle the nested ternary operator to make the code more readable. Fixes: 6d8b49c3a3a3 ("netfilter: Update ip_route_me_harder to consider L3 domain") Cc: stable@vger.kernel.org Signed-off-by: Martin Willi Reviewed-by: David Ahern Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 1da332450d98..8ce60ab89015 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -24,14 +24,13 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff { const struct ipv6hdr *iph = ipv6_hdr(skb); struct sock *sk = sk_to_full_sk(sk_partial); + struct net_device *dev = skb_dst(skb)->dev; struct flow_keys flkeys; unsigned int hh_len; struct dst_entry *dst; int strict = (ipv6_addr_type(&iph->daddr) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); struct flowi6 fl6 = { - .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if : - strict ? skb_dst(skb)->dev->ifindex : 0, .flowi6_mark = skb->mark, .flowi6_uid = sock_net_uid(net, sk), .daddr = iph->daddr, @@ -39,6 +38,13 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff }; int err; + if (sk && sk->sk_bound_dev_if) + fl6.flowi6_oif = sk->sk_bound_dev_if; + else if (strict) + fl6.flowi6_oif = dev->ifindex; + else + fl6.flowi6_oif = l3mdev_master_ifindex(dev); + fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys); dst = ip6_route_output(net, sk, &fl6); err = dst->error; From e98365afc1e94ea1609268866a44112b3572c58b Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Sun, 24 Apr 2022 20:57:20 +0800 Subject: [PATCH 284/803] net: hns3: clear inited state and stop client after failed to register netdev If failed to register netdev, it needs to clear INITED state and stop client in case of cause problem when concurrency with uninitialized process of driver. Fixes: a289a7e5c1d4 ("net: hns3: put off calling register_netdev() until client initialize complete") Signed-off-by: Jian Shen Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 14dc12c2155d..a3ee7875d6a7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -5203,6 +5203,13 @@ static void hns3_state_init(struct hnae3_handle *handle) set_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state); } +static void hns3_state_uninit(struct hnae3_handle *handle) +{ + struct hns3_nic_priv *priv = handle->priv; + + clear_bit(HNS3_NIC_STATE_INITED, &priv->state); +} + static int hns3_client_init(struct hnae3_handle *handle) { struct pci_dev *pdev = handle->pdev; @@ -5320,7 +5327,9 @@ static int hns3_client_init(struct hnae3_handle *handle) return ret; out_reg_netdev_fail: + hns3_state_uninit(handle); hns3_dbg_uninit(handle); + hns3_client_stop(handle); out_client_start: hns3_free_rx_cpu_rmap(netdev); hns3_nic_uninit_irq(priv); From 1ec1968e4e439c9e05245f9a44e7a65429b0d7e6 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Sun, 24 Apr 2022 20:57:21 +0800 Subject: [PATCH 285/803] net: hns3: align the debugfs output to the left For debugfs node rx/tx_queue_info and rx/tx_bd_info, their output info is aligned to the right, it's not aligned with output of other debugfs node, so uniform their output info. Fixes: 907676b13071 ("net: hns3: use tx bounce buffer for small packets") Fixes: e44c495d95e0 ("net: hns3: refactor queue info of debugfs") Fixes: 77e9184869c9 ("net: hns3: refactor dump bd info of debugfs") Signed-off-by: Hao Chen Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3_debugfs.c | 84 +++++++++---------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 44d9b560b337..93aeb615191d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -562,12 +562,12 @@ static void hns3_dbg_tx_spare_info(struct hns3_enet_ring *ring, char *buf, for (i = 0; i < ring_num; i++) { j = 0; - sprintf(result[j++], "%8u", i); - sprintf(result[j++], "%9u", ring->tx_copybreak); - sprintf(result[j++], "%3u", tx_spare->len); - sprintf(result[j++], "%3u", tx_spare->next_to_use); - sprintf(result[j++], "%3u", tx_spare->next_to_clean); - sprintf(result[j++], "%3u", tx_spare->last_to_clean); + sprintf(result[j++], "%u", i); + sprintf(result[j++], "%u", ring->tx_copybreak); + sprintf(result[j++], "%u", tx_spare->len); + sprintf(result[j++], "%u", tx_spare->next_to_use); + sprintf(result[j++], "%u", tx_spare->next_to_clean); + sprintf(result[j++], "%u", tx_spare->last_to_clean); sprintf(result[j++], "%pad", &tx_spare->dma); hns3_dbg_fill_content(content, sizeof(content), tx_spare_info_items, @@ -598,35 +598,35 @@ static void hns3_dump_rx_queue_info(struct hns3_enet_ring *ring, u32 base_add_l, base_add_h; u32 j = 0; - sprintf(result[j++], "%8u", index); + sprintf(result[j++], "%u", index); - sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_BD_NUM_REG)); - sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_BD_LEN_REG)); - sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_TAIL_REG)); - sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_HEAD_REG)); - sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_FBDNUM_REG)); - sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_PKTNUM_RECORD_REG)); - sprintf(result[j++], "%9u", ring->rx_copybreak); + sprintf(result[j++], "%u", ring->rx_copybreak); - sprintf(result[j++], "%7s", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base + HNS3_RING_EN_REG) ? "on" : "off"); if (hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev)) - sprintf(result[j++], "%10s", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_EN_REG) ? "on" : "off"); else - sprintf(result[j++], "%10s", "NA"); + sprintf(result[j++], "%s", "NA"); base_add_h = readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_BASEADDR_H_REG); @@ -700,36 +700,36 @@ static void hns3_dump_tx_queue_info(struct hns3_enet_ring *ring, u32 base_add_l, base_add_h; u32 j = 0; - sprintf(result[j++], "%8u", index); - sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", index); + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_BD_NUM_REG)); - sprintf(result[j++], "%2u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_TC_REG)); - sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG)); - sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_HEAD_REG)); - sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_FBDNUM_REG)); - sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_OFFSET_REG)); - sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_PKTNUM_RECORD_REG)); - sprintf(result[j++], "%7s", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base + HNS3_RING_EN_REG) ? "on" : "off"); if (hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev)) - sprintf(result[j++], "%10s", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_EN_REG) ? "on" : "off"); else - sprintf(result[j++], "%10s", "NA"); + sprintf(result[j++], "%s", "NA"); base_add_h = readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_BASEADDR_H_REG); @@ -848,15 +848,15 @@ static void hns3_dump_rx_bd_info(struct hns3_nic_priv *priv, { unsigned int j = 0; - sprintf(result[j++], "%5d", idx); + sprintf(result[j++], "%d", idx); sprintf(result[j++], "%#x", le32_to_cpu(desc->rx.l234_info)); - sprintf(result[j++], "%7u", le16_to_cpu(desc->rx.pkt_len)); - sprintf(result[j++], "%4u", le16_to_cpu(desc->rx.size)); + sprintf(result[j++], "%u", le16_to_cpu(desc->rx.pkt_len)); + sprintf(result[j++], "%u", le16_to_cpu(desc->rx.size)); sprintf(result[j++], "%#x", le32_to_cpu(desc->rx.rss_hash)); - sprintf(result[j++], "%5u", le16_to_cpu(desc->rx.fd_id)); - sprintf(result[j++], "%8u", le16_to_cpu(desc->rx.vlan_tag)); - sprintf(result[j++], "%15u", le16_to_cpu(desc->rx.o_dm_vlan_id_fb)); - sprintf(result[j++], "%11u", le16_to_cpu(desc->rx.ot_vlan_tag)); + sprintf(result[j++], "%u", le16_to_cpu(desc->rx.fd_id)); + sprintf(result[j++], "%u", le16_to_cpu(desc->rx.vlan_tag)); + sprintf(result[j++], "%u", le16_to_cpu(desc->rx.o_dm_vlan_id_fb)); + sprintf(result[j++], "%u", le16_to_cpu(desc->rx.ot_vlan_tag)); sprintf(result[j++], "%#x", le32_to_cpu(desc->rx.bd_base_info)); if (test_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state)) { u32 ol_info = le32_to_cpu(desc->rx.ol_info); @@ -930,19 +930,19 @@ static void hns3_dump_tx_bd_info(struct hns3_nic_priv *priv, { unsigned int j = 0; - sprintf(result[j++], "%6d", idx); + sprintf(result[j++], "%d", idx); sprintf(result[j++], "%#llx", le64_to_cpu(desc->addr)); - sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.vlan_tag)); - sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.send_size)); + sprintf(result[j++], "%u", le16_to_cpu(desc->tx.vlan_tag)); + sprintf(result[j++], "%u", le16_to_cpu(desc->tx.send_size)); sprintf(result[j++], "%#x", le32_to_cpu(desc->tx.type_cs_vlan_tso_len)); - sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.outer_vlan_tag)); - sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.tv)); - sprintf(result[j++], "%10u", + sprintf(result[j++], "%u", le16_to_cpu(desc->tx.outer_vlan_tag)); + sprintf(result[j++], "%u", le16_to_cpu(desc->tx.tv)); + sprintf(result[j++], "%u", le32_to_cpu(desc->tx.ol_type_vlan_len_msec)); sprintf(result[j++], "%#x", le32_to_cpu(desc->tx.paylen_ol4cs)); sprintf(result[j++], "%#x", le16_to_cpu(desc->tx.bdtp_fe_sc_vld_ra_ri)); - sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.mss_hw_csum)); + sprintf(result[j++], "%u", le16_to_cpu(desc->tx.mss_hw_csum)); } static int hns3_dbg_tx_bd_info(struct hns3_dbg_data *d, char *buf, int len) From 123521b6b260d901937d3fb598ab88d260c857a6 Mon Sep 17 00:00:00 2001 From: Peng Li Date: Sun, 24 Apr 2022 20:57:22 +0800 Subject: [PATCH 286/803] net: hns3: fix error log of tx/rx tqps stats The comments in function hclge_comm_tqps_update_stats is not right, so fix it. Fixes: 287db5c40d15 ("net: hns3: create new set of common tqp stats APIs for PF and VF reuse") Signed-off-by: Peng Li Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c index 0c60f41fca8a..f3c9395d8351 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c @@ -75,7 +75,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle, ret = hclge_comm_cmd_send(hw, &desc, 1); if (ret) { dev_err(&hw->cmq.csq.pdev->dev, - "failed to get tqp stat, ret = %d, tx = %u.\n", + "failed to get tqp stat, ret = %d, rx = %u.\n", ret, i); return ret; } @@ -89,7 +89,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle, ret = hclge_comm_cmd_send(hw, &desc, 1); if (ret) { dev_err(&hw->cmq.csq.pdev->dev, - "failed to get tqp stat, ret = %d, rx = %u.\n", + "failed to get tqp stat, ret = %d, tx = %u.\n", ret, i); return ret; } From 48009e9972974c52a5f649f761862dd67bce3d13 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Sun, 24 Apr 2022 20:57:23 +0800 Subject: [PATCH 287/803] net: hns3: modify the return code of hclge_get_ring_chain_from_mbx Currently, function hclge_get_ring_chain_from_mbx will return -ENOMEM if ring_num is bigger than HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM. It is better to return -EINVAL for the invalid parameter case. So this patch fixes it by return -EINVAL in this abnormal branch. Fixes: 5d02a58dae60 ("net: hns3: fix for buffer overflow smatch warning") Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 6799d16de34b..36cbafc5f944 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -176,7 +176,7 @@ static int hclge_get_ring_chain_from_mbx( ring_num = req->msg.ring_num; if (ring_num > HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM) - return -ENOMEM; + return -EINVAL; for (i = 0; i < ring_num; i++) { if (req->msg.param[i].tqp_index >= vport->nic.kinfo.rss_size) { From 7d413735cb18ff73aaba3457b16b08332e8d3cc4 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Sun, 24 Apr 2022 20:57:24 +0800 Subject: [PATCH 288/803] net: hns3: add validity check for message data length Add validity check for message data length in function hclge_send_mbx_msg(), avoid unexpected overflow. Fixes: dde1a86e93ca ("net: hns3: Add mailbox support to PF driver") Signed-off-by: Jian Shen Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 36cbafc5f944..53f939923c28 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -94,6 +94,13 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len, enum hclge_comm_cmd_status status; struct hclge_desc desc; + if (msg_len > HCLGE_MBX_MAX_MSG_SIZE) { + dev_err(&hdev->pdev->dev, + "msg data length(=%u) exceeds maximum(=%u)\n", + msg_len, HCLGE_MBX_MAX_MSG_SIZE); + return -EMSGSIZE; + } + resp_pf_to_vf = (struct hclge_mbx_pf_to_vf_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_PF_TO_VF, false); From c59d606296842409a6e5a4828235b0bd46b12bc4 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Sun, 24 Apr 2022 20:57:25 +0800 Subject: [PATCH 289/803] net: hns3: add return value for mailbox handling in PF Currently, there are some querying mailboxes sent from VF to PF, and VF will wait the PF's handling result. For mailbox HCLGE_MBX_GET_QID_IN_PF and HCLGE_MBX_GET_RSS_KEY, it may fail when the input parameter is invalid, but the prototype of their handler function is void. In this case, PF always return success to VF, which may cause the VF get incorrect result. Fixes it by adding return value for these function. Fixes: 63b1279d9905 ("net: hns3: check queue id range before using") Fixes: 532cfc0df1e4 ("net: hns3: add a check for index in hclge_get_rss_key()") Signed-off-by: Jian Shen Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_mbx.c | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 53f939923c28..7998ca617a92 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -594,9 +594,9 @@ static int hclge_set_vf_mtu(struct hclge_vport *vport, return hclge_set_vport_mtu(vport, mtu); } -static void hclge_get_queue_id_in_pf(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req, - struct hclge_respond_to_vf_msg *resp_msg) +static int hclge_get_queue_id_in_pf(struct hclge_vport *vport, + struct hclge_mbx_vf_to_pf_cmd *mbx_req, + struct hclge_respond_to_vf_msg *resp_msg) { struct hnae3_handle *handle = &vport->nic; struct hclge_dev *hdev = vport->back; @@ -606,17 +606,18 @@ static void hclge_get_queue_id_in_pf(struct hclge_vport *vport, if (queue_id >= handle->kinfo.num_tqps) { dev_err(&hdev->pdev->dev, "Invalid queue id(%u) from VF %u\n", queue_id, mbx_req->mbx_src_vfid); - return; + return -EINVAL; } qid_in_pf = hclge_covert_handle_qid_global(&vport->nic, queue_id); memcpy(resp_msg->data, &qid_in_pf, sizeof(qid_in_pf)); resp_msg->len = sizeof(qid_in_pf); + return 0; } -static void hclge_get_rss_key(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req, - struct hclge_respond_to_vf_msg *resp_msg) +static int hclge_get_rss_key(struct hclge_vport *vport, + struct hclge_mbx_vf_to_pf_cmd *mbx_req, + struct hclge_respond_to_vf_msg *resp_msg) { #define HCLGE_RSS_MBX_RESP_LEN 8 struct hclge_dev *hdev = vport->back; @@ -634,13 +635,14 @@ static void hclge_get_rss_key(struct hclge_vport *vport, dev_warn(&hdev->pdev->dev, "failed to get the rss hash key, the index(%u) invalid !\n", index); - return; + return -EINVAL; } memcpy(resp_msg->data, &rss_cfg->rss_hash_key[index * HCLGE_RSS_MBX_RESP_LEN], HCLGE_RSS_MBX_RESP_LEN); resp_msg->len = HCLGE_RSS_MBX_RESP_LEN; + return 0; } static void hclge_link_fail_parse(struct hclge_dev *hdev, u8 link_fail_code) @@ -816,10 +818,10 @@ void hclge_mbx_handler(struct hclge_dev *hdev) "VF fail(%d) to set mtu\n", ret); break; case HCLGE_MBX_GET_QID_IN_PF: - hclge_get_queue_id_in_pf(vport, req, &resp_msg); + ret = hclge_get_queue_id_in_pf(vport, req, &resp_msg); break; case HCLGE_MBX_GET_RSS_KEY: - hclge_get_rss_key(vport, req, &resp_msg); + ret = hclge_get_rss_key(vport, req, &resp_msg); break; case HCLGE_MBX_GET_LINK_MODE: hclge_get_link_mode(vport, req); From e85f8a9f162562af1a850b9e83ec384f2b6b56aa Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Thu, 21 Apr 2022 05:53:44 +0000 Subject: [PATCH 290/803] net: hns: Add missing fwnode_handle_put in hns_mac_init In one of the error paths of the device_for_each_child_node() loop in hns_mac_init, add missing call to fwnode_handle_put. Signed-off-by: Peng Wu Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 7edf8569514c..928d934cb21a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -1065,19 +1065,23 @@ int hns_mac_init(struct dsaf_device *dsaf_dev) device_for_each_child_node(dsaf_dev->dev, child) { ret = fwnode_property_read_u32(child, "reg", &port_id); if (ret) { + fwnode_handle_put(child); dev_err(dsaf_dev->dev, "get reg fail, ret=%d!\n", ret); return ret; } if (port_id >= max_port_num) { + fwnode_handle_put(child); dev_err(dsaf_dev->dev, "reg(%u) out of range!\n", port_id); return -EINVAL; } mac_cb = devm_kzalloc(dsaf_dev->dev, sizeof(*mac_cb), GFP_KERNEL); - if (!mac_cb) + if (!mac_cb) { + fwnode_handle_put(child); return -ENOMEM; + } mac_cb->fw_port = child; mac_cb->mac_id = (u8)port_id; dsaf_dev->mac_cb[port_id] = mac_cb; From 4e2e65e2e56c6ceb4ea1719360080c0af083229e Mon Sep 17 00:00:00 2001 From: liuyacan Date: Thu, 21 Apr 2022 17:40:27 +0800 Subject: [PATCH 291/803] net/smc: sync err code when tcp connection was refused In the current implementation, when TCP initiates a connection to an unavailable [ip,port], ECONNREFUSED will be stored in the TCP socket, but SMC will not. However, some apps (like curl) use getsockopt(,,SO_ERROR,,) to get the error information, which makes them miss the error message and behave strangely. Fixes: 50717a37db03 ("net/smc: nonblocking connect rework") Signed-off-by: liuyacan Reviewed-by: Tony Lu Acked-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index fc7b6eb22143..bbb1a4ce5050 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1475,6 +1475,8 @@ static void smc_connect_work(struct work_struct *work) smc->sk.sk_state = SMC_CLOSED; if (rc == -EPIPE || rc == -EAGAIN) smc->sk.sk_err = EPIPE; + else if (rc == -ECONNREFUSED) + smc->sk.sk_err = ECONNREFUSED; else if (signal_pending(current)) smc->sk.sk_err = -sock_intr_errno(timeo); sock_put(&smc->sk); /* passive closing */ From 9810c58c7051ae83e7ac326fca3daa823da6b778 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 Apr 2022 18:46:13 +0300 Subject: [PATCH 292/803] net: lan966x: fix a couple off by one bugs The lan966x->ports[] array has lan966x->num_phys_ports elements. These are assigned in lan966x_probe(). That means the > comparison should be changed to >=. The first off by one check is harmless but the second one could lead to an out of bounds access and a crash. Fixes: 5ccd66e01cbe ("net: lan966x: add support for interrupts from analyzer") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan966x/lan966x_mac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c index 2679111ef669..005e56ea5da1 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c @@ -346,7 +346,7 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, lan966x_mac_process_raw_entry(&raw_entries[column], mac, &vid, &dest_idx); - if (WARN_ON(dest_idx > lan966x->num_phys_ports)) + if (WARN_ON(dest_idx >= lan966x->num_phys_ports)) continue; /* If the entry in SW is found, then there is nothing @@ -393,7 +393,7 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, lan966x_mac_process_raw_entry(&raw_entries[column], mac, &vid, &dest_idx); - if (WARN_ON(dest_idx > lan966x->num_phys_ports)) + if (WARN_ON(dest_idx >= lan966x->num_phys_ports)) continue; mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx); From ff827beb706ed719c766acf36449801ded0c17fc Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Thu, 21 Apr 2022 15:07:57 -0700 Subject: [PATCH 293/803] ip_gre: Make o_seqno start from 0 in native mode For GRE and GRETAP devices, currently o_seqno starts from 1 in native mode. According to RFC 2890 2.2., "The first datagram is sent with a sequence number of 0." Fix it. It is worth mentioning that o_seqno already starts from 0 in collect_md mode, see gre_fb_xmit(), where tunnel->o_seqno is passed to gre_build_header() before getting incremented. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Peilin Ye Acked-by: William Tu Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 365caebf51ab..21a8943f6fa4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -459,14 +459,12 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, __be16 proto) { struct ip_tunnel *tunnel = netdev_priv(dev); - - if (tunnel->parms.o_flags & TUNNEL_SEQ) - tunnel->o_seqno++; + __be16 flags = tunnel->parms.o_flags; /* Push GRE header. */ gre_build_header(skb, tunnel->tun_hlen, - tunnel->parms.o_flags, proto, tunnel->parms.o_key, - htonl(tunnel->o_seqno)); + flags, proto, tunnel->parms.o_key, + (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } From fde98ae91f79cab4e020f40c35ed23cbdc59661c Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Thu, 21 Apr 2022 15:08:38 -0700 Subject: [PATCH 294/803] ip6_gre: Make o_seqno start from 0 in native mode For IP6GRE and IP6GRETAP devices, currently o_seqno starts from 1 in native mode. According to RFC 2890 2.2., "The first datagram is sent with a sequence number of 0." Fix it. It is worth mentioning that o_seqno already starts from 0 in collect_md mode, see the "if (tunnel->parms.collect_md)" clause in __gre6_xmit(), where tunnel->o_seqno is passed to gre_build_header() before getting incremented. Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Signed-off-by: Peilin Ye Acked-by: William Tu Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 976236736146..d9e4ac94eab4 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -724,6 +724,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, { struct ip6_tnl *tunnel = netdev_priv(dev); __be16 protocol; + __be16 flags; if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -739,7 +740,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, if (tunnel->parms.collect_md) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; - __be16 flags; int tun_hlen; tun_info = skb_tunnel_info_txcheck(skb); @@ -770,15 +770,14 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, : 0); } else { - if (tunnel->parms.o_flags & TUNNEL_SEQ) - tunnel->o_seqno++; - if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen)) return -ENOMEM; - gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, + flags = tunnel->parms.o_flags; + + gre_build_header(skb, tunnel->tun_hlen, flags, protocol, tunnel->parms.o_key, - htonl(tunnel->o_seqno)); + (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); } return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, From 31c417c948d7f6909cb63f0ac3298f3c38f8ce20 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Thu, 21 Apr 2022 15:09:02 -0700 Subject: [PATCH 295/803] ip_gre, ip6_gre: Fix race condition on o_seqno in collect_md mode As pointed out by Jakub Kicinski, currently using TUNNEL_SEQ in collect_md mode is racy for [IP6]GRE[TAP] devices. Consider the following sequence of events: 1. An [IP6]GRE[TAP] device is created in collect_md mode using "ip link add ... external". "ip" ignores "[o]seq" if "external" is specified, so TUNNEL_SEQ is off, and the device is marked as NETIF_F_LLTX (i.e. it uses lockless TX); 2. Someone sets TUNNEL_SEQ on outgoing skb's, using e.g. bpf_skb_set_tunnel_key() in an eBPF program attached to this device; 3. gre_fb_xmit() or __gre6_xmit() processes these skb's: gre_build_header(skb, tun_hlen, flags, protocol, tunnel_id_to_key32(tun_info->key.tun_id), (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); ^^^^^^^^^^^^^^^^^ Since we are not using the TX lock (&txq->_xmit_lock), multiple CPUs may try to do this tunnel->o_seqno++ in parallel, which is racy. Fix it by making o_seqno atomic_t. As mentioned by Eric Dumazet in commit b790e01aee74 ("ip_gre: lockless xmit"), making o_seqno atomic_t increases "chance for packets being out of order at receiver" when NETIF_F_LLTX is on. Maybe a better fix would be: 1. Do not ignore "oseq" in external mode. Users MUST specify "oseq" if they want the kernel to allow sequencing of outgoing packets; 2. Reject all outgoing TUNNEL_SEQ packets if the device was not created with "oseq". Unfortunately, that would break userspace. We could now make [IP6]GRE[TAP] devices always NETIF_F_LLTX, but let us do it in separate patches to keep this fix minimal. Suggested-by: Jakub Kicinski Fixes: 77a5196a804e ("gre: add sequence number for collect md mode.") Signed-off-by: Peilin Ye Acked-by: William Tu Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 2 +- include/net/ip_tunnels.h | 2 +- net/ipv4/ip_gre.c | 6 +++--- net/ipv6/ip6_gre.c | 7 ++++--- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index a38c4f1e4e5c..74b369bddf49 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -58,7 +58,7 @@ struct ip6_tnl { /* These fields used only by GRE */ __u32 i_seqno; /* The last seen seqno */ - __u32 o_seqno; /* The last output seqno */ + atomic_t o_seqno; /* The last output seqno */ int hlen; /* tun_hlen + encap_hlen */ int tun_hlen; /* Precalculated header length */ int encap_hlen; /* Encap header length (FOU,GUE) */ diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 88dee57eac8a..c24fa934221d 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -116,7 +116,7 @@ struct ip_tunnel { /* These four fields used only by GRE */ u32 i_seqno; /* The last seen seqno */ - u32 o_seqno; /* The last output seqno */ + atomic_t o_seqno; /* The last output seqno */ int tun_hlen; /* Precalculated header length */ /* These four fields used only by ERSPAN */ diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 21a8943f6fa4..aacee9dd771b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -464,7 +464,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, /* Push GRE header. */ gre_build_header(skb, tunnel->tun_hlen, flags, proto, tunnel->parms.o_key, - (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } @@ -502,7 +502,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); gre_build_header(skb, tunnel_hlen, flags, proto, tunnel_id_to_key32(tun_info->key.tun_id), - (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); @@ -579,7 +579,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) } gre_build_header(skb, 8, TUNNEL_SEQ, - proto, 0, htonl(tunnel->o_seqno++)); + proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno))); ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d9e4ac94eab4..5136959b3dc5 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -766,7 +766,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, gre_build_header(skb, tun_hlen, flags, protocol, tunnel_id_to_key32(tun_info->key.tun_id), - (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); } else { @@ -777,7 +777,8 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, gre_build_header(skb, tunnel->tun_hlen, flags, protocol, tunnel->parms.o_key, - (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) + : 0); } return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, @@ -1055,7 +1056,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, /* Push GRE header. */ proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN) : htons(ETH_P_ERSPAN2); - gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++)); + gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno))); /* TooBig packet may have updated dst->dev's mtu */ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) From 7c762e70c50b462fabe44a597e2a6c3e56c236c0 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 22 Apr 2022 01:42:22 +0300 Subject: [PATCH 296/803] net: dsa: flood multicast to CPU when slave has IFF_PROMISC Certain DSA switches can eliminate flooding to the CPU when none of the ports have the IFF_ALLMULTI or IFF_PROMISC flags set. This is done by synthesizing a call to dsa_port_bridge_flags() for the CPU port, a call which normally comes from the bridge driver via switchdev. The bridge port flags and IFF_PROMISC|IFF_ALLMULTI have slightly different semantics, and due to inattention/lack of proper testing, the IFF_PROMISC flag allows unknown unicast to be flooded to the CPU, but not unknown multicast. This must be fixed by setting both BR_FLOOD (unicast) and BR_MCAST_FLOOD in the synthesized dsa_port_bridge_flags() call, since IFF_PROMISC means that packets should not be filtered regardless of their MAC DA. Fixes: 7569459a52c9 ("net: dsa: manage flooding on the CPU ports") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 41c69a6e7854..8022d50584db 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -285,7 +285,7 @@ static void dsa_port_manage_cpu_flood(struct dsa_port *dp) if (other_dp->slave->flags & IFF_ALLMULTI) flags.val |= BR_MCAST_FLOOD; if (other_dp->slave->flags & IFF_PROMISC) - flags.val |= BR_FLOOD; + flags.val |= BR_FLOOD | BR_MCAST_FLOOD; } err = dsa_port_pre_bridge_flags(dp, flags, NULL); From 9323ac367005d6aa4d579311917c636c43206b53 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 22 Apr 2022 02:01:04 +0300 Subject: [PATCH 297/803] net: mscc: ocelot: ignore VID 0 added by 8021q module Both the felix DSA driver and ocelot switchdev driver declare dev->features & NETIF_F_HW_VLAN_CTAG_FILTER under certain circumstances*, so the 8021q module will add VID 0 to our RX filter when the port goes up, to ensure 802.1p traffic is not dropped. We treat VID 0 as a special value (OCELOT_STANDALONE_PVID) which deliberately does not have a struct ocelot_bridge_vlan associated with it. Instead, this gets programmed to the VLAN table in ocelot_vlan_init(). If we allow external calls to modify VID 0, we reach the following situation: # ip link add br0 type bridge vlan_filtering 1 && ip link set br0 up # ip link set swp0 master br0 # ip link set swp0 up # this adds VID 0 to ocelot->vlans with untagged=false bridge vlan port vlan-id swp0 1 PVID Egress Untagged # the bridge also adds VID 1 br0 1 PVID Egress Untagged # bridge vlan add dev swp0 vid 100 untagged Error: mscc_ocelot_switch_lib: Port with egress-tagged VLANs cannot have more than one egress-untagged (native) VLAN. This configuration should have been accepted, because ocelot_port_manage_port_tag() should select OCELOT_PORT_TAG_NATIVE. Yet it isn't, because we have an entry in ocelot->vlans which says VID 0 should be egress-tagged, something the hardware can't do. Fix this by suppressing additions/deletions on VID 0 and managing this VLAN exclusively using OCELOT_STANDALONE_PVID. *DSA toggles it when the port becomes VLAN-aware by joining a VLAN-aware bridge. Ocelot declares it unconditionally for some reason. Fixes: 54c319846086 ("net: mscc: ocelot: enforce FDB isolation when VLAN-unaware") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index ee9c607d62a7..951c4529f6cd 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -629,6 +629,13 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, { int err; + /* Ignore VID 0 added to our RX filter by the 8021q module, since + * that collides with OCELOT_STANDALONE_PVID and changes it from + * egress-untagged to egress-tagged. + */ + if (!vid) + return 0; + err = ocelot_vlan_member_add(ocelot, port, vid, untagged); if (err) return err; @@ -651,6 +658,9 @@ int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid) bool del_pvid = false; int err; + if (!vid) + return 0; + if (ocelot_port->pvid_vlan && ocelot_port->pvid_vlan->vid == vid) del_pvid = true; From 1fcb8fb3522f5b0f1cf0f5c7560cd6629abba0cb Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 22 Apr 2022 02:01:05 +0300 Subject: [PATCH 298/803] net: mscc: ocelot: don't add VID 0 to ocelot->vlans when leaving VLAN-aware bridge DSA, through dsa_port_bridge_leave(), first notifies the port of the fact that it left a bridge, then, if that bridge was VLAN-aware, it notifies the port of the change in VLAN awareness state, towards VLAN-unaware mode. So ocelot_port_vlan_filtering() can be called when ocelot_port->bridge is NULL, and this makes ocelot_add_vlan_unaware_pvid() create a struct ocelot_bridge_vlan with a vid of 0 and an "untagged" setting of true on that port. In a way this structure correctly reflects the reality, but by design, VID 0 (OCELOT_STANDALONE_PVID) was not meant to be kept in the bridge VLAN list of the driver, but managed separately. Having OCELOT_STANDALONE_PVID in ocelot->vlans makes us trip up on several sanity checks that did not expect to have this VID there. For example, after we leave a VLAN-aware bridge and we re-join it, we can no longer program egress-tagged VLANs to hardware: # ip link add br0 type bridge vlan_filtering 1 && ip link set br0 up # ip link set swp0 master br0 # ip link set swp0 nomaster # ip link set swp0 master br0 # bridge vlan add dev swp0 vid 100 Error: mscc_ocelot_switch_lib: Port with more than one egress-untagged VLAN cannot have egress-tagged VLANs. But this configuration is in fact supported by the hardware, since we could use OCELOT_PORT_TAG_NATIVE. According to its comment: /* all VLANs except the native VLAN and VID 0 are egress-tagged */ yet when assessing the eligibility for this mode, we do not check for VID 0 in ocelot_port_uses_native_vlan(), instead we just ensure that ocelot_port_num_untagged_vlans() == 1. This is simply because VID 0 doesn't have a bridge VLAN structure. The way I identify the problem is that ocelot_port_vlan_filtering(false) only means to call ocelot_add_vlan_unaware_pvid() when we dynamically turn off VLAN awareness for a bridge we are under, and the PVID changes from the bridge PVID to a reserved PVID based on the bridge number. Since OCELOT_STANDALONE_PVID is statically added to the VLAN table during ocelot_vlan_init() and never removed afterwards, calling ocelot_add_vlan_unaware_pvid() for it is not intended and does not serve any purpose. Fix the issue by avoiding the call to ocelot_add_vlan_unaware_pvid(vid=0) when we're resetting VLAN awareness after leaving the bridge, to become a standalone port. Fixes: 54c319846086 ("net: mscc: ocelot: enforce FDB isolation when VLAN-unaware") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 951c4529f6cd..ca71b62a44dc 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -551,7 +551,7 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1]; struct ocelot_port *ocelot_port = ocelot->ports[port]; struct ocelot_vcap_filter *filter; - int err; + int err = 0; u32 val; list_for_each_entry(filter, &block->rules, list) { @@ -570,7 +570,7 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, if (vlan_aware) err = ocelot_del_vlan_unaware_pvid(ocelot, port, ocelot_port->bridge); - else + else if (ocelot_port->bridge) err = ocelot_add_vlan_unaware_pvid(ocelot, port, ocelot_port->bridge); if (err) From 4bfe744ff1644fbc0a991a2677dc874475dd6776 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 24 Apr 2022 17:34:07 -0700 Subject: [PATCH 299/803] tcp: fix potential xmit stalls caused by TCP_NOTSENT_LOWAT I had this bug sitting for too long in my pile, it is time to fix it. Thanks to Doug Porter for reminding me of it! We had various attempts in the past, including commit 0cbe6a8f089e ("tcp: remove SOCK_QUEUE_SHRUNK"), but the issue is that TCP stack currently only generates EPOLLOUT from input path, when tp->snd_una has advanced and skb(s) cleaned from rtx queue. If a flow has a big RTT, and/or receives SACKs, it is possible that the notsent part (tp->write_seq - tp->snd_nxt) reaches 0 and no more data can be sent until tp->snd_una finally advances. What is needed is to also check if POLLOUT needs to be generated whenever tp->snd_nxt is advanced, from output path. This bug triggers more often after an idle period, as we do not receive ACK for at least one RTT. tcp_notsent_lowat could be a fraction of what CWND and pacing rate would allow to send during this RTT. In a followup patch, I will remove the bogus call to tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED) from tcp_check_space(). Fact that we have decided to generate an EPOLLOUT does not mean the application has immediately refilled the transmit queue. This optimistic call might have been the reason the bug seemed not too serious. Tested: 200 ms rtt, 1% packet loss, 32 MB tcp_rmem[2] and tcp_wmem[2] $ echo 500000 >/proc/sys/net/ipv4/tcp_notsent_lowat $ cat bench_rr.sh SUM=0 for i in {1..10} do V=`netperf -H remote_host -l30 -t TCP_RR -- -r 10000000,10000 -o LOCAL_BYTES_SENT | egrep -v "MIGRATED|Bytes"` echo $V SUM=$(($SUM + $V)) done echo SUM=$SUM Before patch: $ bench_rr.sh 130000000 80000000 140000000 140000000 140000000 140000000 130000000 40000000 90000000 110000000 SUM=1140000000 After patch: $ bench_rr.sh 430000000 590000000 530000000 450000000 450000000 350000000 450000000 490000000 480000000 460000000 SUM=4680000000 # This is 410 % of the value before patch. Fixes: c9bee3b7fdec ("tcp: TCP_NOTSENT_LOWAT socket option") Signed-off-by: Eric Dumazet Reported-by: Doug Porter Cc: Soheil Hassas Yeganeh Cc: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_input.c | 12 +++++++++++- net/ipv4/tcp_output.c | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index be712fb9ddd7..b99d9d9cbd99 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -620,6 +620,7 @@ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); void tcp_reset(struct sock *sk, struct sk_buff *skb); void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb); void tcp_fin(struct sock *sk); +void tcp_check_space(struct sock *sk); /* tcp_timer.c */ void tcp_init_xmit_timers(struct sock *); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2088f93fa37b..48f607522860 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5454,7 +5454,17 @@ static void tcp_new_space(struct sock *sk) INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk); } -static void tcp_check_space(struct sock *sk) +/* Caller made space either from: + * 1) Freeing skbs in rtx queues (after tp->snd_una has advanced) + * 2) Sent skbs from output queue (and thus advancing tp->snd_nxt) + * + * We might be able to generate EPOLLOUT to the application if: + * 1) Space consumed in output/rtx queues is below sk->sk_sndbuf/2 + * 2) notsent amount (tp->write_seq - tp->snd_nxt) became + * small enough that tcp_stream_memory_free() decides it + * is time to generate EPOLLOUT. + */ +void tcp_check_space(struct sock *sk) { /* pairs with tcp_poll() */ smp_mb(); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9ede847f4199..1ca2f28c9981 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -82,6 +82,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT, tcp_skb_pcount(skb)); + tcp_check_space(sk); } /* SND.NXT, if window was not shrunk or the amount of shrunk was less than one From ba5a4fdd63ae0c575707030db0b634b160baddd7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 24 Apr 2022 13:35:09 -0700 Subject: [PATCH 300/803] tcp: make sure treq->af_specific is initialized syzbot complained about a recent change in TCP stack, hitting a NULL pointer [1] tcp request sockets have an af_specific pointer, which was used before the blamed change only for SYNACK generation in non SYNCOOKIE mode. tcp requests sockets momentarily created when third packet coming from client in SYNCOOKIE mode were not using treq->af_specific. Make sure this field is populated, in the same way normal TCP requests sockets do in tcp_conn_request(). [1] TCP: request_sock_TCPv6: Possible SYN flooding on port 20002. Sending cookies. Check SNMP counters. general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] CPU: 1 PID: 3695 Comm: syz-executor864 Not tainted 5.18.0-rc3-syzkaller-00224-g5fd1fe4807f9 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:tcp_create_openreq_child+0xe16/0x16b0 net/ipv4/tcp_minisocks.c:534 Code: 48 c1 ea 03 80 3c 02 00 0f 85 e5 07 00 00 4c 8b b3 28 01 00 00 48 b8 00 00 00 00 00 fc ff df 49 8d 7e 08 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 c9 07 00 00 48 8b 3c 24 48 89 de 41 ff 56 08 48 RSP: 0018:ffffc90000de0588 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff888076490330 RCX: 0000000000000100 RDX: 0000000000000001 RSI: ffffffff87d67ff0 RDI: 0000000000000008 RBP: ffff88806ee1c7f8 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff87d67f00 R11: 0000000000000000 R12: ffff88806ee1bfc0 R13: ffff88801b0e0368 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f517fe58700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffcead76960 CR3: 000000006f97b000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcp_v6_syn_recv_sock+0x199/0x23b0 net/ipv6/tcp_ipv6.c:1267 tcp_get_cookie_sock+0xc9/0x850 net/ipv4/syncookies.c:207 cookie_v6_check+0x15c3/0x2340 net/ipv6/syncookies.c:258 tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:1131 [inline] tcp_v6_do_rcv+0x1148/0x13b0 net/ipv6/tcp_ipv6.c:1486 tcp_v6_rcv+0x3305/0x3840 net/ipv6/tcp_ipv6.c:1725 ip6_protocol_deliver_rcu+0x2e9/0x1900 net/ipv6/ip6_input.c:422 ip6_input_finish+0x14c/0x2c0 net/ipv6/ip6_input.c:464 NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ip6_input+0x9c/0xd0 net/ipv6/ip6_input.c:473 dst_input include/net/dst.h:461 [inline] ip6_rcv_finish net/ipv6/ip6_input.c:76 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ipv6_rcv+0x27f/0x3b0 net/ipv6/ip6_input.c:297 __netif_receive_skb_one_core+0x114/0x180 net/core/dev.c:5405 __netif_receive_skb+0x24/0x1b0 net/core/dev.c:5519 process_backlog+0x3a0/0x7c0 net/core/dev.c:5847 __napi_poll+0xb3/0x6e0 net/core/dev.c:6413 napi_poll net/core/dev.c:6480 [inline] net_rx_action+0x8ec/0xc60 net/core/dev.c:6567 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 invoke_softirq kernel/softirq.c:432 [inline] __irq_exit_rcu+0x123/0x180 kernel/softirq.c:637 irq_exit_rcu+0x5/0x20 kernel/softirq.c:649 sysvec_apic_timer_interrupt+0x93/0xc0 arch/x86/kernel/apic/apic.c:1097 Fixes: 5b0b9e4c2c89 ("tcp: md5: incorrect tcp_header_len for incoming connections") Signed-off-by: Eric Dumazet Cc: Francesco Ruggeri Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/syncookies.c | 8 +++++++- net/ipv6/syncookies.c | 3 ++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index b99d9d9cbd99..cc1295037533 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -480,6 +480,7 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, u32 cookie); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, + const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb); #ifdef CONFIG_SYN_COOKIES diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 2cb3b852d148..f33c31dd7366 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -281,6 +281,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, EXPORT_SYMBOL(cookie_ecn_ok); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, + const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) { @@ -297,6 +298,10 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, return NULL; treq = tcp_rsk(req); + + /* treq->af_specific might be used to perform TCP_MD5 lookup */ + treq->af_specific = af_ops; + treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; #if IS_ENABLED(CONFIG_MPTCP) treq->is_mptcp = sk_is_mptcp(sk); @@ -364,7 +369,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb); + req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, + &tcp_request_sock_ipv4_ops, sk, skb); if (!req) goto out; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index d1b61d00368e..9cc123f000fb 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -170,7 +170,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb); + req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, + &tcp_request_sock_ipv6_ops, sk, skb); if (!req) goto out; From 2f477ee3ed92d7b3786778399cf3e08007721c0f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Apr 2022 13:47:07 +0200 Subject: [PATCH 301/803] Revert "arm64: dts: tegra: Fix boolean properties with values" This reverts commit 1a67653de0dd, which caused a boot regression. The behavior of the "drive-push-pull" in the kernel does not match what the binding document describes. Revert Rob's patch to make the DT match the kernel again, rather than the binding. Link: https://lore.kernel.org/lkml/YlVAy95eF%2F9b1nmu@orome/ Reported-by: Thierry Reding Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi | 8 ++++---- .../boot/dts/nvidia/tegra186-p3509-0000+p3636-0001.dts | 8 ++++---- arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi | 6 +++--- arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi | 6 +++--- arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi | 6 +++--- arch/arm64/boot/dts/nvidia/tegra210-p2894.dtsi | 8 ++++---- arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts | 8 ++++---- arch/arm64/boot/dts/nvidia/tegra210-smaug.dts | 4 ++-- 8 files changed, 27 insertions(+), 27 deletions(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi index 1df84335925b..aff857df25cf 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi @@ -262,25 +262,25 @@ gpio4 { pins = "gpio4"; function = "32k-out1"; - drive-push-pull; + drive-push-pull = <1>; }; gpio5 { pins = "gpio5"; function = "gpio"; - drive-push-pull; + drive-push-pull = <0>; }; gpio6 { pins = "gpio6"; function = "gpio"; - drive-push-pull; + drive-push-pull = <1>; }; gpio7 { pins = "gpio7"; function = "gpio"; - drive-push-pull; + drive-push-pull = <0>; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p3509-0000+p3636-0001.dts b/arch/arm64/boot/dts/nvidia/tegra186-p3509-0000+p3636-0001.dts index 1ab132c152bb..4631504c3c7a 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186-p3509-0000+p3636-0001.dts +++ b/arch/arm64/boot/dts/nvidia/tegra186-p3509-0000+p3636-0001.dts @@ -462,25 +462,25 @@ gpio4 { pins = "gpio4"; function = "32k-out1"; - drive-push-pull; + drive-push-pull = <1>; }; gpio5 { pins = "gpio5"; function = "gpio"; - drive-push-pull; + drive-push-pull = <0>; }; gpio6 { pins = "gpio6"; function = "gpio"; - drive-push-pull; + drive-push-pull = <1>; }; gpio7 { pins = "gpio7"; function = "gpio"; - drive-push-pull; + drive-push-pull = <1>; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi index 634d0f493c2e..a7d7cfd66379 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi @@ -174,19 +174,19 @@ gpio4 { pins = "gpio4"; function = "32k-out1"; - drive-push-pull; + drive-push-pull = <1>; }; gpio6 { pins = "gpio6"; function = "gpio"; - drive-push-pull; + drive-push-pull = <1>; }; gpio7 { pins = "gpio7"; function = "gpio"; - drive-push-pull; + drive-push-pull = <0>; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi index 0b219e72765e..0bd66f9c620b 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi @@ -148,19 +148,19 @@ gpio4 { pins = "gpio4"; function = "32k-out1"; - drive-push-pull; + drive-push-pull = <1>; }; gpio6 { pins = "gpio6"; function = "gpio"; - drive-push-pull; + drive-push-pull = <1>; }; gpio7 { pins = "gpio7"; function = "gpio"; - drive-push-pull; + drive-push-pull = <0>; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi index 0fe772b04bd0..75eb743a7242 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi @@ -59,7 +59,7 @@ gpio1 { pins = "gpio1"; function = "fps-out"; - drive-push-pull; + drive-push-pull = <1>; maxim,active-fps-source = ; maxim,active-fps-power-up-slot = <7>; maxim,active-fps-power-down-slot = <0>; @@ -68,7 +68,7 @@ gpio2_3 { pins = "gpio2", "gpio3"; function = "fps-out"; - drive-open-drain; + drive-open-drain = <1>; maxim,active-fps-source = ; }; @@ -80,7 +80,7 @@ gpio5_6_7 { pins = "gpio5", "gpio6", "gpio7"; function = "gpio"; - drive-push-pull; + drive-push-pull = <1>; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2894.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2894.dtsi index 936a309e288c..10347b6e6e84 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2894.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2894.dtsi @@ -1351,7 +1351,7 @@ gpio1 { pins = "gpio1"; function = "fps-out"; - drive-push-pull; + drive-push-pull = <1>; maxim,active-fps-source = ; maxim,active-fps-power-up-slot = <7>; maxim,active-fps-power-down-slot = <0>; @@ -1360,14 +1360,14 @@ gpio2 { pins = "gpio2"; function = "fps-out"; - drive-open-drain; + drive-open-drain = <1>; maxim,active-fps-source = ; }; gpio3 { pins = "gpio3"; function = "fps-out"; - drive-open-drain; + drive-open-drain = <1>; maxim,active-fps-source = ; }; @@ -1379,7 +1379,7 @@ gpio5_6_7 { pins = "gpio5", "gpio6", "gpio7"; function = "gpio"; - drive-push-pull; + drive-push-pull = <1>; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts b/arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts index f6446120c267..72c2dc3c14ea 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts +++ b/arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts @@ -195,7 +195,7 @@ gpio1 { pins = "gpio1"; function = "fps-out"; - drive-push-pull; + drive-push-pull = <1>; maxim,active-fps-source = ; maxim,active-fps-power-up-slot = <0>; maxim,active-fps-power-down-slot = <7>; @@ -204,7 +204,7 @@ gpio2 { pins = "gpio2"; function = "fps-out"; - drive-open-drain; + drive-open-drain = <1>; maxim,active-fps-source = ; maxim,active-fps-power-up-slot = <0>; maxim,active-fps-power-down-slot = <7>; @@ -213,7 +213,7 @@ gpio3 { pins = "gpio3"; function = "fps-out"; - drive-open-drain; + drive-open-drain = <1>; maxim,active-fps-source = ; maxim,active-fps-power-up-slot = <4>; maxim,active-fps-power-down-slot = <3>; @@ -227,7 +227,7 @@ gpio5_6_7 { pins = "gpio5", "gpio6", "gpio7"; function = "gpio"; - drive-push-pull; + drive-push-pull = <1>; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra210-smaug.dts b/arch/arm64/boot/dts/nvidia/tegra210-smaug.dts index e42384f097d6..a263d51882ee 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-smaug.dts +++ b/arch/arm64/boot/dts/nvidia/tegra210-smaug.dts @@ -1386,7 +1386,7 @@ gpio3 { pins = "gpio3"; function = "fps-out"; - drive-open-drain; + drive-open-drain = <1>; maxim,active-fps-source = ; maxim,active-fps-power-up-slot = <4>; maxim,active-fps-power-down-slot = <2>; @@ -1395,7 +1395,7 @@ gpio5_6 { pins = "gpio5", "gpio6"; function = "gpio"; - drive-push-pull; + drive-push-pull = <1>; }; gpio4 { From 2fbe467bcbfc760a08f08475eea6bbd4c2874319 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 20 Apr 2022 20:34:53 +0100 Subject: [PATCH 302/803] ASoC: max98090: Reject invalid values in custom control put() The max98090 driver has a custom put function for some controls which can only be updated in certain circumstances which makes no effort to validate that input is suitable for the control, allowing out of spec values to be written to the hardware and presented to userspace. Fix this by returning an error when invalid values are written. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220420193454.2647908-1-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/max98090.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index b45ec35cd63c..6d9261346842 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -413,6 +413,9 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol, val = (val >> mc->shift) & mask; + if (sel < 0 || sel > mc->max) + return -EINVAL; + *select = sel; /* Setting a volume is only valid if it is already On */ From 13fcf676d9e102594effc686d98521ff5c90b925 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 20 Apr 2022 20:34:54 +0100 Subject: [PATCH 303/803] ASoC: max98090: Generate notifications on changes for custom control The max98090 driver has some custom controls which share a put() function which returns 0 unconditionally, meaning that events are not generated when the value changes. Fix that. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220420193454.2647908-2-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/max98090.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index 6d9261346842..62b41ca050a2 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -430,7 +430,7 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol, mask << mc->shift, sel << mc->shift); - return 0; + return *select != val; } static const char *max98090_perf_pwr_text[] = From 2bde1985e39173d8cb64005dad6f34e9bee4c750 Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Thu, 21 Apr 2022 15:54:03 +0300 Subject: [PATCH 304/803] ASoC: atmel: mchp-pdmc: set prepare_slave_config Since a pointer to struct snd_dmaengine_pcm_config is passed, snd_dmaengine_pcm_prepare_slave_config() is no longer called unless it's explicitly set in prepare_slave_config. Fixes: 50291652af52 ("ASoC: atmel: mchp-pdmc: add PDMC driver") Suggested-by: Sascha Hauer Signed-off-by: Codrin Ciubotariu Link: https://lore.kernel.org/r/20220421125403.2180824-2-codrin.ciubotariu@microchip.com Signed-off-by: Mark Brown --- sound/soc/atmel/mchp-pdmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/atmel/mchp-pdmc.c b/sound/soc/atmel/mchp-pdmc.c index 1a7802fbf23c..a3856c73e221 100644 --- a/sound/soc/atmel/mchp-pdmc.c +++ b/sound/soc/atmel/mchp-pdmc.c @@ -966,6 +966,7 @@ static int mchp_pdmc_process(struct snd_pcm_substream *substream, static struct snd_dmaengine_pcm_config mchp_pdmc_config = { .process = mchp_pdmc_process, + .prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config, }; static int mchp_pdmc_probe(struct platform_device *pdev) From 660564fc9a92a893a14f255be434f7ea0b967901 Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Thu, 21 Apr 2022 15:54:02 +0300 Subject: [PATCH 305/803] ASoC: dmaengine: Restore NULL prepare_slave_config() callback As pointed out by Sascha Hauer, this patch changes: if (pmc->config && !pcm->config->prepare_slave_config) to: if (pmc->config && !pcm->config->prepare_slave_config) snd_dmaengine_pcm_prepare_slave_config() This breaks the drivers that do not need a call to dmaengine_slave_config(). Drivers that still need to call snd_dmaengine_pcm_prepare_slave_config(), but have a NULL pcm->config->prepare_slave_config should use snd_dmaengine_pcm_prepare_slave_config() as their prepare_slave_config callback. Fixes: 9a1e13440a4f ("ASoC: dmaengine: do not use a NULL prepare_slave_config() callback") Reported-by: Sascha Hauer Signed-off-by: Codrin Ciubotariu Link: https://lore.kernel.org/r/20220421125403.2180824-1-codrin.ciubotariu@microchip.com Signed-off-by: Mark Brown --- sound/soc/soc-generic-dmaengine-pcm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c index 2ab2ddc1294d..285441d6aeed 100644 --- a/sound/soc/soc-generic-dmaengine-pcm.c +++ b/sound/soc/soc-generic-dmaengine-pcm.c @@ -86,10 +86,10 @@ static int dmaengine_pcm_hw_params(struct snd_soc_component *component, memset(&slave_config, 0, sizeof(slave_config)); - if (pcm->config && pcm->config->prepare_slave_config) - prepare_slave_config = pcm->config->prepare_slave_config; - else + if (!pcm->config) prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config; + else + prepare_slave_config = pcm->config->prepare_slave_config; if (prepare_slave_config) { int ret = prepare_slave_config(substream, params, &slave_config); From 8717627d6ac53251ee012c3c7aca392f29f38a42 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 18 Apr 2022 20:57:31 +0200 Subject: [PATCH 306/803] random: document crng_fast_key_erasure() destination possibility This reverts 35a33ff3807d ("random: use memmove instead of memcpy for remaining 32 bytes"), which was made on a totally bogus basis. The thing it was worried about overlapping came from the stack, not from one of its arguments, as Eric pointed out. But the fact that this confusion even happened draws attention to the fact that it's a bit non-obvious that the random_data parameter can alias chacha_state, and in fact should do so when the caller can't rely on the stack being cleared in a timely manner. So this commit documents that. Reported-by: Eric Biggers Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 3a293f919af9..4c9adb4f3d5d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -318,6 +318,13 @@ static void crng_reseed(bool force) * the resultant ChaCha state to the user, along with the second * half of the block containing 32 bytes of random data that may * be used; random_data_len may not be greater than 32. + * + * The returned ChaCha state contains within it a copy of the old + * key value, at index 4, so the state should always be zeroed out + * immediately after using in order to maintain forward secrecy. + * If the state cannot be erased in a timely manner, then it is + * safer to set the random_data parameter to &chacha_state[4] so + * that this function overwrites it before returning. */ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], u32 chacha_state[CHACHA_STATE_WORDS], @@ -333,7 +340,7 @@ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], chacha20_block(chacha_state, first_block); memcpy(key, first_block, CHACHA_KEY_SIZE); - memmove(random_data, first_block + CHACHA_KEY_SIZE, random_data_len); + memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len); memzero_explicit(first_block, sizeof(first_block)); } From ec862155c3ccbde59644336eec58468e7d07519b Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Sun, 17 Apr 2022 14:50:57 +0700 Subject: [PATCH 307/803] Documentation: siphash: convert danger note to warning for HalfSipHash Render danger paragraph into warning block for emphasization. Cc: Jonathan Corbet Cc: Eric Biggers Cc: Herbert Xu Cc: Mauro Carvalho Chehab Cc: linux-kernel@vger.kernel.org Signed-off-by: Bagas Sanjaya Signed-off-by: Jonathan Corbet Signed-off-by: Jason A. Donenfeld --- Documentation/security/siphash.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/security/siphash.rst b/Documentation/security/siphash.rst index bd9363025fcb..42794a7e052f 100644 --- a/Documentation/security/siphash.rst +++ b/Documentation/security/siphash.rst @@ -121,12 +121,12 @@ even scarier, uses an easily brute-forcable 64-bit key (with a 32-bit output) instead of SipHash's 128-bit key. However, this may appeal to some high-performance `jhash` users. -Danger! - -Do not ever use HalfSipHash except for as a hashtable key function, and only -then when you can be absolutely certain that the outputs will never be -transmitted out of the kernel. This is only remotely useful over `jhash` as a -means of mitigating hashtable flooding denial of service attacks. +.. warning:: + Do not ever use HalfSipHash except for as a hashtable key function, and + only then when you can be absolutely certain that the outputs will never + be transmitted out of the kernel. This is only remotely useful over + `jhash` as a means of mitigating hashtable flooding denial of service + attacks. Generating a HalfSipHash key ============================ From 2fbfeb4fa61684955980b99603c29d2002a67118 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Sun, 17 Apr 2022 14:50:58 +0700 Subject: [PATCH 308/803] Documentation: siphash: enclose HalfSipHash usage example in the literal block Render usage example of HalfSipHash function as code block by using literal block syntax. Cc: Jonathan Corbet Cc: Eric Biggers Cc: Herbert Xu Cc: Mauro Carvalho Chehab Cc: linux-kernel@vger.kernel.org Signed-off-by: Bagas Sanjaya Signed-off-by: Jonathan Corbet Signed-off-by: Jason A. Donenfeld --- Documentation/security/siphash.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/security/siphash.rst b/Documentation/security/siphash.rst index 42794a7e052f..96b1492f4773 100644 --- a/Documentation/security/siphash.rst +++ b/Documentation/security/siphash.rst @@ -132,10 +132,10 @@ Generating a HalfSipHash key ============================ Keys should always be generated from a cryptographically secure source of -random numbers, either using get_random_bytes or get_random_once: +random numbers, either using get_random_bytes or get_random_once:: -hsiphash_key_t key; -get_random_bytes(&key, sizeof(key)); + hsiphash_key_t key; + get_random_bytes(&key, sizeof(key)); If you're not deriving your key from here, you're doing it wrong. From 5a7e470e460fb90657343d843732325e53bb875f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 21 Apr 2022 17:27:31 -0700 Subject: [PATCH 309/803] Documentation: siphash: disambiguate HalfSipHash algorithm from hsiphash functions Fix the documentation for the hsiphash functions to avoid conflating the HalfSipHash algorithm with the hsiphash functions, since these functions actually implement either HalfSipHash or SipHash, and random.c now uses HalfSipHash (in a very special way) without the hsiphash functions. Signed-off-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- Documentation/security/siphash.rst | 34 +++++++++++++++++++----------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/Documentation/security/siphash.rst b/Documentation/security/siphash.rst index 96b1492f4773..a10380cb78e5 100644 --- a/Documentation/security/siphash.rst +++ b/Documentation/security/siphash.rst @@ -121,15 +121,25 @@ even scarier, uses an easily brute-forcable 64-bit key (with a 32-bit output) instead of SipHash's 128-bit key. However, this may appeal to some high-performance `jhash` users. +HalfSipHash support is provided through the "hsiphash" family of functions. + .. warning:: - Do not ever use HalfSipHash except for as a hashtable key function, and - only then when you can be absolutely certain that the outputs will never - be transmitted out of the kernel. This is only remotely useful over - `jhash` as a means of mitigating hashtable flooding denial of service + Do not ever use the hsiphash functions except for as a hashtable key + function, and only then when you can be absolutely certain that the outputs + will never be transmitted out of the kernel. This is only remotely useful + over `jhash` as a means of mitigating hashtable flooding denial of service attacks. -Generating a HalfSipHash key -============================ +On 64-bit kernels, the hsiphash functions actually implement SipHash-1-3, a +reduced-round variant of SipHash, instead of HalfSipHash-1-3. This is because in +64-bit code, SipHash-1-3 is no slower than HalfSipHash-1-3, and can be faster. +Note, this does *not* mean that in 64-bit kernels the hsiphash functions are the +same as the siphash ones, or that they are secure; the hsiphash functions still +use a less secure reduced-round algorithm and truncate their outputs to 32 +bits. + +Generating a hsiphash key +========================= Keys should always be generated from a cryptographically secure source of random numbers, either using get_random_bytes or get_random_once:: @@ -139,8 +149,8 @@ random numbers, either using get_random_bytes or get_random_once:: If you're not deriving your key from here, you're doing it wrong. -Using the HalfSipHash functions -=============================== +Using the hsiphash functions +============================ There are two variants of the function, one that takes a list of integers, and one that takes a buffer:: @@ -183,7 +193,7 @@ You may then iterate like usual over the returned hash bucket. Performance =========== -HalfSipHash is roughly 3 times slower than JenkinsHash. For many replacements, -this will not be a problem, as the hashtable lookup isn't the bottleneck. And -in general, this is probably a good sacrifice to make for the security and DoS -resistance of HalfSipHash. +hsiphash() is roughly 3 times slower than jhash(). For many replacements, this +will not be a problem, as the hashtable lookup isn't the bottleneck. And in +general, this is probably a good sacrifice to make for the security and DoS +resistance of hsiphash(). From 75d2b2b06bd8407d03a3f126bc8b95eb356906c7 Mon Sep 17 00:00:00 2001 From: Adam Wujek Date: Wed, 20 Apr 2022 14:51:25 +0000 Subject: [PATCH 310/803] hwmon: (pmbus) disable PEC if not enabled Explicitly disable PEC when the client does not support it. The problematic scenario is the following. A device with enabled PEC support is up and running and a kernel driver is loaded. Then the driver is unloaded (or device unbound), the HW device is reconfigured externally (e.g. by i2cset) to advertise itself as not supporting PEC. Without a new code, at the second load of the driver (or bind) the "flags" variable is not updated to avoid PEC usage. As a consequence the further communication with the device is done with the PEC enabled, which is wrong and may fail. The implementation first disable the I2C_CLIENT_PEC flag, then the old code enable it if needed. Fixes: 4e5418f787ec ("hwmon: (pmbus_core) Check adapter PEC support") Signed-off-by: Adam Wujek Link: https://lore.kernel.org/r/20220420145059.431061-1-dev_public@wujek.eu Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index b2618b1d529e..d93574d6a1fb 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -2326,6 +2326,9 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data, data->has_status_word = true; } + /* Make sure PEC is disabled, will be enabled later if needed */ + client->flags &= ~I2C_CLIENT_PEC; + /* Enable PEC if the controller and bus supports it */ if (!(data->flags & PMBUS_NO_CAPABILITY)) { ret = i2c_smbus_read_byte_data(client, PMBUS_CAPABILITY); From 0fc74d820a012550be006ba82dd8f1e3fe6fa9f7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 25 Apr 2022 01:28:01 -0700 Subject: [PATCH 311/803] no-MMU: expose vmalloc_huge() for alloc_large_system_hash() It turns out that for the CONFIG_MMU=n builds, vmalloc_huge() was never defined, since it's defined in mm/vmalloc.c, which doesn't get built for the no-MMU configurations. Just implement the trivial wrapper for the no-MMU case too. In fact, just make it an alias to the existing __vmalloc() function that has the same signature. Link: https://lore.kernel.org/all/CAMuHMdVdx2V1uhv_152Sw3_z2xE0spiaWp1d6Ko8-rYmAxUBAg@mail.gmail.com/ Link: https://lore.kernel.org/all/CA+G9fYscb1y4a17Sf5G_Aibt+WuSf-ks_Qjw9tYFy=A4sjCEug@mail.gmail.com/ Link: https://lore.kernel.org/all/20220425150356.GA4138752@roeck-us.net/ Reported-and-tested-by: Linux Kernel Functional Testing Reported-and-tested-by: Geert Uytterhoeven Reported-by: Sudip Mukherjee Reported-by: Guenter Roeck Signed-off-by: Linus Torvalds --- mm/nommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/nommu.c b/mm/nommu.c index 55a9e48a7a02..9d7afc2d959e 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -226,6 +226,8 @@ void *vmalloc(unsigned long size) } EXPORT_SYMBOL(vmalloc); +void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc); + /* * vzalloc - allocate virtually contiguous memory with zero fill * From fff8c10368e64e7f8960f149375c12ca5f3b30af Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 21 Apr 2022 16:35:49 +0200 Subject: [PATCH 312/803] bus: sunxi-rsb: Fix the return value of sunxi_rsb_device_create() This code is really spurious. It always returns an ERR_PTR, even when err is known to be 0 and calls put_device() after a successful device_register() call. It is likely that the return statement in the normal path is missing. Add 'return rdev;' to fix it. Fixes: d787dcdb9c8f ("bus: sunxi-rsb: Add driver for Allwinner Reduced Serial Bus") Signed-off-by: Christophe JAILLET Reviewed-by: Samuel Holland Tested-by: Samuel Holland Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/ef2b9576350bba4c8e05e669e9535e9e2a415763.1650551719.git.christophe.jaillet@wanadoo.fr --- drivers/bus/sunxi-rsb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 4566e730ef2b..60b082fe2ed0 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -227,6 +227,8 @@ static struct sunxi_rsb_device *sunxi_rsb_device_create(struct sunxi_rsb *rsb, dev_dbg(&rdev->dev, "device %s registered\n", dev_name(&rdev->dev)); + return rdev; + err_device_add: put_device(&rdev->dev); From f58ca215cda1975f77b2b762903684a3c101bec9 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 21 Apr 2022 21:43:08 +0800 Subject: [PATCH 313/803] clk: sunxi: sun9i-mmc: check return value after calling platform_get_resource() It will cause null-ptr-deref if platform_get_resource() returns NULL, we need check the return value. Fixes: 7a6fca879f59 ("clk: sunxi: Add driver for A80 MMC config clocks/resets") Signed-off-by: Yang Yingliang Reviewed-by: Samuel Holland Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20220421134308.2885094-1-yangyingliang@huawei.com --- drivers/clk/sunxi/clk-sun9i-mmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c index 542b31d6e96d..636bcf2439ef 100644 --- a/drivers/clk/sunxi/clk-sun9i-mmc.c +++ b/drivers/clk/sunxi/clk-sun9i-mmc.c @@ -109,6 +109,8 @@ static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev) spin_lock_init(&data->lock); r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) + return -EINVAL; /* one clock/reset pair per word */ count = DIV_ROUND_UP((resource_size(r)), SUN9I_MMC_WIDTH); data->membase = devm_ioremap_resource(&pdev->dev, r); From 97b9af7a70936e331170c79040cc9bf20071b566 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Fri, 22 Apr 2022 15:56:18 +0800 Subject: [PATCH 314/803] net/smc: Only save the original clcsock callback functions Both listen and fallback process will save the current clcsock callback functions and establish new ones. But if both of them happen, the saved callback functions will be overwritten. So this patch introduces some helpers to ensure that only save the original callback functions of clcsock. Fixes: 341adeec9ada ("net/smc: Forward wakeup to smc socket waitqueue after fallback") Signed-off-by: Wen Gu Acked-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 55 +++++++++++++++++++++++++++++---------------- net/smc/smc.h | 29 ++++++++++++++++++++++++ net/smc/smc_close.c | 3 ++- 3 files changed, 67 insertions(+), 20 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index bbb1a4ce5050..d8433f17c5c9 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -373,6 +373,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, sk->sk_prot->hash(sk); sk_refcnt_debug_inc(sk); mutex_init(&smc->clcsock_release_lock); + smc_init_saved_callbacks(smc); return sk; } @@ -782,9 +783,24 @@ static void smc_fback_error_report(struct sock *clcsk) smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report); } +static void smc_fback_replace_callbacks(struct smc_sock *smc) +{ + struct sock *clcsk = smc->clcsock->sk; + + clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + + smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change, + &smc->clcsk_state_change); + smc_clcsock_replace_cb(&clcsk->sk_data_ready, smc_fback_data_ready, + &smc->clcsk_data_ready); + smc_clcsock_replace_cb(&clcsk->sk_write_space, smc_fback_write_space, + &smc->clcsk_write_space); + smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report, + &smc->clcsk_error_report); +} + static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) { - struct sock *clcsk; int rc = 0; mutex_lock(&smc->clcsock_release_lock); @@ -792,10 +808,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) rc = -EBADF; goto out; } - clcsk = smc->clcsock->sk; - if (smc->use_fallback) - goto out; smc->use_fallback = true; smc->fallback_rsn = reason_code; smc_stat_fallback(smc); @@ -810,18 +823,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) * in smc sk->sk_wq and they should be woken up * as clcsock's wait queue is woken up. */ - smc->clcsk_state_change = clcsk->sk_state_change; - smc->clcsk_data_ready = clcsk->sk_data_ready; - smc->clcsk_write_space = clcsk->sk_write_space; - smc->clcsk_error_report = clcsk->sk_error_report; - - clcsk->sk_state_change = smc_fback_state_change; - clcsk->sk_data_ready = smc_fback_data_ready; - clcsk->sk_write_space = smc_fback_write_space; - clcsk->sk_error_report = smc_fback_error_report; - - smc->clcsock->sk->sk_user_data = - (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + smc_fback_replace_callbacks(smc); } out: mutex_unlock(&smc->clcsock_release_lock); @@ -1596,6 +1598,19 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) * function; switch it back to the original sk_data_ready function */ new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready; + + /* if new clcsock has also inherited the fallback-specific callback + * functions, switch them back to the original ones. + */ + if (lsmc->use_fallback) { + if (lsmc->clcsk_state_change) + new_clcsock->sk->sk_state_change = lsmc->clcsk_state_change; + if (lsmc->clcsk_write_space) + new_clcsock->sk->sk_write_space = lsmc->clcsk_write_space; + if (lsmc->clcsk_error_report) + new_clcsock->sk->sk_error_report = lsmc->clcsk_error_report; + } + (*new_smc)->clcsock = new_clcsock; out: return rc; @@ -2397,10 +2412,10 @@ static int smc_listen(struct socket *sock, int backlog) /* save original sk_data_ready function and establish * smc-specific sk_data_ready function */ - smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready; - smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready; smc->clcsock->sk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready, + smc_clcsock_data_ready, &smc->clcsk_data_ready); /* save original ops */ smc->ori_af_ops = inet_csk(smc->clcsock->sk)->icsk_af_ops; @@ -2415,7 +2430,9 @@ static int smc_listen(struct socket *sock, int backlog) rc = kernel_listen(smc->clcsock, backlog); if (rc) { - smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready; + smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, + &smc->clcsk_data_ready); + smc->clcsock->sk->sk_user_data = NULL; goto out; } sk->sk_max_ack_backlog = backlog; diff --git a/net/smc/smc.h b/net/smc/smc.h index ea0620529ebe..5ed765ea0c73 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -288,12 +288,41 @@ static inline struct smc_sock *smc_sk(const struct sock *sk) return (struct smc_sock *)sk; } +static inline void smc_init_saved_callbacks(struct smc_sock *smc) +{ + smc->clcsk_state_change = NULL; + smc->clcsk_data_ready = NULL; + smc->clcsk_write_space = NULL; + smc->clcsk_error_report = NULL; +} + static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk) { return (struct smc_sock *) ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY); } +/* save target_cb in saved_cb, and replace target_cb with new_cb */ +static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *), + void (*new_cb)(struct sock *), + void (**saved_cb)(struct sock *)) +{ + /* only save once */ + if (!*saved_cb) + *saved_cb = *target_cb; + *target_cb = new_cb; +} + +/* restore target_cb to saved_cb, and reset saved_cb to NULL */ +static inline void smc_clcsock_restore_cb(void (**target_cb)(struct sock *), + void (**saved_cb)(struct sock *)) +{ + if (!*saved_cb) + return; + *target_cb = *saved_cb; + *saved_cb = NULL; +} + extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */ extern struct workqueue_struct *smc_close_wq; /* wq for close work */ diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 676cb2333d3c..7bd1ef55b9df 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -214,7 +214,8 @@ again: sk->sk_state = SMC_CLOSED; sk->sk_state_change(sk); /* wake up accept */ if (smc->clcsock && smc->clcsock->sk) { - smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready; + smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, + &smc->clcsk_data_ready); smc->clcsock->sk->sk_user_data = NULL; rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); } From 0558226cebee256aa3f8ec0cc5a800a10bf120a6 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Fri, 22 Apr 2022 15:56:19 +0800 Subject: [PATCH 315/803] net/smc: Fix slab-out-of-bounds issue in fallback syzbot reported a slab-out-of-bounds/use-after-free issue, which was caused by accessing an already freed smc sock in fallback-specific callback functions of clcsock. This patch fixes the issue by restoring fallback-specific callback functions to original ones and resetting clcsock sk_user_data to NULL before freeing smc sock. Meanwhile, this patch introduces sk_callback_lock to make the access and assignment to sk_user_data mutually exclusive. Reported-by: syzbot+b425899ed22c6943e00b@syzkaller.appspotmail.com Fixes: 341adeec9ada ("net/smc: Forward wakeup to smc socket waitqueue after fallback") Link: https://lore.kernel.org/r/00000000000013ca8105d7ae3ada@google.com/ Signed-off-by: Wen Gu Acked-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 80 ++++++++++++++++++++++++++++++++------------- net/smc/smc_close.c | 2 ++ 2 files changed, 59 insertions(+), 23 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index d8433f17c5c9..fce16b9d6e1a 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -243,11 +243,27 @@ struct proto smc_proto6 = { }; EXPORT_SYMBOL_GPL(smc_proto6); +static void smc_fback_restore_callbacks(struct smc_sock *smc) +{ + struct sock *clcsk = smc->clcsock->sk; + + write_lock_bh(&clcsk->sk_callback_lock); + clcsk->sk_user_data = NULL; + + smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change); + smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready); + smc_clcsock_restore_cb(&clcsk->sk_write_space, &smc->clcsk_write_space); + smc_clcsock_restore_cb(&clcsk->sk_error_report, &smc->clcsk_error_report); + + write_unlock_bh(&clcsk->sk_callback_lock); +} + static void smc_restore_fallback_changes(struct smc_sock *smc) { if (smc->clcsock->file) { /* non-accepted sockets have no file yet */ smc->clcsock->file->private_data = smc->sk.sk_socket; smc->clcsock->file = NULL; + smc_fback_restore_callbacks(smc); } } @@ -745,48 +761,57 @@ out: static void smc_fback_state_change(struct sock *clcsk) { - struct smc_sock *smc = - smc_clcsock_user_data(clcsk); + struct smc_sock *smc; - if (!smc) - return; - smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change); + read_lock_bh(&clcsk->sk_callback_lock); + smc = smc_clcsock_user_data(clcsk); + if (smc) + smc_fback_forward_wakeup(smc, clcsk, + smc->clcsk_state_change); + read_unlock_bh(&clcsk->sk_callback_lock); } static void smc_fback_data_ready(struct sock *clcsk) { - struct smc_sock *smc = - smc_clcsock_user_data(clcsk); + struct smc_sock *smc; - if (!smc) - return; - smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready); + read_lock_bh(&clcsk->sk_callback_lock); + smc = smc_clcsock_user_data(clcsk); + if (smc) + smc_fback_forward_wakeup(smc, clcsk, + smc->clcsk_data_ready); + read_unlock_bh(&clcsk->sk_callback_lock); } static void smc_fback_write_space(struct sock *clcsk) { - struct smc_sock *smc = - smc_clcsock_user_data(clcsk); + struct smc_sock *smc; - if (!smc) - return; - smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space); + read_lock_bh(&clcsk->sk_callback_lock); + smc = smc_clcsock_user_data(clcsk); + if (smc) + smc_fback_forward_wakeup(smc, clcsk, + smc->clcsk_write_space); + read_unlock_bh(&clcsk->sk_callback_lock); } static void smc_fback_error_report(struct sock *clcsk) { - struct smc_sock *smc = - smc_clcsock_user_data(clcsk); + struct smc_sock *smc; - if (!smc) - return; - smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report); + read_lock_bh(&clcsk->sk_callback_lock); + smc = smc_clcsock_user_data(clcsk); + if (smc) + smc_fback_forward_wakeup(smc, clcsk, + smc->clcsk_error_report); + read_unlock_bh(&clcsk->sk_callback_lock); } static void smc_fback_replace_callbacks(struct smc_sock *smc) { struct sock *clcsk = smc->clcsock->sk; + write_lock_bh(&clcsk->sk_callback_lock); clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change, @@ -797,6 +822,8 @@ static void smc_fback_replace_callbacks(struct smc_sock *smc) &smc->clcsk_write_space); smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report, &smc->clcsk_error_report); + + write_unlock_bh(&clcsk->sk_callback_lock); } static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) @@ -2370,17 +2397,20 @@ out: static void smc_clcsock_data_ready(struct sock *listen_clcsock) { - struct smc_sock *lsmc = - smc_clcsock_user_data(listen_clcsock); + struct smc_sock *lsmc; + read_lock_bh(&listen_clcsock->sk_callback_lock); + lsmc = smc_clcsock_user_data(listen_clcsock); if (!lsmc) - return; + goto out; lsmc->clcsk_data_ready(listen_clcsock); if (lsmc->sk.sk_state == SMC_LISTEN) { sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */ if (!queue_work(smc_tcp_ls_wq, &lsmc->tcp_listen_work)) sock_put(&lsmc->sk); } +out: + read_unlock_bh(&listen_clcsock->sk_callback_lock); } static int smc_listen(struct socket *sock, int backlog) @@ -2412,10 +2442,12 @@ static int smc_listen(struct socket *sock, int backlog) /* save original sk_data_ready function and establish * smc-specific sk_data_ready function */ + write_lock_bh(&smc->clcsock->sk->sk_callback_lock); smc->clcsock->sk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready, smc_clcsock_data_ready, &smc->clcsk_data_ready); + write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); /* save original ops */ smc->ori_af_ops = inet_csk(smc->clcsock->sk)->icsk_af_ops; @@ -2430,9 +2462,11 @@ static int smc_listen(struct socket *sock, int backlog) rc = kernel_listen(smc->clcsock, backlog); if (rc) { + write_lock_bh(&smc->clcsock->sk->sk_callback_lock); smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, &smc->clcsk_data_ready); smc->clcsock->sk->sk_user_data = NULL; + write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); goto out; } sk->sk_max_ack_backlog = backlog; diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 7bd1ef55b9df..31db7438857c 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -214,9 +214,11 @@ again: sk->sk_state = SMC_CLOSED; sk->sk_state_change(sk); /* wake up accept */ if (smc->clcsock && smc->clcsock->sk) { + write_lock_bh(&smc->clcsock->sk->sk_callback_lock); smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, &smc->clcsk_data_ready); smc->clcsock->sk->sk_user_data = NULL; + write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); } smc_close_cleanup_listen(sk); From 6fbe0cc53a1b5f7abe2c44399c18e943adbebe2f Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Wed, 20 Apr 2022 10:06:39 +0300 Subject: [PATCH 316/803] video: fbdev: clps711x-fb: Use syscon_regmap_lookup_by_phandle Since version 5.13, the standard syscon bindings have been added to all clps711x DT nodes, so we can now use the more general syscon_regmap_lookup_by_phandle function to get the syscon pointer. Signed-off-by: Alexander Shiyan Signed-off-by: Helge Deller --- drivers/video/fbdev/clps711x-fb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/video/fbdev/clps711x-fb.c b/drivers/video/fbdev/clps711x-fb.c index c5d15c6db287..771ce1f76951 100644 --- a/drivers/video/fbdev/clps711x-fb.c +++ b/drivers/video/fbdev/clps711x-fb.c @@ -268,8 +268,7 @@ static int clps711x_fb_probe(struct platform_device *pdev) goto out_fb_release; } - cfb->syscon = - syscon_regmap_lookup_by_compatible("cirrus,ep7209-syscon1"); + cfb->syscon = syscon_regmap_lookup_by_phandle(np, "syscon"); if (IS_ERR(cfb->syscon)) { ret = PTR_ERR(cfb->syscon); goto out_fb_release; From 00c94ebec5925593c0377b941289224469e72ac7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 25 Apr 2022 18:04:27 -0400 Subject: [PATCH 317/803] NFSv4: Don't invalidate inode attributes on delegation return There is no need to declare attributes such as the ctime, mtime and block size invalid when we're just returning a delegation, so it is inappropriate to call nfs_post_op_update_inode_force_wcc(). Instead, just call nfs_refresh_inode() after faking up the change attribute. We know that the GETATTR op occurs before the DELEGRETURN, so we are safe when doing this. Fixes: 0bc2c9b4dca9 ("NFSv4: Don't discard the attributes returned by asynchronous DELEGRETURN") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 16106f805ffa..a79f66432bd3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -363,6 +363,14 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent kunmap_atomic(start); } +static void nfs4_fattr_set_prechange(struct nfs_fattr *fattr, u64 version) +{ + if (!(fattr->valid & NFS_ATTR_FATTR_PRECHANGE)) { + fattr->pre_change_attr = version; + fattr->valid |= NFS_ATTR_FATTR_PRECHANGE; + } +} + static void nfs4_test_and_free_stateid(struct nfs_server *server, nfs4_stateid *stateid, const struct cred *cred) @@ -6553,7 +6561,9 @@ static void nfs4_delegreturn_release(void *calldata) pnfs_roc_release(&data->lr.arg, &data->lr.res, data->res.lr_ret); if (inode) { - nfs_post_op_update_inode_force_wcc(inode, &data->fattr); + nfs4_fattr_set_prechange(&data->fattr, + inode_peek_iversion_raw(inode)); + nfs_refresh_inode(inode, &data->fattr); nfs_iput_and_deactive(inode); } kfree(calldata); From 19317433057dc1f2ca9a975e4e6b547282c2a5ef Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Mon, 25 Apr 2022 03:47:26 -0700 Subject: [PATCH 318/803] tty: n_gsm: fix sometimes uninitialized warning in gsm_dlci_modem_output() 'size' may be used uninitialized in gsm_dlci_modem_output() if called with an adaption that is neither 1 nor 2. The function is currently only called by gsm_modem_upd_via_data() and only for adaption 2. Properly handle every invalid case by returning -EINVAL to silence the compiler warning and avoid future regressions. Fixes: c19ffe00fed6 ("tty: n_gsm: fix invalid use of MSC in advanced option") Cc: stable@vger.kernel.org Reported-by: kernel test robot Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220425104726.7986-1-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 8652308c187f..a38b922bcbc1 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -932,18 +932,21 @@ static int gsm_dlci_modem_output(struct gsm_mux *gsm, struct gsm_dlci *dlci, { u8 *dp = NULL; struct gsm_msg *msg; - int size; + int size = 0; /* for modem bits without break data */ - if (dlci->adaption == 1) { - size = 0; - } else if (dlci->adaption == 2) { - size = 1; + switch (dlci->adaption) { + case 1: /* Unstructured */ + break; + case 2: /* Unstructured with modem bits. */ + size++; if (brk > 0) size++; - } else { + break; + default: pr_err("%s: unsupported adaption %d\n", __func__, dlci->adaption); + return -EINVAL; } msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype); From e4e6448638a01905faeda9bf96aa9df7c8ef463c Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Thu, 7 Apr 2022 23:09:19 +0300 Subject: [PATCH 319/803] cpufreq: qcom-cpufreq-hw: Clear dcvs interrupts It's noted that dcvs interrupts are not self-clearing, thus an interrupt handler runs constantly, which leads to a severe regression in runtime. To fix the problem an explicit write to clear interrupt register is required, note that on OSM platforms the register may not be present. Fixes: 275157b367f4 ("cpufreq: qcom-cpufreq-hw: Add dcvs interrupt support") Signed-off-by: Vladimir Zapolskiy Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 0ec18e1589dc..0253731d6d25 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -24,6 +24,8 @@ #define CLK_HW_DIV 2 #define LUT_TURBO_IND 1 +#define GT_IRQ_STATUS BIT(2) + #define HZ_PER_KHZ 1000 struct qcom_cpufreq_soc_data { @@ -32,6 +34,7 @@ struct qcom_cpufreq_soc_data { u32 reg_dcvs_ctrl; u32 reg_freq_lut; u32 reg_volt_lut; + u32 reg_intr_clr; u32 reg_current_vote; u32 reg_perf_state; u8 lut_row_size; @@ -360,6 +363,10 @@ static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data) disable_irq_nosync(c_data->throttle_irq); schedule_delayed_work(&c_data->throttle_work, 0); + if (c_data->soc_data->reg_intr_clr) + writel_relaxed(GT_IRQ_STATUS, + c_data->base + c_data->soc_data->reg_intr_clr); + return IRQ_HANDLED; } @@ -379,6 +386,7 @@ static const struct qcom_cpufreq_soc_data epss_soc_data = { .reg_dcvs_ctrl = 0xb0, .reg_freq_lut = 0x100, .reg_volt_lut = 0x200, + .reg_intr_clr = 0x308, .reg_perf_state = 0x320, .lut_row_size = 4, }; From 4ae4dd2e26fdfebf0b8c6af6c325383eadfefdb4 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 21 Apr 2022 19:22:21 +0300 Subject: [PATCH 320/803] drm/i915: Fix SEL_FETCH_PLANE_*(PIPE_B+) register addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix typo in the _SEL_FETCH_PLANE_BASE_1_B register base address. Fixes: a5523e2ff074a5 ("drm/i915: Add PSR2 selective fetch registers") References: https://gitlab.freedesktop.org/drm/intel/-/issues/5400 Cc: José Roberto de Souza Cc: # v5.9+ Signed-off-by: Imre Deak Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220421162221.2261895-1-imre.deak@intel.com (cherry picked from commit af2cbc6ef967f61711a3c40fca5366ea0bc7fecc) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7748f7f20b95..a9354f8f110d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5152,7 +5152,7 @@ #define _SEL_FETCH_PLANE_BASE_6_A 0x70940 #define _SEL_FETCH_PLANE_BASE_7_A 0x70960 #define _SEL_FETCH_PLANE_BASE_CUR_A 0x70880 -#define _SEL_FETCH_PLANE_BASE_1_B 0x70990 +#define _SEL_FETCH_PLANE_BASE_1_B 0x71890 #define _SEL_FETCH_PLANE_BASE_A(plane) _PICK(plane, \ _SEL_FETCH_PLANE_BASE_1_A, \ From f7e1089f43761ca221914aea9a755b23dc7cbc33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 13 Apr 2022 18:28:51 +0300 Subject: [PATCH 321/803] drm/i915/fbc: Consult hw.crtc instead of uapi.crtc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit plane_state->uapi.crtc is not what we want to be looking at. If bigjoiner is used hw.crtc is what tells us what crtc the plane is supposedly using. Not an actual problem on current hardware as the only FBC capable pipe (A) can't be a bigjoiner slave and thus uapi.crtc==hw.crtc always here. But when we get more FBC instances this will become actually important. Fixes: 2e6c99f88679 ("drm/i915/fbc: Nuke lots of crap from intel_fbc_state_cache") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220413152852.7336-1-ville.syrjala@linux.intel.com Reviewed-by: Manasi Navare (cherry picked from commit 3e1faae3398789abe8d4797255bfe28d95d81308) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_fbc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 87f4af3fd523..3e61a8936245 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1037,7 +1037,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, struct intel_plane_state *plane_state = intel_atomic_get_new_plane_state(state, plane); const struct drm_framebuffer *fb = plane_state->hw.fb; - struct intel_crtc *crtc = to_intel_crtc(plane_state->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(plane_state->hw.crtc); const struct intel_crtc_state *crtc_state; struct intel_fbc *fbc = plane->fbc; From b561275d633bcd8e0e8055ab86f1a13df75a0269 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Fri, 22 Apr 2022 19:43:40 +0800 Subject: [PATCH 322/803] mctp: defer the kfree of object mdev->addrs The function mctp_unregister() reclaims the device's relevant resource when a netcard detaches. However, a running routine may be unaware of this and cause the use-after-free of the mdev->addrs object. The race condition can be demonstrated below cleanup thread another thread | unregister_netdev() | mctp_sendmsg() ... | ... mctp_unregister() | rt = mctp_route_lookup() ... | mctl_local_output() kfree(mdev->addrs) | ... | saddr = rt->dev->addrs[0]; | An attacker can adopt the (recent provided) mtcpserial driver with pty to fake the device detaching and use the userfaultfd to increase the race success chance (in mctp_sendmsg). The KASan report for such a POC is shown below: [ 86.051955] ================================================================== [ 86.051955] BUG: KASAN: use-after-free in mctp_local_output+0x4e9/0xb7d [ 86.051955] Read of size 1 at addr ffff888005f298c0 by task poc/295 [ 86.051955] [ 86.051955] Call Trace: [ 86.051955] [ 86.051955] dump_stack_lvl+0x33/0x42 [ 86.051955] print_report.cold.13+0xb2/0x6b3 [ 86.051955] ? preempt_schedule_irq+0x57/0x80 [ 86.051955] ? mctp_local_output+0x4e9/0xb7d [ 86.051955] kasan_report+0xa5/0x120 [ 86.051955] ? mctp_local_output+0x4e9/0xb7d [ 86.051955] mctp_local_output+0x4e9/0xb7d [ 86.051955] ? mctp_dev_set_key+0x79/0x79 [ 86.051955] ? copyin+0x38/0x50 [ 86.051955] ? _copy_from_iter+0x1b6/0xf20 [ 86.051955] ? sysvec_apic_timer_interrupt+0x97/0xb0 [ 86.051955] ? asm_sysvec_apic_timer_interrupt+0x12/0x20 [ 86.051955] ? mctp_local_output+0x1/0xb7d [ 86.051955] mctp_sendmsg+0x64d/0xdb0 [ 86.051955] ? mctp_sk_close+0x20/0x20 [ 86.051955] ? __fget_light+0x2fd/0x4f0 [ 86.051955] ? mctp_sk_close+0x20/0x20 [ 86.051955] sock_sendmsg+0xdd/0x110 [ 86.051955] __sys_sendto+0x1cc/0x2a0 [ 86.051955] ? __ia32_sys_getpeername+0xa0/0xa0 [ 86.051955] ? new_sync_write+0x335/0x550 [ 86.051955] ? alloc_file+0x22f/0x500 [ 86.051955] ? __ip_do_redirect+0x820/0x1820 [ 86.051955] ? vfs_write+0x44d/0x7b0 [ 86.051955] ? vfs_write+0x44d/0x7b0 [ 86.051955] ? fput_many+0x15/0x120 [ 86.051955] ? ksys_write+0x155/0x1b0 [ 86.051955] ? __ia32_sys_read+0xa0/0xa0 [ 86.051955] __x64_sys_sendto+0xd8/0x1b0 [ 86.051955] ? exit_to_user_mode_prepare+0x2f/0x120 [ 86.051955] ? syscall_exit_to_user_mode+0x12/0x20 [ 86.051955] do_syscall_64+0x3a/0x80 [ 86.051955] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 86.051955] RIP: 0033:0x7f82118a56b3 [ 86.051955] RSP: 002b:00007ffdb154b110 EFLAGS: 00000293 ORIG_RAX: 000000000000002c [ 86.051955] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f82118a56b3 [ 86.051955] RDX: 0000000000000010 RSI: 00007f8211cd4000 RDI: 0000000000000007 [ 86.051955] RBP: 00007ffdb154c1d0 R08: 00007ffdb154b164 R09: 000000000000000c [ 86.051955] R10: 0000000000000000 R11: 0000000000000293 R12: 000055d779800db0 [ 86.051955] R13: 00007ffdb154c2b0 R14: 0000000000000000 R15: 0000000000000000 [ 86.051955] [ 86.051955] [ 86.051955] Allocated by task 295: [ 86.051955] kasan_save_stack+0x1c/0x40 [ 86.051955] __kasan_kmalloc+0x84/0xa0 [ 86.051955] mctp_rtm_newaddr+0x242/0x610 [ 86.051955] rtnetlink_rcv_msg+0x2fd/0x8b0 [ 86.051955] netlink_rcv_skb+0x11c/0x340 [ 86.051955] netlink_unicast+0x439/0x630 [ 86.051955] netlink_sendmsg+0x752/0xc00 [ 86.051955] sock_sendmsg+0xdd/0x110 [ 86.051955] __sys_sendto+0x1cc/0x2a0 [ 86.051955] __x64_sys_sendto+0xd8/0x1b0 [ 86.051955] do_syscall_64+0x3a/0x80 [ 86.051955] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 86.051955] [ 86.051955] Freed by task 301: [ 86.051955] kasan_save_stack+0x1c/0x40 [ 86.051955] kasan_set_track+0x21/0x30 [ 86.051955] kasan_set_free_info+0x20/0x30 [ 86.051955] __kasan_slab_free+0x104/0x170 [ 86.051955] kfree+0x8c/0x290 [ 86.051955] mctp_dev_notify+0x161/0x2c0 [ 86.051955] raw_notifier_call_chain+0x8b/0xc0 [ 86.051955] unregister_netdevice_many+0x299/0x1180 [ 86.051955] unregister_netdevice_queue+0x210/0x2f0 [ 86.051955] unregister_netdev+0x13/0x20 [ 86.051955] mctp_serial_close+0x6d/0xa0 [ 86.051955] tty_ldisc_kill+0x31/0xa0 [ 86.051955] tty_ldisc_hangup+0x24f/0x560 [ 86.051955] __tty_hangup.part.28+0x2ce/0x6b0 [ 86.051955] tty_release+0x327/0xc70 [ 86.051955] __fput+0x1df/0x8b0 [ 86.051955] task_work_run+0xca/0x150 [ 86.051955] exit_to_user_mode_prepare+0x114/0x120 [ 86.051955] syscall_exit_to_user_mode+0x12/0x20 [ 86.051955] do_syscall_64+0x46/0x80 [ 86.051955] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 86.051955] [ 86.051955] The buggy address belongs to the object at ffff888005f298c0 [ 86.051955] which belongs to the cache kmalloc-8 of size 8 [ 86.051955] The buggy address is located 0 bytes inside of [ 86.051955] 8-byte region [ffff888005f298c0, ffff888005f298c8) [ 86.051955] [ 86.051955] The buggy address belongs to the physical page: [ 86.051955] flags: 0x100000000000200(slab|node=0|zone=1) [ 86.051955] raw: 0100000000000200 dead000000000100 dead000000000122 ffff888005c42280 [ 86.051955] raw: 0000000000000000 0000000080660066 00000001ffffffff 0000000000000000 [ 86.051955] page dumped because: kasan: bad access detected [ 86.051955] [ 86.051955] Memory state around the buggy address: [ 86.051955] ffff888005f29780: 00 fc fc fc fc 00 fc fc fc fc 00 fc fc fc fc 00 [ 86.051955] ffff888005f29800: fc fc fc fc 00 fc fc fc fc 00 fc fc fc fc 00 fc [ 86.051955] >ffff888005f29880: fc fc fc fb fc fc fc fc fa fc fc fc fc fa fc fc [ 86.051955] ^ [ 86.051955] ffff888005f29900: fc fc 00 fc fc fc fc 00 fc fc fc fc 00 fc fc fc [ 86.051955] ffff888005f29980: fc 00 fc fc fc fc 00 fc fc fc fc 00 fc fc fc fc [ 86.051955] ================================================================== To this end, just like the commit e04480920d1e ("Bluetooth: defer cleanup of resources in hci_unregister_dev()") this patch defers the destructive kfree(mdev->addrs) in mctp_unregister to the mctp_dev_put, where the refcount of mdev is zero and the entire device is reclaimed. This prevents the use-after-free because the sendmsg thread holds the reference of mdev in the mctp_route object. Fixes: 583be982d934 (mctp: Add device handling and netlink interface) Signed-off-by: Lin Ma Acked-by: Jeremy Kerr Link: https://lore.kernel.org/r/20220422114340.32346-1-linma@zju.edu.cn Signed-off-by: Paolo Abeni --- net/mctp/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mctp/device.c b/net/mctp/device.c index f49be882e98e..99a3bda8852f 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -313,6 +313,7 @@ void mctp_dev_hold(struct mctp_dev *mdev) void mctp_dev_put(struct mctp_dev *mdev) { if (mdev && refcount_dec_and_test(&mdev->refs)) { + kfree(mdev->addrs); dev_put(mdev->dev); kfree_rcu(mdev, rcu); } @@ -441,7 +442,6 @@ static void mctp_unregister(struct net_device *dev) mctp_route_remove_dev(mdev); mctp_neigh_remove_dev(mdev); - kfree(mdev->addrs); mctp_dev_put(mdev); } From acac0541d1d65e81e599ec399d34d184d2424401 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Sun, 24 Apr 2022 09:53:07 -0700 Subject: [PATCH 323/803] net: bcmgenet: hide status block before TX timestamping The hardware checksum offloading requires use of a transmit status block inserted before the outgoing frame data, this was updated in '9a9ba2a4aaaa ("net: bcmgenet: always enable status blocks")' However, skb_tx_timestamp() assumes that it is passed a raw frame and PTP parsing chokes on this status block. Fix this by calling __skb_pull(), which hides the TSB before calling skb_tx_timestamp(), so an outgoing PTP packet is parsed correctly. As the data in the skb has already been set up for DMA, and the dma_unmap_* calls use a separately stored address, there is no no effective change in the data transmission. Signed-off-by: Jonathan Lemon Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20220424165307.591145-1-jonathan.lemon@gmail.com Fixes: d03825fba459 ("net: bcmgenet: add skb_tx_timestamp call") Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 9a41145dadfc..bf1ec8fdc2ad 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2035,6 +2035,11 @@ static struct sk_buff *bcmgenet_add_tsb(struct net_device *dev, return skb; } +static void bcmgenet_hide_tsb(struct sk_buff *skb) +{ + __skb_pull(skb, sizeof(struct status_64)); +} + static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -2141,6 +2146,8 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) } GENET_CB(skb)->last_cb = tx_cb_ptr; + + bcmgenet_hide_tsb(skb); skb_tx_timestamp(skb); /* Decrement total BD count and advance our write pointer */ From 1fa568e26f001e951b634d62ef3accdc80a87c7b Mon Sep 17 00:00:00 2001 From: Shida Zhang Date: Tue, 26 Apr 2022 11:20:07 +0800 Subject: [PATCH 324/803] bug: Have __warn() prototype defined unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The __warn() prototype is declared in CONFIG_BUG scope but the function definition in panic.c is unconditional. The IBT enablement started using it unconditionally but a CONFIG_X86_KERNEL_IBT=y, CONFIG_BUG=n .config will trigger a arch/x86/kernel/traps.c: In function ‘__exc_control_protection’: arch/x86/kernel/traps.c:249:17: error: implicit declaration of function \ ‘__warn’; did you mean ‘pr_warn’? [-Werror=implicit-function-declaration] Pull up the declarations so that they're unconditionally visible too. [ bp: Rewrite commit message. ] Fixes: 991625f3dd2c ("x86/ibt: Add IBT feature, MSR and #CP handling") Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Shida Zhang Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220426032007.510245-1-starzhangzsd@gmail.com --- include/asm-generic/bug.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index edb0e2a602a8..ba1f860af38b 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -21,6 +21,12 @@ #include #include +struct warn_args; +struct pt_regs; + +void __warn(const char *file, int line, void *caller, unsigned taint, + struct pt_regs *regs, struct warn_args *args); + #ifdef CONFIG_BUG #ifdef CONFIG_GENERIC_BUG @@ -110,11 +116,6 @@ extern __printf(1, 2) void __warn_printk(const char *fmt, ...); #endif /* used internally by panic.c */ -struct warn_args; -struct pt_regs; - -void __warn(const char *file, int line, void *caller, unsigned taint, - struct pt_regs *regs, struct warn_args *args); #ifndef WARN_ON #define WARN_ON(condition) ({ \ From 0ed9704b660b259b54743cad8a84a11148f60f0a Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 25 Apr 2022 09:27:38 +0300 Subject: [PATCH 325/803] net: phy: marvell10g: fix return value on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return back the error value that we get from phy_read_mmd(). Fixes: c84786fa8f91 ("net: phy: marvell10g: read copper results from CSSR1") Signed-off-by: Baruch Siach Reviewed-by: Marek Behún Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/f47cb031aeae873bb008ba35001607304a171a20.1650868058.git.baruch@tkos.co.il Signed-off-by: Paolo Abeni --- drivers/net/phy/marvell10g.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index b6fea119fe13..2b7d0720720b 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -880,7 +880,7 @@ static int mv3310_read_status_copper(struct phy_device *phydev) cssr1 = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_PCS_CSSR1); if (cssr1 < 0) - return val; + return cssr1; /* If the link settings are not resolved, mark the link down */ if (!(cssr1 & MV_PCS_CSSR1_RESOLVED)) { From 24cbdb910bb62b5be3865275e5682be1a7708c0f Mon Sep 17 00:00:00 2001 From: Nathan Rossi Date: Mon, 25 Apr 2022 07:04:54 +0000 Subject: [PATCH 326/803] net: dsa: mv88e6xxx: Fix port_hidden_wait to account for port_base_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The other port_hidden functions rely on the port_read/port_write functions to access the hidden control port. These functions apply the offset for port_base_addr where applicable. Update port_hidden_wait to use the port_wait_bit so that port_base_addr offsets are accounted for when waiting for the busy bit to change. Without the offset the port_hidden_wait function would timeout on devices that have a non-zero port_base_addr (e.g. MV88E6141), however devices that have a zero port_base_addr would operate correctly (e.g. MV88E6390). Fixes: 609070133aff ("net: dsa: mv88e6xxx: update code operating on hidden registers") Signed-off-by: Nathan Rossi Reviewed-by: Marek Behún Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220425070454.348584-1-nathan@nathanrossi.com Signed-off-by: Paolo Abeni --- drivers/net/dsa/mv88e6xxx/port_hidden.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/port_hidden.c b/drivers/net/dsa/mv88e6xxx/port_hidden.c index b49d05f0e117..7a9f9ff6dedf 100644 --- a/drivers/net/dsa/mv88e6xxx/port_hidden.c +++ b/drivers/net/dsa/mv88e6xxx/port_hidden.c @@ -40,8 +40,9 @@ int mv88e6xxx_port_hidden_wait(struct mv88e6xxx_chip *chip) { int bit = __bf_shf(MV88E6XXX_PORT_RESERVED_1A_BUSY); - return mv88e6xxx_wait_bit(chip, MV88E6XXX_PORT_RESERVED_1A_CTRL_PORT, - MV88E6XXX_PORT_RESERVED_1A, bit, 0); + return mv88e6xxx_port_wait_bit(chip, + MV88E6XXX_PORT_RESERVED_1A_CTRL_PORT, + MV88E6XXX_PORT_RESERVED_1A, bit, 0); } int mv88e6xxx_port_hidden_read(struct mv88e6xxx_chip *chip, int block, int port, From acb16b395c3f3d7502443e0c799c2b42df645642 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 25 Apr 2022 13:37:03 +0300 Subject: [PATCH 327/803] virtio_net: fix wrong buf address calculation when using xdp We received a report[1] of kernel crashes when Cilium is used in XDP mode with virtio_net after updating to newer kernels. After investigating the reason it turned out that when using mergeable bufs with an XDP program which adjusts xdp.data or xdp.data_meta page_to_buf() calculates the build_skb address wrong because the offset can become less than the headroom so it gets the address of the previous page (-X bytes depending on how lower offset is): page_to_skb: page addr ffff9eb2923e2000 buf ffff9eb2923e1ffc offset 252 headroom 256 This is a pr_err() I added in the beginning of page_to_skb which clearly shows offset that is less than headroom by adding 4 bytes of metadata via an xdp prog. The calculations done are: receive_mergeable(): headroom = VIRTIO_XDP_HEADROOM; // VIRTIO_XDP_HEADROOM == 256 bytes offset = xdp.data - page_address(xdp_page) - vi->hdr_len - metasize; page_to_skb(): p = page_address(page) + offset; ... buf = p - headroom; Now buf goes -4 bytes from the page's starting address as can be seen above which is set as skb->head and skb->data by build_skb later. Depending on what's done with the skb (when it's freed most often) we get all kinds of corruptions and BUG_ON() triggers in mm[2]. We have to recalculate the new headroom after the xdp program has run, similar to how offset and len are recalculated. Headroom is directly related to data_hard_start, data and data_meta, so we use them to get the new size. The result is correct (similar pr_err() in page_to_skb, one case of xdp_page and one case of virtnet buf): a) Case with 4 bytes of metadata [ 115.949641] page_to_skb: page addr ffff8b4dcfad2000 offset 252 headroom 252 [ 121.084105] page_to_skb: page addr ffff8b4dcf018000 offset 20732 headroom 252 b) Case of pushing data +32 bytes [ 153.181401] page_to_skb: page addr ffff8b4dd0c4d000 offset 288 headroom 288 [ 158.480421] page_to_skb: page addr ffff8b4dd00b0000 offset 24864 headroom 288 c) Case of pushing data -33 bytes [ 835.906830] page_to_skb: page addr ffff8b4dd3270000 offset 223 headroom 223 [ 840.839910] page_to_skb: page addr ffff8b4dcdd68000 offset 12511 headroom 223 Offset and headroom are equal because offset points to the start of reserved bytes for the virtio_net header which are at buf start + headroom, while data points at buf start + vnet hdr size + headroom so when data or data_meta are adjusted by the xdp prog both the headroom size and the offset change equally. We can use data_hard_start to compute the new headroom after the xdp prog (linearized / page start case, the virtnet buf case is similar just with bigger base offset): xdp.data_hard_start = page_address + vnet_hdr xdp.data = page_address + vnet_hdr + headroom new headroom after xdp prog = xdp.data - xdp.data_hard_start - metasize An example reproducer xdp prog[3] is below. [1] https://github.com/cilium/cilium/issues/19453 [2] Two of the many traces: [ 40.437400] BUG: Bad page state in process swapper/0 pfn:14940 [ 40.916726] BUG: Bad page state in process systemd-resolve pfn:053b7 [ 41.300891] kernel BUG at include/linux/mm.h:720! [ 41.301801] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 41.302784] CPU: 1 PID: 1181 Comm: kubelet Kdump: loaded Tainted: G B W 5.18.0-rc1+ #37 [ 41.304458] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1.fc35 04/01/2014 [ 41.306018] RIP: 0010:page_frag_free+0x79/0xe0 [ 41.306836] Code: 00 00 75 ea 48 8b 07 a9 00 00 01 00 74 e0 48 8b 47 48 48 8d 50 ff a8 01 48 0f 45 fa eb d0 48 c7 c6 18 b8 30 a6 e8 d7 f8 fc ff <0f> 0b 48 8d 78 ff eb bc 48 8b 07 a9 00 00 01 00 74 3a 66 90 0f b6 [ 41.310235] RSP: 0018:ffffac05c2a6bc78 EFLAGS: 00010292 [ 41.311201] RAX: 000000000000003e RBX: 0000000000000000 RCX: 0000000000000000 [ 41.312502] RDX: 0000000000000001 RSI: ffffffffa6423004 RDI: 00000000ffffffff [ 41.313794] RBP: ffff993c98823600 R08: 0000000000000000 R09: 00000000ffffdfff [ 41.315089] R10: ffffac05c2a6ba68 R11: ffffffffa698ca28 R12: ffff993c98823600 [ 41.316398] R13: ffff993c86311ebc R14: 0000000000000000 R15: 000000000000005c [ 41.317700] FS: 00007fe13fc56740(0000) GS:ffff993cdd900000(0000) knlGS:0000000000000000 [ 41.319150] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 41.320152] CR2: 000000c00008a000 CR3: 0000000014908000 CR4: 0000000000350ee0 [ 41.321387] Call Trace: [ 41.321819] [ 41.322193] skb_release_data+0x13f/0x1c0 [ 41.322902] __kfree_skb+0x20/0x30 [ 41.343870] tcp_recvmsg_locked+0x671/0x880 [ 41.363764] tcp_recvmsg+0x5e/0x1c0 [ 41.384102] inet_recvmsg+0x42/0x100 [ 41.406783] ? sock_recvmsg+0x1d/0x70 [ 41.428201] sock_read_iter+0x84/0xd0 [ 41.445592] ? 0xffffffffa3000000 [ 41.462442] new_sync_read+0x148/0x160 [ 41.479314] ? 0xffffffffa3000000 [ 41.496937] vfs_read+0x138/0x190 [ 41.517198] ksys_read+0x87/0xc0 [ 41.535336] do_syscall_64+0x3b/0x90 [ 41.551637] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 41.568050] RIP: 0033:0x48765b [ 41.583955] Code: e8 4a 35 fe ff eb 88 cc cc cc cc cc cc cc cc e8 fb 7a fe ff 48 8b 7c 24 10 48 8b 74 24 18 48 8b 54 24 20 48 8b 44 24 08 0f 05 <48> 3d 01 f0 ff ff 76 20 48 c7 44 24 28 ff ff ff ff 48 c7 44 24 30 [ 41.632818] RSP: 002b:000000c000a2f5b8 EFLAGS: 00000212 ORIG_RAX: 0000000000000000 [ 41.664588] RAX: ffffffffffffffda RBX: 000000c000062000 RCX: 000000000048765b [ 41.681205] RDX: 0000000000005e54 RSI: 000000c000e66000 RDI: 0000000000000016 [ 41.697164] RBP: 000000c000a2f608 R08: 0000000000000001 R09: 00000000000001b4 [ 41.713034] R10: 00000000000000b6 R11: 0000000000000212 R12: 00000000000000e9 [ 41.728755] R13: 0000000000000001 R14: 000000c000a92000 R15: ffffffffffffffff [ 41.744254] [ 41.758585] Modules linked in: br_netfilter bridge veth netconsole virtio_net and [ 33.524802] BUG: Bad page state in process systemd-network pfn:11e60 [ 33.528617] page ffffe05dc0147b00 ffffe05dc04e7a00 ffff8ae9851ec000 (1) len 82 offset 252 metasize 4 hroom 0 hdr_len 12 data ffff8ae9851ec10c data_meta ffff8ae9851ec108 data_end ffff8ae9851ec14e [ 33.529764] page:000000003792b5ba refcount:0 mapcount:-512 mapping:0000000000000000 index:0x0 pfn:0x11e60 [ 33.532463] flags: 0xfffffc0000000(node=0|zone=1|lastcpupid=0x1fffff) [ 33.532468] raw: 000fffffc0000000 0000000000000000 dead000000000122 0000000000000000 [ 33.532470] raw: 0000000000000000 0000000000000000 00000000fffffdff 0000000000000000 [ 33.532471] page dumped because: nonzero mapcount [ 33.532472] Modules linked in: br_netfilter bridge veth netconsole virtio_net [ 33.532479] CPU: 0 PID: 791 Comm: systemd-network Kdump: loaded Not tainted 5.18.0-rc1+ #37 [ 33.532482] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1.fc35 04/01/2014 [ 33.532484] Call Trace: [ 33.532496] [ 33.532500] dump_stack_lvl+0x45/0x5a [ 33.532506] bad_page.cold+0x63/0x94 [ 33.532510] free_pcp_prepare+0x290/0x420 [ 33.532515] free_unref_page+0x1b/0x100 [ 33.532518] skb_release_data+0x13f/0x1c0 [ 33.532524] kfree_skb_reason+0x3e/0xc0 [ 33.532527] ip6_mc_input+0x23c/0x2b0 [ 33.532531] ip6_sublist_rcv_finish+0x83/0x90 [ 33.532534] ip6_sublist_rcv+0x22b/0x2b0 [3] XDP program to reproduce(xdp_pass.c): #include #include SEC("xdp_pass") int xdp_pkt_pass(struct xdp_md *ctx) { bpf_xdp_adjust_head(ctx, -(int)32); return XDP_PASS; } char _license[] SEC("license") = "GPL"; compile: clang -O2 -g -Wall -target bpf -c xdp_pass.c -o xdp_pass.o load on virtio_net: ip link set enp1s0 xdpdrv obj xdp_pass.o sec xdp_pass CC: stable@vger.kernel.org CC: Jason Wang CC: Xuan Zhuo CC: Daniel Borkmann CC: "Michael S. Tsirkin" CC: virtualization@lists.linux-foundation.org Fixes: 8fb7da9e9907 ("virtio_net: get build_skb() buf by data ptr") Signed-off-by: Nikolay Aleksandrov Reviewed-by: Xuan Zhuo Acked-by: Daniel Borkmann Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Link: https://lore.kernel.org/r/20220425103703.3067292-1-razor@blackwall.org Signed-off-by: Paolo Abeni --- drivers/net/virtio_net.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 87838cbe38cf..cbba9d2e8f32 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1005,6 +1005,24 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, * xdp.data_meta were adjusted */ len = xdp.data_end - xdp.data + vi->hdr_len + metasize; + + /* recalculate headroom if xdp.data or xdp_data_meta + * were adjusted, note that offset should always point + * to the start of the reserved bytes for virtio_net + * header which are followed by xdp.data, that means + * that offset is equal to the headroom (when buf is + * starting at the beginning of the page, otherwise + * there is a base offset inside the page) but it's used + * with a different starting point (buf start) than + * xdp.data (buf start + vnet hdr size). If xdp.data or + * data_meta were adjusted by the xdp prog then the + * headroom size has changed and so has the offset, we + * can use data_hard_start, which points at buf start + + * vnet hdr size, to calculate the new headroom and use + * it later to compute buf start in page_to_skb() + */ + headroom = xdp.data - xdp.data_hard_start - metasize; + /* We can only create skb based on xdp_page. */ if (unlikely(xdp_page != page)) { rcu_read_unlock(); @@ -1012,7 +1030,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, head_skb = page_to_skb(vi, rq, xdp_page, offset, len, PAGE_SIZE, false, metasize, - VIRTIO_XDP_HEADROOM); + headroom); return head_skb; } break; From 973e0f7a847ef13ade840d4c30729ce329a66895 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 25 Apr 2022 13:35:18 +0300 Subject: [PATCH 328/803] usb: dwc3: pci: add support for the Intel Meteor Lake-P This patch adds the necessary PCI IDs for Intel Meteor Lake-P devices. Signed-off-by: Heikki Krogerus Cc: stable Link: https://lore.kernel.org/r/20220425103518.44028-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 33f657d83246..2e19e0e4ea53 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -45,6 +45,8 @@ #define PCI_DEVICE_ID_INTEL_ADLM 0x54ee #define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1 #define PCI_DEVICE_ID_INTEL_RPLS 0x7a61 +#define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1 +#define PCI_DEVICE_ID_INTEL_MTL 0x7e7e #define PCI_DEVICE_ID_INTEL_TGL 0x9a15 #define PCI_DEVICE_ID_AMD_MR 0x163a @@ -456,6 +458,12 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS), (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLP), + (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTL), + (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL), (kernel_ulong_t) &dwc3_pci_intel_swnode, }, From 4bc31edebde51fcf8ad0794763b8679a7ecb5ec0 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 22 Apr 2022 10:08:53 -0700 Subject: [PATCH 329/803] mmc: core: Set HS clock speed before sending HS CMD13 Way back in commit 4f25580fb84d ("mmc: core: changes frequency to hs_max_dtr when selecting hs400es"), Rockchip engineers noticed that some eMMC don't respond to SEND_STATUS commands very reliably if they're still running at a low initial frequency. As mentioned in that commit, JESD84-B51 P49 suggests a sequence in which the host: 1. sets HS_TIMING 2. bumps the clock ("<= 52 MHz") 3. sends further commands It doesn't exactly require that we don't use a lower-than-52MHz frequency, but in practice, these eMMC don't like it. The aforementioned commit tried to get that right for HS400ES, although it's unclear whether this ever truly worked as committed into mainline, as other changes/refactoring adjusted the sequence in conflicting ways: 08573eaf1a70 ("mmc: mmc: do not use CMD13 to get status after speed mode switch") 53e60650f74e ("mmc: core: Allow CMD13 polling when switching to HS mode for mmc") In any case, today we do step 3 before step 2. Let's fix that, and also apply the same logic to HS200/400, where this eMMC has problems too. Resolves errors like this seen when booting some RK3399 Gru/Scarlet systems: [ 2.058881] mmc1: CQHCI version 5.10 [ 2.097545] mmc1: SDHCI controller on fe330000.mmc [fe330000.mmc] using ADMA [ 2.209804] mmc1: mmc_select_hs400es failed, error -84 [ 2.215597] mmc1: error -84 whilst initialising MMC card [ 2.417514] mmc1: mmc_select_hs400es failed, error -110 [ 2.423373] mmc1: error -110 whilst initialising MMC card [ 2.605052] mmc1: mmc_select_hs400es failed, error -110 [ 2.617944] mmc1: error -110 whilst initialising MMC card [ 2.835884] mmc1: mmc_select_hs400es failed, error -110 [ 2.841751] mmc1: error -110 whilst initialising MMC card Ealier versions of this patch bumped to 200MHz/HS200 speeds too early, which caused issues on, e.g., qcom-msm8974-fairphone-fp2. (Thanks for the report Luca!) After a second look, it appears that aligns with JESD84 / page 45 / table 28, so we need to keep to lower (HS / 52 MHz) rates first. Fixes: 08573eaf1a70 ("mmc: mmc: do not use CMD13 to get status after speed mode switch") Fixes: 53e60650f74e ("mmc: core: Allow CMD13 polling when switching to HS mode for mmc") Fixes: 4f25580fb84d ("mmc: core: changes frequency to hs_max_dtr when selecting hs400es") Cc: Shawn Lin Link: https://lore.kernel.org/linux-mmc/11962455.O9o76ZdvQC@g550jk/ Reported-by: Luca Weiss Signed-off-by: Brian Norris Tested-by: Luca Weiss Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220422100824.v4.1.I484f4ee35609f78b932bd50feed639c29e64997e@changeid Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index e7ea45386c22..efa95dc4fc4e 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1384,13 +1384,17 @@ static int mmc_select_hs400es(struct mmc_card *card) goto out_err; } + /* + * Bump to HS timing and frequency. Some cards don't handle + * SEND_STATUS reliably at the initial frequency. + */ mmc_set_timing(host, MMC_TIMING_MMC_HS); + mmc_set_bus_speed(card); + err = mmc_switch_status(card, true); if (err) goto out_err; - mmc_set_clock(host, card->ext_csd.hs_max_dtr); - /* Switch card to DDR with strobe bit */ val = EXT_CSD_DDR_BUS_WIDTH_8 | EXT_CSD_BUS_WIDTH_STROBE; err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, @@ -1448,7 +1452,7 @@ out_err: static int mmc_select_hs200(struct mmc_card *card) { struct mmc_host *host = card->host; - unsigned int old_timing, old_signal_voltage; + unsigned int old_timing, old_signal_voltage, old_clock; int err = -EINVAL; u8 val; @@ -1479,8 +1483,17 @@ static int mmc_select_hs200(struct mmc_card *card) false, true, MMC_CMD_RETRIES); if (err) goto err; + + /* + * Bump to HS timing and frequency. Some cards don't handle + * SEND_STATUS reliably at the initial frequency. + * NB: We can't move to full (HS200) speeds until after we've + * successfully switched over. + */ old_timing = host->ios.timing; + old_clock = host->ios.clock; mmc_set_timing(host, MMC_TIMING_MMC_HS200); + mmc_set_clock(card->host, card->ext_csd.hs_max_dtr); /* * For HS200, CRC errors are not a reliable way to know the @@ -1493,8 +1506,10 @@ static int mmc_select_hs200(struct mmc_card *card) * mmc_select_timing() assumes timing has not changed if * it is a switch error. */ - if (err == -EBADMSG) + if (err == -EBADMSG) { + mmc_set_clock(host, old_clock); mmc_set_timing(host, old_timing); + } } err: if (err) { From c7428dbddcf4ea1919e1c8e15f715b94ca359268 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Fri, 22 Apr 2022 17:36:28 -0700 Subject: [PATCH 330/803] usb: dwc3: gadget: Return proper request status If the user sets the usb_request's no_interrupt, then there will be no completion event for the request. Currently the driver incorrectly uses the event status of a different request to report the status for a request with no_interrupt. The dwc3 driver needs to check the TRB status associated with the request when reporting its status. Note: this is only applicable to missed_isoc TRB completion status, but the other status are also listed for completeness/documentation. Fixes: 6d8a019614f3 ("usb: dwc3: gadget: check for Missed Isoc from event status") Cc: Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/db2c80108286cfd108adb05bad52138b78d7c3a7.1650673655.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index ab725d2262d6..0b9c2493844a 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3274,6 +3274,7 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, const struct dwc3_event_depevt *event, struct dwc3_request *req, int status) { + int request_status; int ret; if (req->request.num_mapped_sgs) @@ -3294,7 +3295,35 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, req->needs_extra_trb = false; } - dwc3_gadget_giveback(dep, req, status); + /* + * The event status only reflects the status of the TRB with IOC set. + * For the requests that don't set interrupt on completion, the driver + * needs to check and return the status of the completed TRBs associated + * with the request. Use the status of the last TRB of the request. + */ + if (req->request.no_interrupt) { + struct dwc3_trb *trb; + + trb = dwc3_ep_prev_trb(dep, dep->trb_dequeue); + switch (DWC3_TRB_SIZE_TRBSTS(trb->size)) { + case DWC3_TRBSTS_MISSED_ISOC: + /* Isoc endpoint only */ + request_status = -EXDEV; + break; + case DWC3_TRB_STS_XFER_IN_PROG: + /* Applicable when End Transfer with ForceRM=0 */ + case DWC3_TRBSTS_SETUP_PENDING: + /* Control endpoint only */ + case DWC3_TRBSTS_OK: + default: + request_status = 0; + break; + } + } else { + request_status = status; + } + + dwc3_gadget_giveback(dep, req, request_status); out: return ret; From 03e607cbb2931374db1825f371e9c7f28526d3f4 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Mon, 25 Apr 2022 13:14:09 -0400 Subject: [PATCH 331/803] usb: phy: generic: Get the vbus supply While support for working with a vbus was added, the regulator was never actually gotten (despite what was documented). Fix this by actually getting the supply from the device tree. Fixes: 7acc9973e3c4 ("usb: phy: generic: add vbus support") Cc: stable Signed-off-by: Sean Anderson Link: https://lore.kernel.org/r/20220425171412.1188485-3-sean.anderson@seco.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-generic.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c index 661a229c105d..34b9f8140187 100644 --- a/drivers/usb/phy/phy-generic.c +++ b/drivers/usb/phy/phy-generic.c @@ -268,6 +268,13 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop) return -EPROBE_DEFER; } + nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus"); + if (PTR_ERR(nop->vbus_draw) == -ENODEV) + nop->vbus_draw = NULL; + if (IS_ERR(nop->vbus_draw)) + return dev_err_probe(dev, PTR_ERR(nop->vbus_draw), + "could not get vbus regulator\n"); + nop->dev = dev; nop->phy.dev = nop->dev; nop->phy.label = "nop-xceiv"; From dc3ae06c5f2170d879ff58696f629d8c3868aec3 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 24 Apr 2022 11:26:21 -0500 Subject: [PATCH 332/803] drm/sun4i: Remove obsolete references to PHYS_OFFSET commit b4bdc4fbf8d0 ("soc: sunxi: Deal with the MBUS DMA offsets in a central place") added a platform device notifier that sets the DMA offset for all of the display engine frontend and backend devices. The code applying the offset to DMA buffer physical addresses was then removed from the backend driver in commit 756668ba682e ("drm/sun4i: backend: Remove the MBUS quirks"), but the code subtracting PHYS_OFFSET was left in the frontend driver. As a result, the offset was applied twice in the frontend driver. This likely went unnoticed because it only affects specific configurations (scaling or certain pixel formats) where the frontend is used, on boards with both one of these older SoCs and more than 1 GB of DRAM. In addition, the references to PHYS_OFFSET prevent compiling the driver on architectures where PHYS_OFFSET is not defined. Fixes: b4bdc4fbf8d0 ("soc: sunxi: Deal with the MBUS DMA offsets in a central place") Reviewed-by: Jernej Skrabec Signed-off-by: Samuel Holland Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20220424162633.12369-4-samuel@sholland.org --- drivers/gpu/drm/sun4i/sun4i_frontend.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_frontend.c b/drivers/gpu/drm/sun4i/sun4i_frontend.c index 56ae38389db0..462fae73eae9 100644 --- a/drivers/gpu/drm/sun4i/sun4i_frontend.c +++ b/drivers/gpu/drm/sun4i/sun4i_frontend.c @@ -222,13 +222,11 @@ void sun4i_frontend_update_buffer(struct sun4i_frontend *frontend, /* Set the physical address of the buffer in memory */ paddr = drm_fb_cma_get_gem_addr(fb, state, 0); - paddr -= PHYS_OFFSET; DRM_DEBUG_DRIVER("Setting buffer #0 address to %pad\n", &paddr); regmap_write(frontend->regs, SUN4I_FRONTEND_BUF_ADDR0_REG, paddr); if (fb->format->num_planes > 1) { paddr = drm_fb_cma_get_gem_addr(fb, state, swap ? 2 : 1); - paddr -= PHYS_OFFSET; DRM_DEBUG_DRIVER("Setting buffer #1 address to %pad\n", &paddr); regmap_write(frontend->regs, SUN4I_FRONTEND_BUF_ADDR1_REG, paddr); @@ -236,7 +234,6 @@ void sun4i_frontend_update_buffer(struct sun4i_frontend *frontend, if (fb->format->num_planes > 2) { paddr = drm_fb_cma_get_gem_addr(fb, state, swap ? 1 : 2); - paddr -= PHYS_OFFSET; DRM_DEBUG_DRIVER("Setting buffer #2 address to %pad\n", &paddr); regmap_write(frontend->regs, SUN4I_FRONTEND_BUF_ADDR2_REG, paddr); From e57f9af73d6b0ffb5f1aeaf6cec9a751dd8535c9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 19 Apr 2022 20:51:50 +0200 Subject: [PATCH 333/803] gfs2: Don't re-check for write past EOF unnecessarily Only re-check for direct I/O writes past the end of the file after re-acquiring the inode glock. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 22b41acfbbc3..8d889235afcd 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -899,10 +899,10 @@ retry: ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; -retry_under_glock: /* Silently fall back to buffered I/O when writing beyond EOF */ if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode)) goto out; +retry_under_glock: from->nofault = true; ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, From 1d661ed54d8613c97bcff2c7d6181c61e482a1da Mon Sep 17 00:00:00 2001 From: Adam Zabrocki Date: Fri, 22 Apr 2022 18:40:27 +0200 Subject: [PATCH 334/803] kprobes: Fix KRETPROBES when CONFIG_KRETPROBE_ON_RETHOOK is set The recent kernel change in 73f9b911faa7 ("kprobes: Use rethook for kretprobe if possible"), introduced a potential NULL pointer dereference bug in the KRETPROBE mechanism. The official Kprobes documentation defines that "Any or all handlers can be NULL". Unfortunately, there is a missing return handler verification to fulfill these requirements and can result in a NULL pointer dereference bug. This patch adds such verification in kretprobe_rethook_handler() function. Fixes: 73f9b911faa7 ("kprobes: Use rethook for kretprobe if possible") Signed-off-by: Adam Zabrocki Signed-off-by: Daniel Borkmann Acked-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Naveen N. Rao Cc: Anil S. Keshavamurthy Link: https://lore.kernel.org/bpf/20220422164027.GA7862@pi3.com.pl --- kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index dbe57df2e199..dd58c0be9ce2 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2126,7 +2126,7 @@ static void kretprobe_rethook_handler(struct rethook_node *rh, void *data, struct kprobe_ctlblk *kcb; /* The data must NOT be null. This means rethook data structure is broken. */ - if (WARN_ON_ONCE(!data)) + if (WARN_ON_ONCE(!data) || !rp->handler) return; __this_cpu_write(current_kprobe, &rp->kp); From aa22125c57f9e577f0a667e4fa07fc3fa8ca1e60 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 23 Apr 2022 14:12:39 +0100 Subject: [PATCH 335/803] ASoC: ops: Validate input values in snd_soc_put_volsw_range() Check that values written via snd_soc_put_volsw_range() are within the range advertised by the control, ensuring that we don't write out of spec values to the hardware. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220423131239.3375261-1-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/soc-ops.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 58347eadd219..e693070f51fe 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -519,7 +519,15 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, unsigned int mask = (1 << fls(max)) - 1; unsigned int invert = mc->invert; unsigned int val, val_mask; - int err, ret; + int err, ret, tmp; + + tmp = ucontrol->value.integer.value[0]; + if (tmp < 0) + return -EINVAL; + if (mc->platform_max && tmp > mc->platform_max) + return -EINVAL; + if (tmp > mc->max - mc->min + 1) + return -EINVAL; if (invert) val = (max - ucontrol->value.integer.value[0]) & mask; @@ -534,6 +542,14 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, ret = err; if (snd_soc_volsw_is_stereo(mc)) { + tmp = ucontrol->value.integer.value[1]; + if (tmp < 0) + return -EINVAL; + if (mc->platform_max && tmp > mc->platform_max) + return -EINVAL; + if (tmp > mc->max - mc->min + 1) + return -EINVAL; + if (invert) val = (max - ucontrol->value.integer.value[1]) & mask; else From ba3beec2ec1d3b4fd8672ca6e781dac4b3267f6e Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 25 Apr 2022 17:37:45 +0200 Subject: [PATCH 336/803] xsk: Fix possible crash when multiple sockets are created Fix a crash that happens if an Rx only socket is created first, then a second socket is created that is Tx only and bound to the same umem as the first socket and also the same netdev and queue_id together with the XDP_SHARED_UMEM flag. In this specific case, the tx_descs array page pool was not created by the first socket as it was an Rx only socket. When the second socket is bound it needs this tx_descs array of this shared page pool as it has a Tx component, but unfortunately it was never allocated, leading to a crash. Note that this array is only used for zero-copy drivers using the batched Tx APIs, currently only ice and i40e. [ 5511.150360] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 5511.158419] #PF: supervisor write access in kernel mode [ 5511.164472] #PF: error_code(0x0002) - not-present page [ 5511.170416] PGD 0 P4D 0 [ 5511.173347] Oops: 0002 [#1] PREEMPT SMP PTI [ 5511.178186] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G E 5.18.0-rc1+ #97 [ 5511.187245] Hardware name: Intel Corp. GRANTLEY/GRANTLEY, BIOS GRRFCRB1.86B.0276.D07.1605190235 05/19/2016 [ 5511.198418] RIP: 0010:xsk_tx_peek_release_desc_batch+0x198/0x310 [ 5511.205375] Code: c0 83 c6 01 84 c2 74 6d 8d 46 ff 23 07 44 89 e1 48 83 c0 14 48 c1 e1 04 48 c1 e0 04 48 03 47 10 4c 01 c1 48 8b 50 08 48 8b 00 <48> 89 51 08 48 89 01 41 80 bd d7 00 00 00 00 75 82 48 8b 19 49 8b [ 5511.227091] RSP: 0018:ffffc90000003dd0 EFLAGS: 00010246 [ 5511.233135] RAX: 0000000000000000 RBX: ffff88810c8da600 RCX: 0000000000000000 [ 5511.241384] RDX: 000000000000003c RSI: 0000000000000001 RDI: ffff888115f555c0 [ 5511.249634] RBP: ffffc90000003e08 R08: 0000000000000000 R09: ffff889092296b48 [ 5511.257886] R10: 0000ffffffffffff R11: ffff889092296800 R12: 0000000000000000 [ 5511.266138] R13: ffff88810c8db500 R14: 0000000000000040 R15: 0000000000000100 [ 5511.274387] FS: 0000000000000000(0000) GS:ffff88903f800000(0000) knlGS:0000000000000000 [ 5511.283746] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5511.290389] CR2: 0000000000000008 CR3: 00000001046e2001 CR4: 00000000003706f0 [ 5511.298640] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 5511.306892] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 5511.315142] Call Trace: [ 5511.317972] [ 5511.320301] ice_xmit_zc+0x68/0x2f0 [ice] [ 5511.324977] ? ktime_get+0x38/0xa0 [ 5511.328913] ice_napi_poll+0x7a/0x6a0 [ice] [ 5511.333784] __napi_poll+0x2c/0x160 [ 5511.337821] net_rx_action+0xdd/0x200 [ 5511.342058] __do_softirq+0xe6/0x2dd [ 5511.346198] irq_exit_rcu+0xb5/0x100 [ 5511.350339] common_interrupt+0xa4/0xc0 [ 5511.354777] [ 5511.357201] [ 5511.359625] asm_common_interrupt+0x1e/0x40 [ 5511.364466] RIP: 0010:cpuidle_enter_state+0xd2/0x360 [ 5511.370211] Code: 49 89 c5 0f 1f 44 00 00 31 ff e8 e9 00 7b ff 45 84 ff 74 12 9c 58 f6 c4 02 0f 85 72 02 00 00 31 ff e8 02 0c 80 ff fb 45 85 f6 <0f> 88 11 01 00 00 49 63 c6 4c 2b 2c 24 48 8d 14 40 48 8d 14 90 49 [ 5511.391921] RSP: 0018:ffffffff82a03e60 EFLAGS: 00000202 [ 5511.397962] RAX: ffff88903f800000 RBX: 0000000000000001 RCX: 000000000000001f [ 5511.406214] RDX: 0000000000000000 RSI: ffffffff823400b9 RDI: ffffffff8234c046 [ 5511.424646] RBP: ffff88810a384800 R08: 000005032a28c046 R09: 0000000000000008 [ 5511.443233] R10: 000000000000000b R11: 0000000000000006 R12: ffffffff82bcf700 [ 5511.461922] R13: 000005032a28c046 R14: 0000000000000001 R15: 0000000000000000 [ 5511.480300] cpuidle_enter+0x29/0x40 [ 5511.494329] do_idle+0x1c7/0x250 [ 5511.507610] cpu_startup_entry+0x19/0x20 [ 5511.521394] start_kernel+0x649/0x66e [ 5511.534626] secondary_startup_64_no_verify+0xc3/0xcb [ 5511.549230] Detect such case during bind() and allocate this memory region via newly introduced xp_alloc_tx_descs(). Also, use kvcalloc instead of kcalloc as for other buffer pool allocations, so that it matches the kvfree() from xp_destroy(). Fixes: d1bc532e99be ("i40e: xsk: Move tmp desc array from driver to pool") Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20220425153745.481322-1-maciej.fijalkowski@intel.com --- include/net/xsk_buff_pool.h | 1 + net/xdp/xsk.c | 13 +++++++++++++ net/xdp/xsk_buff_pool.c | 16 ++++++++++++---- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 5554ee75e7da..647722e847b4 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -97,6 +97,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev, u16 queue_id, u16 flags); int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, struct net_device *dev, u16 queue_id); +int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs); void xp_destroy(struct xsk_buff_pool *pool); void xp_get_pool(struct xsk_buff_pool *pool); bool xp_put_pool(struct xsk_buff_pool *pool); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 7d3a00cb24ec..3a9348030e20 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -967,6 +967,19 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xp_get_pool(umem_xs->pool); xs->pool = umem_xs->pool; + + /* If underlying shared umem was created without Tx + * ring, allocate Tx descs array that Tx batching API + * utilizes + */ + if (xs->tx && !xs->pool->tx_descs) { + err = xp_alloc_tx_descs(xs->pool, xs); + if (err) { + xp_put_pool(xs->pool); + sockfd_put(sock); + goto out_unlock; + } + } } xdp_get_umem(umem_xs->umem); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index af040ffa14ff..87bdd71c7bb6 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -42,6 +42,16 @@ void xp_destroy(struct xsk_buff_pool *pool) kvfree(pool); } +int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs) +{ + pool->tx_descs = kvcalloc(xs->tx->nentries, sizeof(*pool->tx_descs), + GFP_KERNEL); + if (!pool->tx_descs) + return -ENOMEM; + + return 0; +} + struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, struct xdp_umem *umem) { @@ -59,11 +69,9 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, if (!pool->heads) goto out; - if (xs->tx) { - pool->tx_descs = kcalloc(xs->tx->nentries, sizeof(*pool->tx_descs), GFP_KERNEL); - if (!pool->tx_descs) + if (xs->tx) + if (xp_alloc_tx_descs(pool, xs)) goto out; - } pool->chunk_mask = ~((u64)umem->chunk_size - 1); pool->addrs_cnt = umem->size; From ac0280a9ca106c5501257e79d165f968712b5899 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Mon, 4 Apr 2022 11:05:27 +0200 Subject: [PATCH 337/803] RISC-V: configs: Configs that had RPMSG_CHAR now get RPMSG_CTRL In the commit 617d32938d1b ("rpmsg: Move the rpmsg control device from rpmsg_char to rpmsg_ctrl"), we split the rpmsg_char driver in two. By default give everyone who had the old driver enabled the rpmsg_ctrl driver too. Signed-off-by: Arnaud Pouliquen Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20220404090527.582217-1-arnaud.pouliquen@foss.st.com Reviewed-by: Mathieu Poirier Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 1 + arch/riscv/configs/rv32_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 30e3017f22bc..0cc17db8aaba 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -101,6 +101,7 @@ CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y CONFIG_RPMSG_CHAR=y +CONFIG_RPMSG_CTRL=y CONFIG_RPMSG_VIRTIO=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index 7e5efdc3829d..6cd9d84d3e13 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -93,6 +93,7 @@ CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y CONFIG_RPMSG_CHAR=y +CONFIG_RPMSG_CTRL=y CONFIG_RPMSG_VIRTIO=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y From aaf461af729b81dbb19ec33abe6da74702b352d2 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 1 Apr 2022 12:40:52 +0200 Subject: [PATCH 338/803] ice: Fix incorrect locking in ice_vc_process_vf_msg() Usage of mutex_trylock() in ice_vc_process_vf_msg() is incorrect because message sent from VF is ignored and never processed. Use mutex_lock() instead to fix the issue. It is safe because this mutex is used to prevent races between VF related NDOs and handlers processing request messages from VF and these handlers are running in ice_service_task() context. Additionally move this mutex lock prior ice_vc_is_opcode_allowed() call to avoid potential races during allowlist access. Fixes: e6ba5273d4ed ("ice: Fix race conditions between virtchnl handling and VF ndo ops") Signed-off-by: Ivan Vecera Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 21 +++++++------------ 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 69ff4b929772..5612c032f15a 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -3642,14 +3642,6 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) err = -EINVAL; } - if (!ice_vc_is_opcode_allowed(vf, v_opcode)) { - ice_vc_send_msg_to_vf(vf, v_opcode, - VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL, - 0); - ice_put_vf(vf); - return; - } - error_handler: if (err) { ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_PARAM, @@ -3660,12 +3652,13 @@ error_handler: return; } - /* VF is being configured in another context that triggers a VFR, so no - * need to process this message - */ - if (!mutex_trylock(&vf->cfg_lock)) { - dev_info(dev, "VF %u is being configured in another context that will trigger a VFR, so there is no need to handle this message\n", - vf->vf_id); + mutex_lock(&vf->cfg_lock); + + if (!ice_vc_is_opcode_allowed(vf, v_opcode)) { + ice_vc_send_msg_to_vf(vf, v_opcode, + VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL, + 0); + mutex_unlock(&vf->cfg_lock); ice_put_vf(vf); return; } From 77d64d285be5f8d427893e9c54425b1e4f5d9be7 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 19 Apr 2022 16:22:21 +0200 Subject: [PATCH 339/803] ice: Protect vf_state check by cfg_lock in ice_vc_process_vf_msg() Previous patch labelled "ice: Fix incorrect locking in ice_vc_process_vf_msg()" fixed an issue with ignored messages sent by VF driver but a small race window still left. Recently caught trace during 'ip link set ... vf 0 vlan ...' operation: [ 7332.995625] ice 0000:3b:00.0: Clearing port VLAN on VF 0 [ 7333.001023] iavf 0000:3b:01.0: Reset indication received from the PF [ 7333.007391] iavf 0000:3b:01.0: Scheduling reset task [ 7333.059575] iavf 0000:3b:01.0: PF returned error -5 (IAVF_ERR_PARAM) to our request 3 [ 7333.059626] ice 0000:3b:00.0: Invalid message from VF 0, opcode 3, len 4, error -1 Setting of VLAN for VF causes a reset of the affected VF using ice_reset_vf() function that runs with cfg_lock taken: 1. ice_notify_vf_reset() informs IAVF driver that reset is needed and IAVF schedules its own reset procedure 2. Bit ICE_VF_STATE_DIS is set in vf->vf_state 3. Misc initialization steps 4. ice_sriov_post_vsi_rebuild() -> ice_vf_set_initialized() and that clears ICE_VF_STATE_DIS in vf->vf_state Step 3 is mentioned race window because IAVF reset procedure runs in parallel and one of its step is sending of VIRTCHNL_OP_GET_VF_RESOURCES message (opcode==3). This message is handled in ice_vc_process_vf_msg() and if it is received during the mentioned race window then it's marked as invalid and error is returned to VF driver. Protect vf_state check in ice_vc_process_vf_msg() by cfg_lock to avoid this race condition. Fixes: e6ba5273d4ed ("ice: Fix race conditions between virtchnl handling and VF ndo ops") Tested-by: Fei Liu Signed-off-by: Ivan Vecera Reviewed-by: Jacob Keller Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 5612c032f15a..b72606c9e6d0 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -3625,6 +3625,8 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) return; } + mutex_lock(&vf->cfg_lock); + /* Check if VF is disabled. */ if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) { err = -EPERM; @@ -3648,19 +3650,14 @@ error_handler: NULL, 0); dev_err(dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n", vf_id, v_opcode, msglen, err); - ice_put_vf(vf); - return; + goto finish; } - mutex_lock(&vf->cfg_lock); - if (!ice_vc_is_opcode_allowed(vf, v_opcode)) { ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL, 0); - mutex_unlock(&vf->cfg_lock); - ice_put_vf(vf); - return; + goto finish; } switch (v_opcode) { @@ -3773,6 +3770,7 @@ error_handler: vf_id, v_opcode, err); } +finish: mutex_unlock(&vf->cfg_lock); ice_put_vf(vf); } From b537752e6cbf0e4475c165178ca02241b53ff6ef Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Wed, 13 Apr 2022 17:37:45 +0200 Subject: [PATCH 340/803] ice: wait 5 s for EMP reset after firmware flash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to wait 5 s for EMP reset after firmware flash. Code was extracted from OOT driver (ice v1.8.3 downloaded from sourceforge). Without this wait, fw_activate let card in inconsistent state and recoverable only by second flash/activate. Flash was tested on these fw's: From -> To 3.00 -> 3.10/3.20 3.10 -> 3.00/3.20 3.20 -> 3.00/3.10 Reproducer: [root@host ~]# devlink dev flash pci/0000:ca:00.0 file E810_XXVDA4_FH_O_SEC_FW_1p6p1p9_NVM_3p10_PLDMoMCTP_0.11_8000AD7B.bin Preparing to flash [fw.mgmt] Erasing [fw.mgmt] Erasing done [fw.mgmt] Flashing 100% [fw.mgmt] Flashing done 100% [fw.undi] Erasing [fw.undi] Erasing done [fw.undi] Flashing 100% [fw.undi] Flashing done 100% [fw.netlist] Erasing [fw.netlist] Erasing done [fw.netlist] Flashing 100% [fw.netlist] Flashing done 100% Activate new firmware by devlink reload [root@host ~]# devlink dev reload pci/0000:ca:00.0 action fw_activate reload_actions_performed: fw_activate [root@host ~]# ip link show ens7f0 71: ens7f0: mtu 1500 qdisc mq state DOWN mode DEFAULT group default qlen 1000 link/ether b4:96:91:dc:72:e0 brd ff:ff:ff:ff:ff:ff altname enp202s0f0 dmesg after flash: [ 55.120788] ice: Copyright (c) 2018, Intel Corporation. [ 55.274734] ice 0000:ca:00.0: Get PHY capabilities failed status = -5, continuing anyway [ 55.569797] ice 0000:ca:00.0: The DDP package was successfully loaded: ICE OS Default Package version 1.3.28.0 [ 55.603629] ice 0000:ca:00.0: Get PHY capability failed. [ 55.608951] ice 0000:ca:00.0: ice_init_nvm_phy_type failed: -5 [ 55.647348] ice 0000:ca:00.0: PTP init successful [ 55.675536] ice 0000:ca:00.0: DCB is enabled in the hardware, max number of TCs supported on this port are 8 [ 55.685365] ice 0000:ca:00.0: FW LLDP is disabled, DCBx/LLDP in SW mode. [ 55.692179] ice 0000:ca:00.0: Commit DCB Configuration to the hardware [ 55.701382] ice 0000:ca:00.0: 126.024 Gb/s available PCIe bandwidth, limited by 16.0 GT/s PCIe x8 link at 0000:c9:02.0 (capable of 252.048 Gb/s with 16.0 GT/s PCIe x16 link) Reboot doesn’t help, only second flash/activate with OOT or patched driver put card back in consistent state. After patch: [root@host ~]# devlink dev flash pci/0000:ca:00.0 file E810_XXVDA4_FH_O_SEC_FW_1p6p1p9_NVM_3p10_PLDMoMCTP_0.11_8000AD7B.bin Preparing to flash [fw.mgmt] Erasing [fw.mgmt] Erasing done [fw.mgmt] Flashing 100% [fw.mgmt] Flashing done 100% [fw.undi] Erasing [fw.undi] Erasing done [fw.undi] Flashing 100% [fw.undi] Flashing done 100% [fw.netlist] Erasing [fw.netlist] Erasing done [fw.netlist] Flashing 100% [fw.netlist] Flashing done 100% Activate new firmware by devlink reload [root@host ~]# devlink dev reload pci/0000:ca:00.0 action fw_activate reload_actions_performed: fw_activate [root@host ~]# ip link show ens7f0 19: ens7f0: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 link/ether b4:96:91:dc:72:e0 brd ff:ff:ff:ff:ff:ff altname enp202s0f0 Fixes: 399e27dbbd9e94 ("ice: support immediate firmware activation via devlink reload") Signed-off-by: Petr Oros Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 5b1198859da7..9a0a358a15c2 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6929,12 +6929,15 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type); +#define ICE_EMP_RESET_SLEEP_MS 5000 if (reset_type == ICE_RESET_EMPR) { /* If an EMP reset has occurred, any previously pending flash * update will have completed. We no longer know whether or * not the NVM update EMP reset is restricted. */ pf->fw_emp_reset_disabled = false; + + msleep(ICE_EMP_RESET_SLEEP_MS); } err = ice_init_all_ctrlq(hw); From b668f4cd715a297737c6e5952bc609a25b9af944 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 12 Apr 2022 10:34:22 -0700 Subject: [PATCH 341/803] ice: fix use-after-free when deinitializing mailbox snapshot During ice_sriov_configure, if num_vfs is 0, we are being asked by the kernel to remove all VFs. The driver first de-initializes the snapshot before freeing all the VFs. This results in a use-after-free BUG detected by KASAN. The bug occurs because the snapshot can still be accessed until all VFs are removed. Fix this by freeing all the VFs first before calling ice_mbx_deinit_snapshot. [ +0.032591] ================================================================== [ +0.000021] BUG: KASAN: use-after-free in ice_mbx_vf_state_handler+0x1c3/0x410 [ice] [ +0.000315] Write of size 28 at addr ffff889908eb6f28 by task kworker/55:2/1530996 [ +0.000029] CPU: 55 PID: 1530996 Comm: kworker/55:2 Kdump: loaded Tainted: G S I 5.17.0-dirty #1 [ +0.000022] Hardware name: Dell Inc. PowerEdge R740/0923K0, BIOS 1.6.13 12/17/2018 [ +0.000013] Workqueue: ice ice_service_task [ice] [ +0.000279] Call Trace: [ +0.000012] [ +0.000011] dump_stack_lvl+0x33/0x42 [ +0.000030] print_report.cold.13+0xb2/0x6b3 [ +0.000028] ? ice_mbx_vf_state_handler+0x1c3/0x410 [ice] [ +0.000295] kasan_report+0xa5/0x120 [ +0.000026] ? __switch_to_asm+0x21/0x70 [ +0.000024] ? ice_mbx_vf_state_handler+0x1c3/0x410 [ice] [ +0.000298] kasan_check_range+0x183/0x1e0 [ +0.000019] memset+0x1f/0x40 [ +0.000018] ice_mbx_vf_state_handler+0x1c3/0x410 [ice] [ +0.000304] ? ice_conv_link_speed_to_virtchnl+0x160/0x160 [ice] [ +0.000297] ? ice_vsi_dis_spoofchk+0x40/0x40 [ice] [ +0.000305] ice_is_malicious_vf+0x1aa/0x250 [ice] [ +0.000303] ? ice_restore_all_vfs_msi_state+0x160/0x160 [ice] [ +0.000297] ? __mutex_unlock_slowpath.isra.15+0x410/0x410 [ +0.000022] ? ice_debug_cq+0xb7/0x230 [ice] [ +0.000273] ? __kasan_slab_alloc+0x2f/0x90 [ +0.000022] ? memset+0x1f/0x40 [ +0.000017] ? do_raw_spin_lock+0x119/0x1d0 [ +0.000022] ? rwlock_bug.part.2+0x60/0x60 [ +0.000024] __ice_clean_ctrlq+0x3a6/0xd60 [ice] [ +0.000273] ? newidle_balance+0x5b1/0x700 [ +0.000026] ? ice_print_link_msg+0x2f0/0x2f0 [ice] [ +0.000271] ? update_cfs_group+0x1b/0x140 [ +0.000018] ? load_balance+0x1260/0x1260 [ +0.000022] ? ice_process_vflr_event+0x27/0x130 [ice] [ +0.000301] ice_service_task+0x136e/0x1470 [ice] [ +0.000281] process_one_work+0x3b4/0x6c0 [ +0.000030] worker_thread+0x65/0x660 [ +0.000023] ? __kthread_parkme+0xe4/0x100 [ +0.000021] ? process_one_work+0x6c0/0x6c0 [ +0.000020] kthread+0x179/0x1b0 [ +0.000018] ? kthread_complete_and_exit+0x20/0x20 [ +0.000022] ret_from_fork+0x22/0x30 [ +0.000026] [ +0.000018] Allocated by task 10742: [ +0.000013] kasan_save_stack+0x1c/0x40 [ +0.000018] __kasan_kmalloc+0x84/0xa0 [ +0.000016] kmem_cache_alloc_trace+0x16c/0x2e0 [ +0.000015] intel_iommu_probe_device+0xeb/0x860 [ +0.000015] __iommu_probe_device+0x9a/0x2f0 [ +0.000016] iommu_probe_device+0x43/0x270 [ +0.000015] iommu_bus_notifier+0xa7/0xd0 [ +0.000015] blocking_notifier_call_chain+0x90/0xc0 [ +0.000017] device_add+0x5f3/0xd70 [ +0.000014] pci_device_add+0x404/0xa40 [ +0.000015] pci_iov_add_virtfn+0x3b0/0x550 [ +0.000016] sriov_enable+0x3bb/0x600 [ +0.000013] ice_ena_vfs+0x113/0xa79 [ice] [ +0.000293] ice_sriov_configure.cold.17+0x21/0xe0 [ice] [ +0.000291] sriov_numvfs_store+0x160/0x200 [ +0.000015] kernfs_fop_write_iter+0x1db/0x270 [ +0.000018] new_sync_write+0x21d/0x330 [ +0.000013] vfs_write+0x376/0x410 [ +0.000013] ksys_write+0xba/0x150 [ +0.000012] do_syscall_64+0x3a/0x80 [ +0.000012] entry_SYSCALL_64_after_hwframe+0x44/0xae [ +0.000028] Freed by task 10742: [ +0.000011] kasan_save_stack+0x1c/0x40 [ +0.000015] kasan_set_track+0x21/0x30 [ +0.000016] kasan_set_free_info+0x20/0x30 [ +0.000012] __kasan_slab_free+0x104/0x170 [ +0.000016] kfree+0x9b/0x470 [ +0.000013] devres_destroy+0x1c/0x20 [ +0.000015] devm_kfree+0x33/0x40 [ +0.000012] ice_mbx_deinit_snapshot+0x39/0x70 [ice] [ +0.000295] ice_sriov_configure+0xb0/0x260 [ice] [ +0.000295] sriov_numvfs_store+0x1bc/0x200 [ +0.000015] kernfs_fop_write_iter+0x1db/0x270 [ +0.000016] new_sync_write+0x21d/0x330 [ +0.000012] vfs_write+0x376/0x410 [ +0.000012] ksys_write+0xba/0x150 [ +0.000012] do_syscall_64+0x3a/0x80 [ +0.000012] entry_SYSCALL_64_after_hwframe+0x44/0xae [ +0.000024] Last potentially related work creation: [ +0.000010] kasan_save_stack+0x1c/0x40 [ +0.000016] __kasan_record_aux_stack+0x98/0xa0 [ +0.000013] insert_work+0x34/0x160 [ +0.000015] __queue_work+0x20e/0x650 [ +0.000016] queue_work_on+0x4c/0x60 [ +0.000015] nf_nat_masq_schedule+0x297/0x2e0 [nf_nat] [ +0.000034] masq_device_event+0x5a/0x60 [nf_nat] [ +0.000031] raw_notifier_call_chain+0x5f/0x80 [ +0.000017] dev_close_many+0x1d6/0x2c0 [ +0.000015] unregister_netdevice_many+0x4e3/0xa30 [ +0.000015] unregister_netdevice_queue+0x192/0x1d0 [ +0.000014] iavf_remove+0x8f9/0x930 [iavf] [ +0.000058] pci_device_remove+0x65/0x110 [ +0.000015] device_release_driver_internal+0xf8/0x190 [ +0.000017] pci_stop_bus_device+0xb5/0xf0 [ +0.000014] pci_stop_and_remove_bus_device+0xe/0x20 [ +0.000016] pci_iov_remove_virtfn+0x19c/0x230 [ +0.000015] sriov_disable+0x4f/0x170 [ +0.000014] ice_free_vfs+0x9a/0x490 [ice] [ +0.000306] ice_sriov_configure+0xb8/0x260 [ice] [ +0.000294] sriov_numvfs_store+0x1bc/0x200 [ +0.000015] kernfs_fop_write_iter+0x1db/0x270 [ +0.000016] new_sync_write+0x21d/0x330 [ +0.000012] vfs_write+0x376/0x410 [ +0.000012] ksys_write+0xba/0x150 [ +0.000012] do_syscall_64+0x3a/0x80 [ +0.000012] entry_SYSCALL_64_after_hwframe+0x44/0xae [ +0.000025] The buggy address belongs to the object at ffff889908eb6f00 which belongs to the cache kmalloc-96 of size 96 [ +0.000016] The buggy address is located 40 bytes inside of 96-byte region [ffff889908eb6f00, ffff889908eb6f60) [ +0.000026] The buggy address belongs to the physical page: [ +0.000010] page:00000000b7e99a2e refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1908eb6 [ +0.000016] flags: 0x57ffffc0000200(slab|node=1|zone=2|lastcpupid=0x1fffff) [ +0.000024] raw: 0057ffffc0000200 ffffea0069d9fd80 dead000000000002 ffff88810004c780 [ +0.000015] raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 [ +0.000009] page dumped because: kasan: bad access detected [ +0.000016] Memory state around the buggy address: [ +0.000012] ffff889908eb6e00: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc [ +0.000014] ffff889908eb6e80: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc [ +0.000014] >ffff889908eb6f00: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc [ +0.000011] ^ [ +0.000013] ffff889908eb6f80: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc [ +0.000013] ffff889908eb7000: fa fb fb fb fb fb fb fb fc fc fc fc fa fb fb fb [ +0.000012] ================================================================== Fixes: 0891c89674e8 ("ice: warn about potentially malicious VFs") Reported-by: Slawomir Laba Signed-off-by: Jacob Keller Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 8915a9d39e36..0c438219f7a3 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1046,8 +1046,8 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!num_vfs) { if (!pci_vfs_assigned(pdev)) { - ice_mbx_deinit_snapshot(&pf->hw); ice_free_vfs(pf); + ice_mbx_deinit_snapshot(&pf->hw); if (pf->lag) ice_enable_lag(pf->lag); return 0; From c86cc5a3ec70f5644f1fa21610b943d0441bc1f7 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 22 Apr 2022 12:58:16 -0700 Subject: [PATCH 342/803] Bluetooth: hci_event: Fix checking for invalid handle on error status Commit d5ebaa7c5f6f6 introduces checks for handle range (e.g HCI_CONN_HANDLE_MAX) but controllers like Intel AX200 don't seem to respect the valid range int case of error status: > HCI Event: Connect Complete (0x03) plen 11 Status: Page Timeout (0x04) Handle: 65535 Address: 94:DB:56:XX:XX:XX (Sony Home Entertainment& Sound Products Inc) Link type: ACL (0x01) Encryption: Disabled (0x00) [1644965.827560] Bluetooth: hci0: Ignoring HCI_Connection_Complete for invalid handle Because of it is impossible to cleanup the connections properly since the stack would attempt to cancel the connection which is no longer in progress causing the following trace: < HCI Command: Create Connection Cancel (0x01|0x0008) plen 6 Address: 94:DB:56:XX:XX:XX (Sony Home Entertainment& Sound Products Inc) = bluetoothd: src/profile.c:record_cb() Unable to get Hands-Free Voice gateway SDP record: Connection timed out > HCI Event: Command Complete (0x0e) plen 10 Create Connection Cancel (0x01|0x0008) ncmd 1 Status: Unknown Connection Identifier (0x02) Address: 94:DB:56:XX:XX:XX (Sony Home Entertainment& Sound Products Inc) < HCI Command: Create Connection Cancel (0x01|0x0008) plen 6 Address: 94:DB:56:XX:XX:XX (Sony Home Entertainment& Sound Products Inc) Fixes: d5ebaa7c5f6f6 ("Bluetooth: hci_event: Ignore multiple conn complete events") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 1 + net/bluetooth/hci_event.c | 65 ++++++++++++++++++++----------------- 2 files changed, 37 insertions(+), 29 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 5cb095b09a94..69ef31cea582 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -578,6 +578,7 @@ enum { #define HCI_ERROR_CONNECTION_TIMEOUT 0x08 #define HCI_ERROR_REJ_LIMITED_RESOURCES 0x0d #define HCI_ERROR_REJ_BAD_ADDR 0x0f +#define HCI_ERROR_INVALID_PARAMETERS 0x12 #define HCI_ERROR_REMOTE_USER_TERM 0x13 #define HCI_ERROR_REMOTE_LOW_RESOURCES 0x14 #define HCI_ERROR_REMOTE_POWER_OFF 0x15 diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index abaabfae19cc..3a9071b987f4 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3067,13 +3067,9 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, { struct hci_ev_conn_complete *ev = data; struct hci_conn *conn; + u8 status = ev->status; - if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) { - bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for invalid handle"); - return; - } - - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + bt_dev_dbg(hdev, "status 0x%2.2x", status); hci_dev_lock(hdev); @@ -3122,8 +3118,14 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, goto unlock; } - if (!ev->status) { + if (!status) { conn->handle = __le16_to_cpu(ev->handle); + if (conn->handle > HCI_CONN_HANDLE_MAX) { + bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", + conn->handle, HCI_CONN_HANDLE_MAX); + status = HCI_ERROR_INVALID_PARAMETERS; + goto done; + } if (conn->type == ACL_LINK) { conn->state = BT_CONFIG; @@ -3164,18 +3166,18 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, sizeof(cp), &cp); } - } else { - conn->state = BT_CLOSED; - if (conn->type == ACL_LINK) - mgmt_connect_failed(hdev, &conn->dst, conn->type, - conn->dst_type, ev->status); } if (conn->type == ACL_LINK) hci_sco_setup(conn, ev->status); - if (ev->status) { - hci_connect_cfm(conn, ev->status); +done: + if (status) { + conn->state = BT_CLOSED; + if (conn->type == ACL_LINK) + mgmt_connect_failed(hdev, &conn->dst, conn->type, + conn->dst_type, status); + hci_connect_cfm(conn, status); hci_conn_del(conn); } else if (ev->link_type == SCO_LINK) { switch (conn->setting & SCO_AIRMODE_MASK) { @@ -3185,7 +3187,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, break; } - hci_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, status); } unlock: @@ -4676,6 +4678,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, { struct hci_ev_sync_conn_complete *ev = data; struct hci_conn *conn; + u8 status = ev->status; switch (ev->link_type) { case SCO_LINK: @@ -4690,12 +4693,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, return; } - if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) { - bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete for invalid handle"); - return; - } - - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + bt_dev_dbg(hdev, "status 0x%2.2x", status); hci_dev_lock(hdev); @@ -4729,9 +4727,17 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, goto unlock; } - switch (ev->status) { + switch (status) { case 0x00: conn->handle = __le16_to_cpu(ev->handle); + if (conn->handle > HCI_CONN_HANDLE_MAX) { + bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", + conn->handle, HCI_CONN_HANDLE_MAX); + status = HCI_ERROR_INVALID_PARAMETERS; + conn->state = BT_CLOSED; + break; + } + conn->state = BT_CONNECTED; conn->type = ev->link_type; @@ -4775,8 +4781,8 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, } } - hci_connect_cfm(conn, ev->status); - if (ev->status) + hci_connect_cfm(conn, status); + if (status) hci_conn_del(conn); unlock: @@ -5527,11 +5533,6 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, struct smp_irk *irk; u8 addr_type; - if (handle > HCI_CONN_HANDLE_MAX) { - bt_dev_err(hdev, "Ignoring HCI_LE_Connection_Complete for invalid handle"); - return; - } - hci_dev_lock(hdev); /* All controllers implicitly stop advertising in the event of a @@ -5603,6 +5604,12 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, conn->dst_type = ev_bdaddr_type(hdev, conn->dst_type, NULL); + if (handle > HCI_CONN_HANDLE_MAX) { + bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", handle, + HCI_CONN_HANDLE_MAX); + status = HCI_ERROR_INVALID_PARAMETERS; + } + if (status) { hci_le_conn_failed(conn, status); goto unlock; From aef2aa4fa98e18ea5d9345bf777ee698c8598728 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 22 Apr 2022 12:58:17 -0700 Subject: [PATCH 343/803] Bluetooth: hci_event: Fix creating hci_conn object on error status It is useless to create a hci_conn object if on error status as the result would be it being freed in the process and anyway it is likely the result of controller and host stack being out of sync. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 3a9071b987f4..5a6c8afc51a0 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3075,6 +3075,12 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); if (!conn) { + /* In case of error status and there is no connection pending + * just unlock as there is nothing to cleanup. + */ + if (ev->status) + goto unlock; + /* Connection may not exist if auto-connected. Check the bredr * allowlist to see if this device is allowed to auto connect. * If link is an ACL type, create a connection class @@ -5542,6 +5548,12 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, conn = hci_lookup_le_connect(hdev); if (!conn) { + /* In case of error status and there is no connection pending + * just unlock as there is nothing to cleanup. + */ + if (status) + goto unlock; + conn = hci_conn_add(hdev, LE_LINK, bdaddr, role); if (!conn) { bt_dev_err(hdev, "no memory for new connection"); From 9b3628d79b46f06157affc56fdb218fdd4988321 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 22 Apr 2022 12:58:18 -0700 Subject: [PATCH 344/803] Bluetooth: hci_sync: Cleanup hci_conn if it cannot be aborted This attempts to cleanup the hci_conn if it cannot be aborted as otherwise it would likely result in having the controller and host stack out of sync with respect to connection handle. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_conn.c | 32 ++++++++++++++++++++++++-------- net/bluetooth/hci_event.c | 13 ++++--------- net/bluetooth/hci_sync.c | 11 ++++++++++- 4 files changed, 39 insertions(+), 19 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d5377740e99c..8abd08245326 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1156,7 +1156,7 @@ int hci_conn_switch_role(struct hci_conn *conn, __u8 role); void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); -void hci_le_conn_failed(struct hci_conn *conn, u8 status); +void hci_conn_failed(struct hci_conn *conn, u8 status); /* * hci_conn_get() and hci_conn_put() are used to control the life-time of an diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 84312c836549..fe803bee419a 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -670,7 +670,7 @@ static void le_conn_timeout(struct work_struct *work) /* Disable LE Advertising */ le_disable_advertising(hdev); hci_dev_lock(hdev); - hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT); + hci_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT); hci_dev_unlock(hdev); return; } @@ -873,7 +873,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type) EXPORT_SYMBOL(hci_get_route); /* This function requires the caller holds hdev->lock */ -void hci_le_conn_failed(struct hci_conn *conn, u8 status) +static void hci_le_conn_failed(struct hci_conn *conn, u8 status) { struct hci_dev *hdev = conn->hdev; struct hci_conn_params *params; @@ -886,8 +886,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) params->conn = NULL; } - conn->state = BT_CLOSED; - /* If the status indicates successful cancellation of * the attempt (i.e. Unknown Connection Id) there's no point of * notifying failure since we'll go back to keep trying to @@ -899,10 +897,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); - hci_connect_cfm(conn, status); - - hci_conn_del(conn); - /* Since we may have temporarily stopped the background scanning in * favor of connection establishment, we should restart it. */ @@ -914,6 +908,28 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) hci_enable_advertising(hdev); } +/* This function requires the caller holds hdev->lock */ +void hci_conn_failed(struct hci_conn *conn, u8 status) +{ + struct hci_dev *hdev = conn->hdev; + + bt_dev_dbg(hdev, "status 0x%2.2x", status); + + switch (conn->type) { + case LE_LINK: + hci_le_conn_failed(conn, status); + break; + case ACL_LINK: + mgmt_connect_failed(hdev, &conn->dst, conn->type, + conn->dst_type, status); + break; + } + + conn->state = BT_CLOSED; + hci_connect_cfm(conn, status); + hci_conn_del(conn); +} + static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) { struct hci_conn *conn = data; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 5a6c8afc51a0..66451661283c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2834,7 +2834,7 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status) bt_dev_dbg(hdev, "status 0x%2.2x", status); /* All connection failure handling is taken care of by the - * hci_le_conn_failed function which is triggered by the HCI + * hci_conn_failed function which is triggered by the HCI * request completion callbacks used for connecting. */ if (status) @@ -2859,7 +2859,7 @@ static void hci_cs_le_ext_create_conn(struct hci_dev *hdev, u8 status) bt_dev_dbg(hdev, "status 0x%2.2x", status); /* All connection failure handling is taken care of by the - * hci_le_conn_failed function which is triggered by the HCI + * hci_conn_failed function which is triggered by the HCI * request completion callbacks used for connecting. */ if (status) @@ -3179,12 +3179,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, done: if (status) { - conn->state = BT_CLOSED; - if (conn->type == ACL_LINK) - mgmt_connect_failed(hdev, &conn->dst, conn->type, - conn->dst_type, status); - hci_connect_cfm(conn, status); - hci_conn_del(conn); + hci_conn_failed(conn, status); } else if (ev->link_type == SCO_LINK) { switch (conn->setting & SCO_AIRMODE_MASK) { case SCO_AIRMODE_CVSD: @@ -5623,7 +5618,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, } if (status) { - hci_le_conn_failed(conn, status); + hci_conn_failed(conn, status); goto unlock; } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 8f4c5698913d..13600bf120b0 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -4408,12 +4408,21 @@ static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, static int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { + int err; + switch (conn->state) { case BT_CONNECTED: case BT_CONFIG: return hci_disconnect_sync(hdev, conn, reason); case BT_CONNECT: - return hci_connect_cancel_sync(hdev, conn); + err = hci_connect_cancel_sync(hdev, conn); + /* Cleanup hci_conn object if it cannot be cancelled as it + * likelly means the controller and host stack are out of sync. + */ + if (err) + hci_conn_failed(conn, err); + + return err; case BT_CONNECT2: return hci_reject_conn_sync(hdev, conn, reason); default: From 6510ea973d8d9d4a0cb2fb557b36bd1ab3eb49f6 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 25 Apr 2022 18:39:46 +0200 Subject: [PATCH 345/803] net: Use this_cpu_inc() to increment net->core_stats The macro dev_core_stats_##FIELD##_inc() disables preemption and invokes netdev_core_stats_alloc() to return a per-CPU pointer. netdev_core_stats_alloc() will allocate memory on its first invocation which breaks on PREEMPT_RT because it requires non-atomic context for memory allocation. This can be avoided by enabling preemption in netdev_core_stats_alloc() assuming the caller always disables preemption. It might be better to replace local_inc() with this_cpu_inc() now that dev_core_stats_##FIELD##_inc() gained a preempt-disable section and does not rely on already disabled preemption. This results in less instructions on x86-64: local_inc: | incl %gs:__preempt_count(%rip) # __preempt_count | movq 488(%rdi), %rax # _1->core_stats, _22 | testq %rax, %rax # _22 | je .L585 #, | add %gs:this_cpu_off(%rip), %rax # this_cpu_off, tcp_ptr__ | .L586: | testq %rax, %rax # _27 | je .L587 #, | incq (%rax) # _6->a.counter | .L587: | decl %gs:__preempt_count(%rip) # __preempt_count this_cpu_inc(), this patch: | movq 488(%rdi), %rax # _1->core_stats, _5 | testq %rax, %rax # _5 | je .L591 #, | .L585: | incq %gs:(%rax) # _18->rx_dropped Use unsigned long as type for the counter. Use this_cpu_inc() to increment the counter. Use a plain read of the counter. Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/YmbO0pxgtKpCw4SY@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 21 +++++++++------------ net/core/dev.c | 14 +++++--------- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 59e27a2b7bf0..b1fbe21650bb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -199,10 +199,10 @@ struct net_device_stats { * Try to fit them in a single cache line, for dev_get_stats() sake. */ struct net_device_core_stats { - local_t rx_dropped; - local_t tx_dropped; - local_t rx_nohandler; -} __aligned(4 * sizeof(local_t)); + unsigned long rx_dropped; + unsigned long tx_dropped; + unsigned long rx_nohandler; +} __aligned(4 * sizeof(unsigned long)); #include #include @@ -3843,15 +3843,15 @@ static __always_inline bool __is_skb_forwardable(const struct net_device *dev, return false; } -struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev); +struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev); -static inline struct net_device_core_stats *dev_core_stats(struct net_device *dev) +static inline struct net_device_core_stats __percpu *dev_core_stats(struct net_device *dev) { /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); if (likely(p)) - return this_cpu_ptr(p); + return p; return netdev_core_stats_alloc(dev); } @@ -3859,14 +3859,11 @@ static inline struct net_device_core_stats *dev_core_stats(struct net_device *de #define DEV_CORE_STATS_INC(FIELD) \ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ { \ - struct net_device_core_stats *p; \ + struct net_device_core_stats __percpu *p; \ \ - preempt_disable(); \ p = dev_core_stats(dev); \ - \ if (p) \ - local_inc(&p->FIELD); \ - preempt_enable(); \ + this_cpu_inc(p->FIELD); \ } DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) diff --git a/net/core/dev.c b/net/core/dev.c index 8c6c08446556..1461c2d9dec8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10304,7 +10304,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, } EXPORT_SYMBOL(netdev_stats_to_stats64); -struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev) +struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev) { struct net_device_core_stats __percpu *p; @@ -10315,11 +10315,7 @@ struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev) free_percpu(p); /* This READ_ONCE() pairs with the cmpxchg() above */ - p = READ_ONCE(dev->core_stats); - if (!p) - return NULL; - - return this_cpu_ptr(p); + return READ_ONCE(dev->core_stats); } EXPORT_SYMBOL(netdev_core_stats_alloc); @@ -10356,9 +10352,9 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, for_each_possible_cpu(i) { core_stats = per_cpu_ptr(p, i); - storage->rx_dropped += local_read(&core_stats->rx_dropped); - storage->tx_dropped += local_read(&core_stats->tx_dropped); - storage->rx_nohandler += local_read(&core_stats->rx_nohandler); + storage->rx_dropped += READ_ONCE(core_stats->rx_dropped); + storage->tx_dropped += READ_ONCE(core_stats->tx_dropped); + storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler); } } return storage; From 71cffebf6358a7f5031f5b208bbdc1cb4db6e539 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 25 Apr 2022 17:20:27 +0200 Subject: [PATCH 346/803] net: dsa: lantiq_gswip: Don't set GSWIP_MII_CFG_RMII_CLK Commit 4b5923249b8fa4 ("net: dsa: lantiq_gswip: Configure all remaining GSWIP_MII_CFG bits") added all known bits in the GSWIP_MII_CFGp register. It helped bring this register into a well-defined state so the driver has to rely less on the bootloader to do things right. Unfortunately it also sets the GSWIP_MII_CFG_RMII_CLK bit without any possibility to configure it. Upon further testing it turns out that all boards which are supported by the GSWIP driver in OpenWrt which use an RMII PHY have a dedicated oscillator on the board which provides the 50MHz RMII reference clock. Don't set the GSWIP_MII_CFG_RMII_CLK bit (but keep the code which always clears it) to fix support for the Fritz!Box 7362 SL in OpenWrt. This is a board with two Atheros AR8030 RMII PHYs. With the "RMII clock" bit set the MAC also generates the RMII reference clock whose signal then conflicts with the signal from the oscillator on the board. This results in a constant cycle of the PHY detecting link up/down (and as a result of that: the two ports using the AR8030 PHYs are not working). At the time of writing this patch there's no known board where the MAC (GSWIP) has to generate the RMII reference clock. If needed this can be implemented in future by providing a device-tree flag so the GSWIP_MII_CFG_RMII_CLK bit can be toggled per port. Fixes: 4b5923249b8fa4 ("net: dsa: lantiq_gswip: Configure all remaining GSWIP_MII_CFG bits") Tested-by: Jan Hoffmann Signed-off-by: Martin Blumenstingl Acked-by: Hauke Mehrtens Link: https://lore.kernel.org/r/20220425152027.2220750-1-martin.blumenstingl@googlemail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq_gswip.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index a416240d001b..12c15da55664 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1681,9 +1681,6 @@ static void gswip_phylink_mac_config(struct dsa_switch *ds, int port, break; case PHY_INTERFACE_MODE_RMII: miicfg |= GSWIP_MII_CFG_MODE_RMIIM; - - /* Configure the RMII clock as output: */ - miicfg |= GSWIP_MII_CFG_RMII_CLK; break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: From 588faa1ea5eecb351100ee5d187b9be99210f70d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 26 Apr 2022 19:34:11 -0600 Subject: [PATCH 347/803] io_uring: check reserved fields for send/sendmsg We should check unused fields for non-zero and -EINVAL if they are set, making it consistent with other opcodes. Fixes: 0fa03c624d8f ("io_uring: add support for sendmsg()") Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7625b29153b9..136c2fc49a1e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5207,6 +5207,8 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; + if (unlikely(sqe->addr2 || sqe->file_index)) + return -EINVAL; sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); From 5a1e99b61b0c81388cde0c808b3e4173907df19f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 26 Apr 2022 19:34:57 -0600 Subject: [PATCH 348/803] io_uring: check reserved fields for recv/recvmsg We should check unused fields for non-zero and -EINVAL if they are set, making it consistent with other opcodes. Fixes: aa1fa28fc73e ("io_uring: add support for recvmsg()") Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 136c2fc49a1e..92ac50f139cd 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5420,6 +5420,8 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; + if (unlikely(sqe->addr2 || sqe->file_index)) + return -EINVAL; sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); From 8b202ee218395319aec1ef44f72043e1fbaccdd6 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 25 Apr 2022 14:17:42 +0200 Subject: [PATCH 349/803] s390: disable -Warray-bounds gcc-12 shows a lot of array bound warnings on s390. This is caused by the S390_lowcore macro which uses a hardcoded address of 0. Wrapping that with absolute_pointer() works, but gcc no longer knows that a 12 bit displacement is sufficient to access lowcore. So it emits instructions like 'lghi %r1,0; l %rx,xxx(%r1)' instead of a single load/store instruction. As s390 stores variables often read/written in lowcore, this is considered problematic. Therefore disable -Warray-bounds on s390 for gcc-12 for the time being, until there is a better solution. Signed-off-by: Sven Schnelle Link: https://lore.kernel.org/r/yt9dzgkelelc.fsf@linux.ibm.com Link: https://lore.kernel.org/r/20220422134308.1613610-1-svens@linux.ibm.com Link: https://lore.kernel.org/r/20220425121742.3222133-1-svens@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/Makefile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index e441b60b1812..df325eacf62d 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -30,6 +30,16 @@ KBUILD_CFLAGS_DECOMPRESSOR += -fno-stack-protector KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) + +ifdef CONFIG_CC_IS_GCC + ifeq ($(call cc-ifversion, -ge, 1200, y), y) + ifeq ($(call cc-ifversion, -lt, 1300, y), y) + KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds) + KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, array-bounds) + endif + endif +endif + UTS_MACHINE := s390x STACK_SIZE := $(if $(CONFIG_KASAN),65536,16384) CHECKFLAGS += -D__s390__ -D__s390x__ From 08da09f028043fed9653331ae75bc310411f72e6 Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Tue, 26 Apr 2022 20:51:09 -0700 Subject: [PATCH 350/803] hwmon: (pmbus) delta-ahe50dc-fan: work around hardware quirk CLEAR_FAULTS commands can apparently sometimes trigger catastrophic power output glitches on the ahe-50dc, so block them from being sent at all. Signed-off-by: Zev Weiss Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220427035109.3819-1-zev@bewilderbeest.net Fixes: d387d88ed045 ("hwmon: (pmbus) Add Delta AHE-50DC fan control module driver") Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/delta-ahe50dc-fan.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/hwmon/pmbus/delta-ahe50dc-fan.c b/drivers/hwmon/pmbus/delta-ahe50dc-fan.c index 40dffd9c4cbf..f546f0c12497 100644 --- a/drivers/hwmon/pmbus/delta-ahe50dc-fan.c +++ b/drivers/hwmon/pmbus/delta-ahe50dc-fan.c @@ -14,6 +14,21 @@ #define AHE50DC_PMBUS_READ_TEMP4 0xd0 +static int ahe50dc_fan_write_byte(struct i2c_client *client, int page, u8 value) +{ + /* + * The CLEAR_FAULTS operation seems to sometimes (unpredictably, perhaps + * 5% of the time or so) trigger a problematic phenomenon in which the + * fan speeds surge momentarily and at least some (perhaps all?) of the + * system's power outputs experience a glitch. + * + * However, according to Delta it should be OK to simply not send any + * CLEAR_FAULTS commands (the device doesn't seem to be capable of + * reporting any faults anyway), so just blackhole them unconditionally. + */ + return value == PMBUS_CLEAR_FAULTS ? -EOPNOTSUPP : -ENODATA; +} + static int ahe50dc_fan_read_word_data(struct i2c_client *client, int page, int phase, int reg) { /* temp1 in (virtual) page 1 is remapped to mfr-specific temp4 */ @@ -68,6 +83,7 @@ static struct pmbus_driver_info ahe50dc_fan_info = { PMBUS_HAVE_VIN | PMBUS_HAVE_FAN12 | PMBUS_HAVE_FAN34 | PMBUS_HAVE_STATUS_FAN12 | PMBUS_HAVE_STATUS_FAN34 | PMBUS_PAGE_VIRTUAL, .func[1] = PMBUS_HAVE_TEMP | PMBUS_PAGE_VIRTUAL, + .write_byte = ahe50dc_fan_write_byte, .read_word_data = ahe50dc_fan_read_word_data, }; From c61711c1c95791850be48dd65a1d72eb34ba719f Mon Sep 17 00:00:00 2001 From: Ajit Kumar Pandey Date: Tue, 26 Apr 2022 13:33:57 -0500 Subject: [PATCH 351/803] ASoC: SOF: Fix NULL pointer exception in sof_pci_probe callback We are accessing "desc->ops" in sof_pci_probe without checking "desc" pointer. This results in NULL pointer exception if pci_id->driver_data i.e desc pointer isn't defined in sof device probe: BUG: kernel NULL pointer dereference, address: 0000000000000060 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI RIP: 0010:sof_pci_probe+0x1e/0x17f [snd_sof_pci] Code: Unable to access opcode bytes at RIP 0xffffffffc043dff4. RSP: 0018:ffffac4b03b9b8d8 EFLAGS: 00010246 Add NULL pointer check for sof_dev_desc pointer to avoid such exception. Reviewed-by: Ranjani Sridharan Signed-off-by: Ajit Kumar Pandey Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20220426183357.102155-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/sof-pci-dev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c index 12f5cff22448..7fa2649e56e5 100644 --- a/sound/soc/sof/sof-pci-dev.c +++ b/sound/soc/sof/sof-pci-dev.c @@ -153,6 +153,11 @@ int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) dev_dbg(&pci->dev, "PCI DSP detected"); + if (!desc) { + dev_err(dev, "error: no matching PCI descriptor\n"); + return -ENODEV; + } + if (!desc->ops) { dev_err(dev, "error: no matching PCI descriptor ops\n"); return -ENODEV; From c7aab4f17021b636a0ee75bcf28e06fb7c94ab48 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 25 Apr 2022 11:47:11 +0200 Subject: [PATCH 352/803] netfilter: nf_conntrack_tcp: re-init for syn packets only Jaco Kroon reported tcp problems that Eric Dumazet and Neal Cardwell pinpointed to nf_conntrack tcp_in_window() bug. tcp trace shows following sequence: I > R Flags [S], seq 3451342529, win 62580, options [.. tfo [|tcp]> R > I Flags [S.], seq 2699962254, ack 3451342530, win 65535, options [..] R > I Flags [P.], seq 1:89, ack 1, [..] Note 3rd ACK is from responder to initiator so following branch is taken: } else if (((state->state == TCP_CONNTRACK_SYN_SENT && dir == IP_CT_DIR_ORIGINAL) || (state->state == TCP_CONNTRACK_SYN_RECV && dir == IP_CT_DIR_REPLY)) && after(end, sender->td_end)) { ... because state == TCP_CONNTRACK_SYN_RECV and dir is REPLY. This causes the scaling factor to be reset to 0: window scale option is only present in syn(ack) packets. This in turn makes nf_conntrack mark valid packets as out-of-window. This was always broken, it exists even in original commit where window tracking was added to ip_conntrack (nf_conntrack predecessor) in 2.6.9-rc1 kernel. Restrict to 'tcph->syn', just like the 3rd condtional added in commit 82b72cb94666 ("netfilter: conntrack: re-init state for retransmitted syn-ack"). Upon closer look, those conditionals/branches can be merged: Because earlier checks prevent syn-ack from showing up in original direction, the 'dir' checks in the conditional quoted above are redundant, remove them. Return early for pure syn retransmitted in reply direction (simultaneous open). Fixes: 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.") Reported-by: Jaco Kroon Signed-off-by: Florian Westphal Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 8ec55cd72572..204a5cdff5b1 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -556,24 +556,14 @@ static bool tcp_in_window(struct nf_conn *ct, } } - } else if (((state->state == TCP_CONNTRACK_SYN_SENT - && dir == IP_CT_DIR_ORIGINAL) - || (state->state == TCP_CONNTRACK_SYN_RECV - && dir == IP_CT_DIR_REPLY)) - && after(end, sender->td_end)) { + } else if (tcph->syn && + after(end, sender->td_end) && + (state->state == TCP_CONNTRACK_SYN_SENT || + state->state == TCP_CONNTRACK_SYN_RECV)) { /* * RFC 793: "if a TCP is reinitialized ... then it need * not wait at all; it must only be sure to use sequence * numbers larger than those recently used." - */ - sender->td_end = - sender->td_maxend = end; - sender->td_maxwin = (win == 0 ? 1 : win); - - tcp_options(skb, dataoff, tcph, sender); - } else if (tcph->syn && dir == IP_CT_DIR_REPLY && - state->state == TCP_CONNTRACK_SYN_SENT) { - /* Retransmitted syn-ack, or syn (simultaneous open). * * Re-init state for this direction, just like for the first * syn(-ack) reply, it might differ in seq, ack or tcp options. @@ -581,7 +571,8 @@ static bool tcp_in_window(struct nf_conn *ct, tcp_init_sender(sender, receiver, skb, dataoff, tcph, end, win); - if (!tcph->ack) + + if (dir == IP_CT_DIR_REPLY && !tcph->ack) return true; } From 626873c446f7559d5af8b48cefad903ffd85cf4e Mon Sep 17 00:00:00 2001 From: Volodymyr Mytnyk Date: Wed, 27 Apr 2022 14:09:00 +0300 Subject: [PATCH 353/803] netfilter: conntrack: fix udp offload timeout sysctl `nf_flowtable_udp_timeout` sysctl option is available only if CONFIG_NFT_FLOW_OFFLOAD enabled. But infra for this flow offload UDP timeout was added under CONFIG_NF_FLOW_TABLE config option. So, if you have CONFIG_NFT_FLOW_OFFLOAD disabled and CONFIG_NF_FLOW_TABLE enabled, the `nf_flowtable_udp_timeout` is not present in sysfs. Please note, that TCP flow offload timeout sysctl option is present even CONFIG_NFT_FLOW_OFFLOAD is disabled. I suppose it was a typo in commit that adds UDP flow offload timeout and CONFIG_NF_FLOW_TABLE should be used instead. Fixes: 975c57504da1 ("netfilter: conntrack: Introduce udp offload timeout configuration") Signed-off-by: Volodymyr Mytnyk Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 3e1afd10a9b6..55aa55b252b2 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -823,7 +823,7 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, -#if IS_ENABLED(CONFIG_NFT_FLOW_OFFLOAD) +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = { .procname = "nf_flowtable_udp_timeout", .maxlen = sizeof(unsigned int), From 8c936f9ea11ec4e35e288810a7503b5c841a355f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 26 Apr 2022 19:01:01 -1000 Subject: [PATCH 354/803] iocost: don't reset the inuse weight of under-weighted debtors When an iocg is in debt, its inuse weight is owned by debt handling and should stay at 1. This invariant was broken when determining the amount of surpluses at the beginning of donation calculation - when an iocg's hierarchical weight is too low, the iocg is excluded from donation calculation and its inuse is reset to its active regardless of its indebtedness, triggering warnings like the following: WARNING: CPU: 5 PID: 0 at block/blk-iocost.c:1416 iocg_kick_waitq+0x392/0x3a0 ... RIP: 0010:iocg_kick_waitq+0x392/0x3a0 Code: 00 00 be ff ff ff ff 48 89 4d a8 e8 98 b2 70 00 48 8b 4d a8 85 c0 0f 85 4a fe ff ff 0f 0b e9 43 fe ff ff 0f 0b e9 4d fe ff ff <0f> 0b e9 50 fe ff ff e8 a2 ae 70 00 66 90 0f 1f 44 00 00 55 48 89 RSP: 0018:ffffc90000200d08 EFLAGS: 00010016 ... ioc_timer_fn+0x2e0/0x1470 call_timer_fn+0xa1/0x2c0 ... As this happens only when an iocg's hierarchical weight is negligible, its impact likely is limited to triggering the warnings. Fix it by skipping resetting inuse of under-weighted debtors. Signed-off-by: Tejun Heo Reported-by: Rik van Riel Fixes: c421a3eb2e27 ("blk-iocost: revamp debt handling") Cc: stable@vger.kernel.org # v5.10+ Link: https://lore.kernel.org/r/YmjODd4aif9BzFuO@slm.duckdns.org Signed-off-by: Jens Axboe --- block/blk-iocost.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 70a0a3d680a3..9bd670999d0a 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2322,7 +2322,17 @@ static void ioc_timer_fn(struct timer_list *timer) iocg->hweight_donating = hwa; iocg->hweight_after_donation = new_hwi; list_add(&iocg->surplus_list, &surpluses); - } else { + } else if (!iocg->abs_vdebt) { + /* + * @iocg doesn't have enough to donate. Reset + * its inuse to active. + * + * Don't reset debtors as their inuse's are + * owned by debt handling. This shouldn't affect + * donation calculuation in any meaningful way + * as @iocg doesn't have a meaningful amount of + * share anyway. + */ TRACE_IOCG_PATH(inuse_shortage, iocg, &now, iocg->inuse, iocg->active, iocg->hweight_inuse, new_hwi); From 4345ece8f0bcc682f1fb3b648922c9be5f7dbe6c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Apr 2022 10:37:44 +0300 Subject: [PATCH 355/803] platform/x86: asus-wmi: Potential buffer overflow in asus_wmi_evaluate_method_buf() This code tests for if the obj->buffer.length is larger than the buffer but then it just does the memcpy() anyway. Fixes: 0f0ac158d28f ("platform/x86: asus-wmi: Add support for custom fan curves") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20220413073744.GB8812@kili Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/asus-wmi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 2104a2621e50..7e3c0a8e3997 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -371,10 +371,14 @@ static int asus_wmi_evaluate_method_buf(u32 method_id, switch (obj->type) { case ACPI_TYPE_BUFFER: - if (obj->buffer.length > size) + if (obj->buffer.length > size) { err = -ENOSPC; - if (obj->buffer.length == 0) + break; + } + if (obj->buffer.length == 0) { err = -ENODATA; + break; + } memcpy(ret_buffer, obj->buffer.pointer, obj->buffer.length); break; From 9fe1bb29ea0ab231aa916dad4bcf0c435beb5869 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 27 Apr 2022 13:49:56 +0200 Subject: [PATCH 356/803] platform/x86: asus-wmi: Fix driver not binding when fan curve control probe fails Before this commit fan_curve_check_present() was trying to not cause the probe to fail on devices without fan curve control by testing for known error codes returned by asus_wmi_evaluate_method_buf(). Checking for ENODATA or ENODEV, with the latter being returned by this function when an ACPI integer with a value of ASUS_WMI_UNSUPPORTED_METHOD is returned. But for other ACPI integer returns this function just returns them as is, including the ASUS_WMI_DSTS_UNKNOWN_BIT value of 2. On the Asus U36SD ASUS_WMI_DSTS_UNKNOWN_BIT gets returned, leading to: asus-nb-wmi: probe of asus-nb-wmi failed with error 2 Instead of playing whack a mole with error codes here, simply treat all errors as there not being any fan curves, fixing the driver no longer loading on the Asus U36SD laptop. Fixes: e3d13da7f77d ("platform/x86: asus-wmi: Fix regression when probing for fan curve control") BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=2079125 Cc: Luke D. Jones Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220427114956.332919-1-hdegoede@redhat.com --- drivers/platform/x86/asus-wmi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 7e3c0a8e3997..0e7fbed8a50d 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -2227,9 +2227,10 @@ static int fan_curve_check_present(struct asus_wmi *asus, bool *available, err = fan_curve_get_factory_default(asus, fan_dev); if (err) { - if (err == -ENODEV || err == -ENODATA) - return 0; - return err; + pr_debug("fan_curve_get_factory_default(0x%08x) failed: %d\n", + fan_dev, err); + /* Don't cause probe to fail on devices without fan-curves */ + return 0; } *available = true; From 89a8f23fee5ef7545ef6470ef61b61f336df7b49 Mon Sep 17 00:00:00 2001 From: Gabriele Mazzotta Date: Tue, 26 Apr 2022 14:08:27 +0200 Subject: [PATCH 357/803] platform/x86: dell-laptop: Add quirk entry for Latitude 7520 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Latitude 7520 supports AC timeouts, but it has no KBD_LED_AC_TOKEN and so changes to stop_timeout appear to have no effect if the laptop is plugged in. Signed-off-by: Gabriele Mazzotta Acked-by: Pali Rohár Link: https://lore.kernel.org/r/20220426120827.12363-1-gabriele.mzt@gmail.com Signed-off-by: Hans de Goede --- drivers/platform/x86/dell/dell-laptop.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/platform/x86/dell/dell-laptop.c b/drivers/platform/x86/dell/dell-laptop.c index 8230e7a68a5e..1321687d923e 100644 --- a/drivers/platform/x86/dell/dell-laptop.c +++ b/drivers/platform/x86/dell/dell-laptop.c @@ -80,6 +80,10 @@ static struct quirk_entry quirk_dell_inspiron_1012 = { .kbd_led_not_present = true, }; +static struct quirk_entry quirk_dell_latitude_7520 = { + .kbd_missing_ac_tag = true, +}; + static struct platform_driver platform_driver = { .driver = { .name = "dell-laptop", @@ -336,6 +340,15 @@ static const struct dmi_system_id dell_quirks[] __initconst = { }, .driver_data = &quirk_dell_inspiron_1012, }, + { + .callback = dmi_matched, + .ident = "Dell Latitude 7520", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude 7520"), + }, + .driver_data = &quirk_dell_latitude_7520, + }, { } }; From e5483b45f6ed62e5434e74af2025a15d415480af Mon Sep 17 00:00:00 2001 From: Darryn Anton Jordan Date: Thu, 14 Apr 2022 16:24:43 +0200 Subject: [PATCH 358/803] platform/x86: gigabyte-wmi: added support for B660 GAMING X DDR4 motherboard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This works on my system. Signed-off-by: Darryn Anton Jordan Acked-by: Thomas Weißschuh Link: https://lore.kernel.org/r/Ylguq87YG+9L3foV@hark Signed-off-by: Hans de Goede --- drivers/platform/x86/gigabyte-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index 658bab4b7964..e87a931eab1e 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -148,6 +148,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550I AORUS PRO AX"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660 GAMING X DDR4"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"), From 8d75f7b4a3dfd5714a5dc87cfdaa27bd2d14aa48 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 27 Apr 2022 03:03:04 -0700 Subject: [PATCH 359/803] platform/x86: intel-uncore-freq: Prevent driver loading in guests Loading this driver in guests results in unchecked MSR access error for MSR 0x620. There is no use of reading and modifying package/die scope uncore MSRs in guests. So check for CPU feature X86_FEATURE_HYPERVISOR to prevent loading of this driver in guests. Fixes: dbce412a7733 ("platform/x86/intel-uncore-freq: Split common and enumeration part") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=215870 Suggested-by: Borislav Petkov Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20220427100304.2562990-1-srinivas.pandruvada@linux.intel.com Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c index c61f804dd44e..8f9c571d7257 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c @@ -212,6 +212,9 @@ static int __init intel_uncore_init(void) const struct x86_cpu_id *id; int ret; + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + id = x86_match_cpu(intel_uncore_cpu_ids); if (!id) return -ENODEV; From 679c7a3f1596e8b5493c9473da4d967de540027c Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Wed, 20 Apr 2022 08:56:20 -0700 Subject: [PATCH 360/803] platform/x86/intel/sdsi: Handle leaky bucket To prevent an agent from indefinitely holding the mailbox firmware has implemented a leaky bucket algorithm. Repeated access to the mailbox may now incur a delay of up to 2.1 seconds. Add a retry loop that tries for up to 2.5 seconds to acquire the mailbox. Fixes: 2546c6000430 ("platform/x86: Add Intel Software Defined Silicon driver") Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20220420155622.1763633-2-david.e.box@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/sdsi.c | 32 ++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/platform/x86/intel/sdsi.c b/drivers/platform/x86/intel/sdsi.c index 11d14cc0ff0a..11f211402479 100644 --- a/drivers/platform/x86/intel/sdsi.c +++ b/drivers/platform/x86/intel/sdsi.c @@ -51,6 +51,8 @@ #define MBOX_TIMEOUT_US 2000 #define MBOX_TIMEOUT_ACQUIRE_US 1000 #define MBOX_POLLING_PERIOD_US 100 +#define MBOX_ACQUIRE_NUM_RETRIES 5 +#define MBOX_ACQUIRE_RETRY_DELAY_MS 500 #define MBOX_MAX_PACKETS 4 #define MBOX_OWNER_NONE 0x00 @@ -263,7 +265,7 @@ static int sdsi_mbox_acquire(struct sdsi_priv *priv, struct sdsi_mbox_info *info { u64 control; u32 owner; - int ret; + int ret, retries = 0; lockdep_assert_held(&priv->mb_lock); @@ -273,13 +275,29 @@ static int sdsi_mbox_acquire(struct sdsi_priv *priv, struct sdsi_mbox_info *info if (owner != MBOX_OWNER_NONE) return -EBUSY; - /* Write first qword of payload */ - writeq(info->payload[0], priv->mbox_addr); + /* + * If there has been no recent transaction and no one owns the mailbox, + * we should acquire it in under 1ms. However, if we've accessed it + * recently it may take up to 2.1 seconds to acquire it again. + */ + do { + /* Write first qword of payload */ + writeq(info->payload[0], priv->mbox_addr); - /* Check for ownership */ - ret = readq_poll_timeout(priv->control_addr, control, - FIELD_GET(CTRL_OWNER, control) & MBOX_OWNER_INBAND, - MBOX_POLLING_PERIOD_US, MBOX_TIMEOUT_ACQUIRE_US); + /* Check for ownership */ + ret = readq_poll_timeout(priv->control_addr, control, + FIELD_GET(CTRL_OWNER, control) == MBOX_OWNER_INBAND, + MBOX_POLLING_PERIOD_US, MBOX_TIMEOUT_ACQUIRE_US); + + if (FIELD_GET(CTRL_OWNER, control) == MBOX_OWNER_NONE && + retries++ < MBOX_ACQUIRE_NUM_RETRIES) { + msleep(MBOX_ACQUIRE_RETRY_DELAY_MS); + continue; + } + + /* Either we got it or someone else did. */ + break; + } while (true); return ret; } From a30393b36ca84be7c70733b7c1e39d311f5919f3 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Wed, 20 Apr 2022 08:56:21 -0700 Subject: [PATCH 361/803] platform/x86/intel/sdsi: Poll on ready bit for writes Due to change in firmware flow, update mailbox writes to poll on ready bit instead of run_busy bit. This change makes the polling method consistent for both writes and reads, which also uses the ready bit. Fixes: 2546c6000430 ("platform/x86: Add Intel Software Defined Silicon driver") Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20220420155622.1763633-3-david.e.box@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/sdsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/sdsi.c b/drivers/platform/x86/intel/sdsi.c index 11f211402479..89729fed030c 100644 --- a/drivers/platform/x86/intel/sdsi.c +++ b/drivers/platform/x86/intel/sdsi.c @@ -245,8 +245,8 @@ static int sdsi_mbox_cmd_write(struct sdsi_priv *priv, struct sdsi_mbox_info *in FIELD_PREP(CTRL_PACKET_SIZE, info->size); writeq(control, priv->control_addr); - /* Poll on run_busy bit */ - ret = readq_poll_timeout(priv->control_addr, control, !(control & CTRL_RUN_BUSY), + /* Poll on ready bit */ + ret = readq_poll_timeout(priv->control_addr, control, control & CTRL_READY, MBOX_POLLING_PERIOD_US, MBOX_TIMEOUT_US); if (ret) From 00dd3ace931b4d2f6e5e9ccf4bf738fe46b64289 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Wed, 20 Apr 2022 08:56:22 -0700 Subject: [PATCH 362/803] platform/x86/intel/sdsi: Fix bug in multi packet reads Fix bug that added an offset to the mailbox addr during multi-packet reads. Did not affect current ABI since it doesn't support multi-packet transactions. Fixes: 2546c6000430 ("platform/x86: Add Intel Software Defined Silicon driver") Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20220420155622.1763633-4-david.e.box@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/sdsi.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/intel/sdsi.c b/drivers/platform/x86/intel/sdsi.c index 89729fed030c..c830e98dfa38 100644 --- a/drivers/platform/x86/intel/sdsi.c +++ b/drivers/platform/x86/intel/sdsi.c @@ -83,7 +83,7 @@ enum sdsi_command { struct sdsi_mbox_info { u64 *payload; - u64 *buffer; + void *buffer; int size; }; @@ -165,9 +165,7 @@ static int sdsi_mbox_cmd_read(struct sdsi_priv *priv, struct sdsi_mbox_info *inf total = 0; loop = 0; do { - int offset = SDSI_SIZE_MAILBOX * loop; - void __iomem *addr = priv->mbox_addr + offset; - u64 *buf = info->buffer + offset / SDSI_SIZE_CMD; + void *buf = info->buffer + (SDSI_SIZE_MAILBOX * loop); u32 packet_size; /* Poll on ready bit */ @@ -198,7 +196,7 @@ static int sdsi_mbox_cmd_read(struct sdsi_priv *priv, struct sdsi_mbox_info *inf break; } - sdsi_memcpy64_fromio(buf, addr, round_up(packet_size, SDSI_SIZE_CMD)); + sdsi_memcpy64_fromio(buf, priv->mbox_addr, round_up(packet_size, SDSI_SIZE_CMD)); total += packet_size; From eb2fd9b43fae0c51982ac4229535b6cfd77380db Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sat, 23 Apr 2022 08:30:48 -0400 Subject: [PATCH 363/803] platform/x86/intel: pmc/core: change pmc_lpm_modes to static Sparse reports this issue core.c: note: in included file: core.h:239:12: warning: symbol 'pmc_lpm_modes' was not declared. Should it be static? Global variables should not be defined in headers. This only works because core.h is only included by core.c. Single file use variables should be static, so change its storage-class specifier to static. Signed-off-by: Tom Rix Reviewed-by: David E. Box Link: https://lore.kernel.org/r/20220423123048.591405-1-trix@redhat.com Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/pmc/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h index a46d3b53bf61..7a059e02c265 100644 --- a/drivers/platform/x86/intel/pmc/core.h +++ b/drivers/platform/x86/intel/pmc/core.h @@ -236,7 +236,7 @@ enum ppfear_regs { #define ADL_LPM_STATUS_LATCH_EN_OFFSET 0x1704 #define ADL_LPM_LIVE_STATUS_OFFSET 0x1764 -const char *pmc_lpm_modes[] = { +static const char *pmc_lpm_modes[] = { "S0i2.0", "S0i2.1", "S0i2.2", From 233087ca063686964a53c829d547c7571e3f67bf Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 26 Apr 2022 23:41:05 +0300 Subject: [PATCH 364/803] floppy: disable FDRAWCMD by default Minh Yuan reported a concurrency use-after-free issue in the floppy code between raw_cmd_ioctl and seek_interrupt. [ It turns out this has been around, and that others have reported the KASAN splats over the years, but Minh Yuan had a reproducer for it and so gets primary credit for reporting it for this fix - Linus ] The problem is, this driver tends to break very easily and nowadays, nobody is expected to use FDRAWCMD anyway since it was used to manipulate non-standard formats. The risk of breaking the driver is higher than the risk presented by this race, and accessing the device requires privileges anyway. Let's just add a config option to completely disable this ioctl and leave it disabled by default. Distros shouldn't use it, and only those running on antique hardware might need to enable it. Link: https://lore.kernel.org/all/000000000000b71cdd05d703f6bf@google.com/ Link: https://lore.kernel.org/lkml/CAKcFiNC=MfYVW-Jt9A3=FPJpTwCD2PL_ULNCpsCVE5s8ZeBQgQ@mail.gmail.com Link: https://lore.kernel.org/all/CAEAjamu1FRhz6StCe_55XY5s389ZP_xmCF69k987En+1z53=eg@mail.gmail.com Reported-by: Minh Yuan Reported-by: syzbot+8e8958586909d62b6840@syzkaller.appspotmail.com Reported-by: cruise k Reported-by: Kyungtae Kim Suggested-by: Linus Torvalds Tested-by: Denis Efremov Signed-off-by: Willy Tarreau Signed-off-by: Linus Torvalds --- drivers/block/Kconfig | 16 ++++++++++++++++ drivers/block/floppy.c | 43 +++++++++++++++++++++++++++++++----------- 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 519b6d38d4df..fdb81f2794cd 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -33,6 +33,22 @@ config BLK_DEV_FD To compile this driver as a module, choose M here: the module will be called floppy. +config BLK_DEV_FD_RAWCMD + bool "Support for raw floppy disk commands (DEPRECATED)" + depends on BLK_DEV_FD + help + If you want to use actual physical floppies and expect to do + special low-level hardware accesses to them (access and use + non-standard formats, for example), then enable this. + + Note that the code enabled by this option is rarely used and + might be unstable or insecure, and distros should not enable it. + + Note: FDRAWCMD is deprecated and will be removed from the kernel + in the near future. + + If unsure, say N. + config AMIGA_FLOPPY tristate "Amiga floppy support" depends on AMIGA diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 8c647532e3ce..d5b9ff9bcbb2 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2982,6 +2982,8 @@ static const char *drive_name(int type, int drive) return "(null)"; } +#ifdef CONFIG_BLK_DEV_FD_RAWCMD + /* raw commands */ static void raw_cmd_done(int flag) { @@ -3181,6 +3183,35 @@ static int raw_cmd_ioctl(int cmd, void __user *param) return ret; } +static int floppy_raw_cmd_ioctl(int type, int drive, int cmd, + void __user *param) +{ + int ret; + + pr_warn_once("Note: FDRAWCMD is deprecated and will be removed from the kernel in the near future.\n"); + + if (type) + return -EINVAL; + if (lock_fdc(drive)) + return -EINTR; + set_floppy(drive); + ret = raw_cmd_ioctl(cmd, param); + if (ret == -EINTR) + return -EINTR; + process_fd_request(); + return ret; +} + +#else /* CONFIG_BLK_DEV_FD_RAWCMD */ + +static int floppy_raw_cmd_ioctl(int type, int drive, int cmd, + void __user *param) +{ + return -EOPNOTSUPP; +} + +#endif + static int invalidate_drive(struct block_device *bdev) { /* invalidate the buffer track to force a reread */ @@ -3369,7 +3400,6 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int { int drive = (long)bdev->bd_disk->private_data; int type = ITYPE(drive_state[drive].fd_device); - int i; int ret; int size; union inparam { @@ -3520,16 +3550,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int outparam = &write_errors[drive]; break; case FDRAWCMD: - if (type) - return -EINVAL; - if (lock_fdc(drive)) - return -EINTR; - set_floppy(drive); - i = raw_cmd_ioctl(cmd, (void __user *)param); - if (i == -EINTR) - return -EINTR; - process_fd_request(); - return i; + return floppy_raw_cmd_ioctl(type, drive, cmd, (void __user *)param); case FDTWADDLE: if (lock_fdc(drive)) return -EINTR; From 7b5148be4a6e1119523f7546c5c3d112ed6c40c2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 Apr 2022 10:57:23 -0700 Subject: [PATCH 365/803] Add Eric Dumazet to networking maintainers Welcome Eric! Acked-by: Paolo Abeni Signed-off-by: David S. Miller Link: https://lore.kernel.org/r/20220426175723.417614-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index d21963b8f26a..3c0f56b44c61 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13624,6 +13624,7 @@ F: net/core/drop_monitor.c NETWORKING DRIVERS M: "David S. Miller" +M: Eric Dumazet M: Jakub Kicinski M: Paolo Abeni L: netdev@vger.kernel.org @@ -13671,6 +13672,7 @@ F: tools/testing/selftests/drivers/net/dsa/ NETWORKING [GENERAL] M: "David S. Miller" +M: Eric Dumazet M: Jakub Kicinski M: Paolo Abeni L: netdev@vger.kernel.org From 3f65b1e2f424f44585bd701024a3bfd0b1e0ade2 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Tue, 26 Apr 2022 14:12:14 -0700 Subject: [PATCH 366/803] drm/msm/dp: remove fail safe mode related code Current DP driver implementation has adding safe mode done at dp_hpd_plug_handle() which is expected to be executed under event thread context. However there is possible circular locking happen (see blow stack trace) after edp driver call dp_hpd_plug_handle() from dp_bridge_enable() which is executed under drm_thread context. After review all possibilities methods and as discussed on https://patchwork.freedesktop.org/patch/483155/, supporting EDID compliance tests in the driver is quite hacky. As seen with other vendor drivers, supporting these will be much easier with IGT. Hence removing all the related fail safe code for it so that no possibility of circular lock will happen. Reviewed-by: Stephen Boyd Reviewed-by: Douglas Anderson Reviewed-by: Dmitry Baryshkov ====================================================== WARNING: possible circular locking dependency detected 5.15.35-lockdep #6 Tainted: G W ------------------------------------------------------ frecon/429 is trying to acquire lock: ffffff808dc3c4e8 (&dev->mode_config.mutex){+.+.}-{3:3}, at: dp_panel_add_fail_safe_mode+0x4c/0xa0 but task is already holding lock: ffffff808dc441e0 (&kms->commit_lock[i]){+.+.}-{3:3}, at: lock_crtcs+0xb4/0x124 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&kms->commit_lock[i]){+.+.}-{3:3}: __mutex_lock_common+0x174/0x1a64 mutex_lock_nested+0x98/0xac lock_crtcs+0xb4/0x124 msm_atomic_commit_tail+0x330/0x748 commit_tail+0x19c/0x278 drm_atomic_helper_commit+0x1dc/0x1f0 drm_atomic_commit+0xc0/0xd8 drm_atomic_helper_set_config+0xb4/0x134 drm_mode_setcrtc+0x688/0x1248 drm_ioctl_kernel+0x1e4/0x338 drm_ioctl+0x3a4/0x684 __arm64_sys_ioctl+0x118/0x154 invoke_syscall+0x78/0x224 el0_svc_common+0x178/0x200 do_el0_svc+0x94/0x13c el0_svc+0x5c/0xec el0t_64_sync_handler+0x78/0x108 el0t_64_sync+0x1a4/0x1a8 -> #2 (crtc_ww_class_mutex){+.+.}-{3:3}: __mutex_lock_common+0x174/0x1a64 ww_mutex_lock+0xb8/0x278 modeset_lock+0x304/0x4ac drm_modeset_lock+0x4c/0x7c drmm_mode_config_init+0x4a8/0xc50 msm_drm_init+0x274/0xac0 msm_drm_bind+0x20/0x2c try_to_bring_up_master+0x3dc/0x470 __component_add+0x18c/0x3c0 component_add+0x1c/0x28 dp_display_probe+0x954/0xa98 platform_probe+0x124/0x15c really_probe+0x1b0/0x5f8 __driver_probe_device+0x174/0x20c driver_probe_device+0x70/0x134 __device_attach_driver+0x130/0x1d0 bus_for_each_drv+0xfc/0x14c __device_attach+0x1bc/0x2bc device_initial_probe+0x1c/0x28 bus_probe_device+0x94/0x178 deferred_probe_work_func+0x1a4/0x1f0 process_one_work+0x5d4/0x9dc worker_thread+0x898/0xccc kthread+0x2d4/0x3d4 ret_from_fork+0x10/0x20 -> #1 (crtc_ww_class_acquire){+.+.}-{0:0}: ww_acquire_init+0x1c4/0x2c8 drm_modeset_acquire_init+0x44/0xc8 drm_helper_probe_single_connector_modes+0xb0/0x12dc drm_mode_getconnector+0x5dc/0xfe8 drm_ioctl_kernel+0x1e4/0x338 drm_ioctl+0x3a4/0x684 __arm64_sys_ioctl+0x118/0x154 invoke_syscall+0x78/0x224 el0_svc_common+0x178/0x200 do_el0_svc+0x94/0x13c el0_svc+0x5c/0xec el0t_64_sync_handler+0x78/0x108 el0t_64_sync+0x1a4/0x1a8 -> #0 (&dev->mode_config.mutex){+.+.}-{3:3}: __lock_acquire+0x2650/0x672c lock_acquire+0x1b4/0x4ac __mutex_lock_common+0x174/0x1a64 mutex_lock_nested+0x98/0xac dp_panel_add_fail_safe_mode+0x4c/0xa0 dp_hpd_plug_handle+0x1f0/0x280 dp_bridge_enable+0x94/0x2b8 drm_atomic_bridge_chain_enable+0x11c/0x168 drm_atomic_helper_commit_modeset_enables+0x500/0x740 msm_atomic_commit_tail+0x3e4/0x748 commit_tail+0x19c/0x278 drm_atomic_helper_commit+0x1dc/0x1f0 drm_atomic_commit+0xc0/0xd8 drm_atomic_helper_set_config+0xb4/0x134 drm_mode_setcrtc+0x688/0x1248 drm_ioctl_kernel+0x1e4/0x338 drm_ioctl+0x3a4/0x684 __arm64_sys_ioctl+0x118/0x154 invoke_syscall+0x78/0x224 el0_svc_common+0x178/0x200 do_el0_svc+0x94/0x13c el0_svc+0x5c/0xec el0t_64_sync_handler+0x78/0x108 el0t_64_sync+0x1a4/0x1a8 Changes in v2: -- re text commit title -- remove all fail safe mode Changes in v3: -- remove dp_panel_add_fail_safe_mode() from dp_panel.h -- add Fixes Changes in v5: -- to=dianders@chromium.org Changes in v6: -- fix Fixes commit ID Fixes: 8b2c181e3dcf ("drm/msm/dp: add fail safe mode outside of event_mutex context") Reported-by: Douglas Anderson Signed-off-by: Kuogee Hsieh Link: https://lore.kernel.org/r/1651007534-31842-1-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_display.c | 6 ------ drivers/gpu/drm/msm/dp/dp_panel.c | 11 ----------- drivers/gpu/drm/msm/dp/dp_panel.h | 1 - 3 files changed, 18 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index a42732b67349..178b774a5fbd 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -580,12 +580,6 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) dp->dp_display.connector_type, state); mutex_unlock(&dp->event_mutex); - /* - * add fail safe mode outside event_mutex scope - * to avoid potiential circular lock with drm thread - */ - dp_panel_add_fail_safe_mode(dp->dp_display.connector); - /* uevent will complete connection part */ return 0; }; diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index 26c3653c99ec..26f4b6959c31 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -151,15 +151,6 @@ static int dp_panel_update_modes(struct drm_connector *connector, return rc; } -void dp_panel_add_fail_safe_mode(struct drm_connector *connector) -{ - /* fail safe edid */ - mutex_lock(&connector->dev->mode_config.mutex); - if (drm_add_modes_noedid(connector, 640, 480)) - drm_set_preferred_mode(connector, 640, 480); - mutex_unlock(&connector->dev->mode_config.mutex); -} - int dp_panel_read_sink_caps(struct dp_panel *dp_panel, struct drm_connector *connector) { @@ -215,8 +206,6 @@ int dp_panel_read_sink_caps(struct dp_panel *dp_panel, rc = -ETIMEDOUT; goto end; } - - dp_panel_add_fail_safe_mode(connector); } if (panel->aux_cfg_update_done) { diff --git a/drivers/gpu/drm/msm/dp/dp_panel.h b/drivers/gpu/drm/msm/dp/dp_panel.h index 99739ea679a7..9023e5bb4b8b 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.h +++ b/drivers/gpu/drm/msm/dp/dp_panel.h @@ -59,7 +59,6 @@ int dp_panel_init_panel_info(struct dp_panel *dp_panel); int dp_panel_deinit(struct dp_panel *dp_panel); int dp_panel_timing_cfg(struct dp_panel *dp_panel); void dp_panel_dump_regs(struct dp_panel *dp_panel); -void dp_panel_add_fail_safe_mode(struct drm_connector *connector); int dp_panel_read_sink_caps(struct dp_panel *dp_panel, struct drm_connector *connector); u32 dp_panel_get_mode_bpp(struct dp_panel *dp_panel, u32 mode_max_bpp, From ad8d869343ae4a07a2038a4ca923f699308c8323 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 27 Apr 2022 10:21:51 -0700 Subject: [PATCH 367/803] kernfs: fix NULL dereferencing in kernfs_remove kernfs_remove supported NULL kernfs_node param to bail out but revent per-fs lock change introduced regression that dereferencing the param without NULL check so kernel goes crash. This patch checks the NULL kernfs_node in kernfs_remove and if so, just return. Quote from bug report by Jirka ``` The bug is triggered by running NAS Parallel benchmark suite on SuperMicro servers with 2x Xeon(R) Gold 6126 CPU. Here is the error log: [ 247.035564] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 247.036009] #PF: supervisor read access in kernel mode [ 247.036009] #PF: error_code(0x0000) - not-present page [ 247.036009] PGD 0 P4D 0 [ 247.036009] Oops: 0000 [#1] PREEMPT SMP PTI [ 247.058060] CPU: 1 PID: 6546 Comm: umount Not tainted 5.16.0393c3714081a53795bbff0e985d24146def6f57f+ #16 [ 247.058060] Hardware name: Supermicro Super Server/X11DDW-L, BIOS 2.0b 03/07/2018 [ 247.058060] RIP: 0010:kernfs_remove+0x8/0x50 [ 247.058060] Code: 4c 89 e0 5b 5d 41 5c 41 5d 41 5e c3 49 c7 c4 f4 ff ff ff eb b2 66 66 2e 0f 1f 84 00 00 00 00 00 66 90 0f 1f 44 00 00 41 54 55 <48> 8b 47 08 48 89 fd 48 85 c0 48 0f 44 c7 4c 8b 60 50 49 83 c4 60 [ 247.058060] RSP: 0018:ffffbbfa48a27e48 EFLAGS: 00010246 [ 247.058060] RAX: 0000000000000001 RBX: ffffffff89e31f98 RCX: 0000000080200018 [ 247.058060] RDX: 0000000080200019 RSI: fffff6760786c900 RDI: 0000000000000000 [ 247.058060] RBP: ffffffff89e31f98 R08: ffff926b61b24d00 R09: 0000000080200018 [ 247.122048] R10: ffff926b61b24d00 R11: ffff926a8040c000 R12: ffff927bd09a2000 [ 247.122048] R13: ffffffff89e31fa0 R14: dead000000000122 R15: dead000000000100 [ 247.122048] FS: 00007f01be0a8c40(0000) GS:ffff926fa8e40000(0000) knlGS:0000000000000000 [ 247.122048] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 247.122048] CR2: 0000000000000008 CR3: 00000001145c6003 CR4: 00000000007706e0 [ 247.122048] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 247.122048] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 247.122048] PKRU: 55555554 [ 247.122048] Call Trace: [ 247.122048] [ 247.122048] rdt_kill_sb+0x29d/0x350 [ 247.122048] deactivate_locked_super+0x36/0xa0 [ 247.122048] cleanup_mnt+0x131/0x190 [ 247.122048] task_work_run+0x5c/0x90 [ 247.122048] exit_to_user_mode_prepare+0x229/0x230 [ 247.122048] syscall_exit_to_user_mode+0x18/0x40 [ 247.122048] do_syscall_64+0x48/0x90 [ 247.122048] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 247.122048] RIP: 0033:0x7f01be2d735b ``` Link: https://bugzilla.kernel.org/show_bug.cgi?id=215696 Link: https://lore.kernel.org/lkml/CAE4VaGDZr_4wzRn2___eDYRtmdPaGGJdzu_LCSkJYuY9BEO3cw@mail.gmail.com/ Fixes: 393c3714081a (kernfs: switch global kernfs_rwsem lock to per-fs lock) Cc: stable@vger.kernel.org Reported-by: Jirka Hladky Tested-by: Jirka Hladky Acked-by: Tejun Heo Signed-off-by: Minchan Kim Link: https://lore.kernel.org/r/20220427172152.3505364-1-minchan@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 61a8edc4ba8b..e205fde7163a 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1406,7 +1406,12 @@ static void __kernfs_remove(struct kernfs_node *kn) */ void kernfs_remove(struct kernfs_node *kn) { - struct kernfs_root *root = kernfs_root(kn); + struct kernfs_root *root; + + if (!kn) + return; + + root = kernfs_root(kn); down_write(&root->kernfs_rwsem); __kernfs_remove(kn); From c7d2f89fea26c84d5accc55d9976dd7e5305e63a Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Tue, 12 Apr 2022 16:56:36 +0900 Subject: [PATCH 368/803] bus: fsl-mc-msi: Fix MSI descriptor mutex lock for msi_first_desc() Commit e8604b1447b4 introduced a call to the helper function msi_first_desc(), which needs MSI descriptor mutex lock before call. However, the required mutex lock was not added. This results in lockdep assertion: WARNING: CPU: 4 PID: 119 at kernel/irq/msi.c:274 msi_first_desc+0xd0/0x10c msi_first_desc+0xd0/0x10c fsl_mc_msi_domain_alloc_irqs+0x7c/0xc0 fsl_mc_populate_irq_pool+0x80/0x3cc Fix this by adding the mutex lock and unlock around the function call. Fixes: e8604b1447b4 ("bus: fsl-mc-msi: Simplify MSI descriptor handling") Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Thomas Gleixner Reviewed-by: Damien Le Moal Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220412075636.755454-1-shinichiro.kawasaki@wdc.com --- drivers/bus/fsl-mc/fsl-mc-msi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/bus/fsl-mc/fsl-mc-msi.c b/drivers/bus/fsl-mc/fsl-mc-msi.c index 5e0e4393ce4d..0cfe859a4ac4 100644 --- a/drivers/bus/fsl-mc/fsl-mc-msi.c +++ b/drivers/bus/fsl-mc/fsl-mc-msi.c @@ -224,8 +224,12 @@ int fsl_mc_msi_domain_alloc_irqs(struct device *dev, unsigned int irq_count) if (error) return error; + msi_lock_descs(dev); if (msi_first_desc(dev, MSI_DESC_ALL)) - return -EINVAL; + error = -EINVAL; + msi_unlock_descs(dev); + if (error) + return error; /* * NOTE: Calling this function will trigger the invocation of the From e5be15767e7e284351853cbaba80cde8620341fb Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 25 Apr 2022 08:07:48 -0400 Subject: [PATCH 369/803] hex2bin: make the function hex_to_bin constant-time The function hex2bin is used to load cryptographic keys into device mapper targets dm-crypt and dm-integrity. It should take constant time independent on the processed data, so that concurrently running unprivileged code can't infer any information about the keys via microarchitectural convert channels. This patch changes the function hex_to_bin so that it contains no branches and no memory accesses. Note that this shouldn't cause performance degradation because the size of the new function is the same as the size of the old function (on x86-64) - and the new function causes no branch misprediction penalties. I compile-tested this function with gcc on aarch64 alpha arm hppa hppa64 i386 ia64 m68k mips32 mips64 powerpc powerpc64 riscv sh4 s390x sparc32 sparc64 x86_64 and with clang on aarch64 arm hexagon i386 mips32 mips64 powerpc powerpc64 s390x sparc32 sparc64 x86_64 to verify that there are no branches in the generated code. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 +- lib/hexdump.c | 32 +++++++++++++++++++++++++------- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index a890428bcc1a..fe6efb24d151 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -285,7 +285,7 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte) return buf; } -extern int hex_to_bin(char ch); +extern int hex_to_bin(unsigned char ch); extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); extern char *bin2hex(char *dst, const void *src, size_t count); diff --git a/lib/hexdump.c b/lib/hexdump.c index 9301578f98e8..369420ce553a 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -22,15 +22,33 @@ EXPORT_SYMBOL(hex_asc_upper); * * hex_to_bin() converts one hex digit to its actual value or -1 in case of bad * input. + * + * This function is used to load cryptographic keys, so it is coded in such a + * way that there are no conditions or memory accesses that depend on data. + * + * Explanation of the logic: + * (ch - '9' - 1) is negative if ch <= '9' + * ('0' - 1 - ch) is negative if ch >= '0' + * we "and" these two values, so the result is negative if ch is in the range + * '0' ... '9' + * we are only interested in the sign, so we do a shift ">> 8"; note that right + * shift of a negative value is implementation-defined, so we cast the + * value to (unsigned) before the shift --- we have 0xffffff if ch is in + * the range '0' ... '9', 0 otherwise + * we "and" this value with (ch - '0' + 1) --- we have a value 1 ... 10 if ch is + * in the range '0' ... '9', 0 otherwise + * we add this value to -1 --- we have a value 0 ... 9 if ch is in the range '0' + * ... '9', -1 otherwise + * the next line is similar to the previous one, but we need to decode both + * uppercase and lowercase letters, so we use (ch & 0xdf), which converts + * lowercase to uppercase */ -int hex_to_bin(char ch) +int hex_to_bin(unsigned char ch) { - if ((ch >= '0') && (ch <= '9')) - return ch - '0'; - ch = tolower(ch); - if ((ch >= 'a') && (ch <= 'f')) - return ch - 'a' + 10; - return -1; + unsigned char cu = ch & 0xdf; + return -1 + + ((ch - '0' + 1) & (unsigned)((ch - '9' - 1) & ('0' - 1 - ch)) >> 8) + + ((cu - 'A' + 11) & (unsigned)((cu - 'F' - 1) & ('A' - 1 - cu)) >> 8); } EXPORT_SYMBOL(hex_to_bin); From e4d8a29997731b3bb14059024b24df9f784288d0 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 27 Apr 2022 11:26:40 -0400 Subject: [PATCH 370/803] hex2bin: fix access beyond string end If we pass too short string to "hex2bin" (and the string size without the terminating NUL character is even), "hex2bin" reads one byte after the terminating NUL character. This patch fixes it. Note that hex_to_bin returns -1 on error and hex2bin return -EINVAL on error - so we can't just return the variable "hi" or "lo" on error. This inconsistency may be fixed in the next merge window, but for the purpose of fixing this bug, we just preserve the existing behavior and return -1 and -EINVAL. Signed-off-by: Mikulas Patocka Reviewed-by: Andy Shevchenko Fixes: b78049831ffe ("lib: add error checking to hex2bin") Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- lib/hexdump.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/hexdump.c b/lib/hexdump.c index 369420ce553a..06833d404398 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -63,10 +63,13 @@ EXPORT_SYMBOL(hex_to_bin); int hex2bin(u8 *dst, const char *src, size_t count) { while (count--) { - int hi = hex_to_bin(*src++); - int lo = hex_to_bin(*src++); + int hi, lo; - if ((hi < 0) || (lo < 0)) + hi = hex_to_bin(*src++); + if (unlikely(hi < 0)) + return -EINVAL; + lo = hex_to_bin(*src++); + if (unlikely(lo < 0)) return -EINVAL; *dst++ = (hi << 4) | lo; From f9095ac1ba1ce407cecc1df93c05ad4ac504661c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 27 Apr 2022 08:58:02 +0200 Subject: [PATCH 371/803] dt-bindings: ufs: cdns,ufshc: Add power-domains The Cadence UFS controller can be part of power domain (as it is in example DTS of TI J721e UFS Host Controller Glue), so allow such property. Reported-by: Rob Herring Signed-off-by: Krzysztof Kozlowski Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220427065802.110402-1-krzysztof.kozlowski@linaro.org --- Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml b/Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml index d227dea368be..fb45f66d6454 100644 --- a/Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml +++ b/Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml @@ -43,6 +43,9 @@ properties: - const: phy_clk - const: ref_clk + power-domains: + maxItems: 1 + reg: maxItems: 1 From e17fd4bf54fb579d03566ab22a02dc03b36f4d06 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 26 Apr 2022 08:35:08 -0500 Subject: [PATCH 372/803] dt-bindings: leds-mt6360: Drop redundant 'unevaluatedProperties' The binding has both 'unevaluatedProperties: false' and 'additionalProperties: false' which is redundant. 'additionalProperties' is the stricter of the two, so drop 'unevaluatedProperties'. Fixes: e05cab34e417 ("dt-bindings: leds: Add bindings for MT6360 LED") Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220426133508.1849580-1-robh@kernel.org --- Documentation/devicetree/bindings/leds/leds-mt6360.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/devicetree/bindings/leds/leds-mt6360.yaml b/Documentation/devicetree/bindings/leds/leds-mt6360.yaml index b2fe6eb89389..10f95bf1d666 100644 --- a/Documentation/devicetree/bindings/leds/leds-mt6360.yaml +++ b/Documentation/devicetree/bindings/leds/leds-mt6360.yaml @@ -43,8 +43,6 @@ patternProperties: - 4 # LED output FLASH1 - 5 # LED output FLASH2 -unevaluatedProperties: false - required: - compatible - "#address-cells" From 39c184a6a9a7a99950b321d55fe713175cf1d404 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 27 Apr 2022 09:08:52 +0300 Subject: [PATCH 373/803] intel_idle: Fix the 'preferred_cstates' module parameter Problem description. When user boots kernel up with the 'intel_idle.preferred_cstates=4' option, we enable C1E and disable C1 states on Sapphire Rapids Xeon (SPR). In order for C1E to work on SPR, we have to enable the C1E promotion bit on all CPUs. However, we enable it only on one CPU. Fix description. The 'intel_idle' driver already has the infrastructure for disabling C1E promotion on every CPU. This patch uses the same infrastructure for enabling C1E promotion on every CPU. It changes the boolean 'disable_promotion_to_c1e' variable to a tri-state 'c1e_promotion' variable. Tested on a 2-socket SPR system. I verified the following combinations: * C1E promotion enabled and disabled in BIOS. * Booted with and without the 'intel_idle.preferred_cstates=4' kernel argument. In all 4 cases C1E promotion was correctly set on all CPUs. Also tested on an old Broadwell system, just to make sure it does not cause a regression. C1E promotion was correctly disabled on that system, both C1 and C1E were exposed (as expected). Fixes: da0e58c038e6 ("intel_idle: add 'preferred_cstates' module argument") Reported-by: Jan Beulich Signed-off-by: Artem Bityutskiy [ rjw: Minor changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index b7640cfe0020..cf5ed4c1d02c 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -69,7 +69,12 @@ static unsigned int preferred_states_mask; static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; static unsigned long auto_demotion_disable_flags; -static bool disable_promotion_to_c1e; + +static enum { + C1E_PROMOTION_PRESERVE, + C1E_PROMOTION_ENABLE, + C1E_PROMOTION_DISABLE +} c1e_promotion = C1E_PROMOTION_PRESERVE; struct idle_cpu { struct cpuidle_state *state_table; @@ -1398,8 +1403,6 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ -static void c1e_promotion_enable(void); - /** * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. * @@ -1587,8 +1590,7 @@ static void __init spr_idle_state_table_update(void) spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; /* Enable C1E using the "C1E promotion" bit. */ - c1e_promotion_enable(); - disable_promotion_to_c1e = false; + c1e_promotion = C1E_PROMOTION_ENABLE; } /* @@ -1754,7 +1756,9 @@ static int intel_idle_cpu_init(unsigned int cpu) if (auto_demotion_disable_flags) auto_demotion_disable(); - if (disable_promotion_to_c1e) + if (c1e_promotion == C1E_PROMOTION_ENABLE) + c1e_promotion_enable(); + else if (c1e_promotion == C1E_PROMOTION_DISABLE) c1e_promotion_disable(); return 0; @@ -1833,7 +1837,8 @@ static int __init intel_idle_init(void) if (icpu) { cpuidle_state_table = icpu->state_table; auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; - disable_promotion_to_c1e = icpu->disable_promotion_to_c1e; + if (icpu->disable_promotion_to_c1e) + c1e_promotion = C1E_PROMOTION_DISABLE; if (icpu->use_acpi || force_use_acpi) intel_idle_acpi_cst_extract(); } else if (!intel_idle_acpi_cst_extract()) { From 7eac3bd38d18cd3317756649921b8264ddfee692 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 27 Apr 2022 09:08:53 +0300 Subject: [PATCH 374/803] intel_idle: Fix SPR C6 optimization The Sapphire Rapids (SPR) C6 optimization was added to the end of the 'spr_idle_state_table_update()' function. However, the function has a 'return' which may happen before the optimization has a chance to run. And this may prevent the optimization from happening. This is an unlikely scenario, but possible if user boots with, say, the 'intel_idle.preferred_cstates=6' kernel boot option. This patch fixes the issue by eliminating the problematic 'return' statement. Fixes: 3a9cf77b60dc ("intel_idle: add core C6 optimization for SPR") Suggested-by: Jan Beulich Reported-by: Jan Beulich Signed-off-by: Artem Bityutskiy [ rjw: Minor changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index cf5ed4c1d02c..47551ab73ca8 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1581,11 +1581,9 @@ static void __init spr_idle_state_table_update(void) unsigned long long msr; /* Check if user prefers C1E over C1. */ - if (preferred_states_mask & BIT(2)) { - if (preferred_states_mask & BIT(1)) - /* Both can't be enabled, stick to the defaults. */ - return; - + if ((preferred_states_mask & BIT(2)) && + !(preferred_states_mask & BIT(1))) { + /* Disable C1 and enable C1E. */ spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; From 4cddeacad6d4b23493a108d0705e7d2ab89ba5a3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Apr 2022 09:49:12 -1000 Subject: [PATCH 375/803] Revert "block: inherit request start time from bio for BLK_CGROUP" This reverts commit 0006707723233cb2a9a23ca19fc3d0864835704c. It has a couple problems: * bio_issue_time() is stored in bio->bi_issue truncated to 51 bits. This overflows in slightly over 26 days. Setting rq->io_start_time_ns with it means that io duration calculation would yield >26days after 26 days of uptime. This, for example, confuses kyber making it cause high IO latencies. * rq->io_start_time_ns should record the time that the IO is issued to the device so that on-device latency can be measured. However, bio_issue_time() is set before the bio goes through the rq-qos controllers (wbt, iolatency, iocost), so when the bio gets throttled in any of the mechanisms, the measured latencies make no sense - on-device latencies end up higher than request-alloc-to-completion latencies. We'll need a smarter way to avoid calling ktime_get_ns() repeatedly back-to-back. For now, let's revert the commit. Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # v5.16+ Link: https://lore.kernel.org/r/YmmeOLfo5lzc+8yI@slm.duckdns.org Signed-off-by: Jens Axboe --- block/blk-mq.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index c4370d276170..84d749511f55 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1131,14 +1131,7 @@ void blk_mq_start_request(struct request *rq) trace_block_rq_issue(rq); if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { - u64 start_time; -#ifdef CONFIG_BLK_CGROUP - if (rq->bio) - start_time = bio_issue_time(&rq->bio->bi_issue); - else -#endif - start_time = ktime_get_ns(); - rq->io_start_time_ns = start_time; + rq->io_start_time_ns = ktime_get_ns(); rq->stats_sectors = blk_rq_sectors(rq); rq->rq_flags |= RQF_STATS; rq_qos_issue(q, rq); From e6f9d69648029e48b8f97db09368d419b5e2614a Mon Sep 17 00:00:00 2001 From: Chung-Chiang Cheng Date: Fri, 15 Apr 2022 16:04:05 +0800 Subject: [PATCH 376/803] btrfs: export a helper for compression hard check inode_can_compress will be used outside of inode.c to check the availability of setting compression flag by xattr. This patch moves this function as an internal helper and renames it to btrfs_inode_can_compress. Reviewed-by: Nikolay Borisov Signed-off-by: Chung-Chiang Cheng Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 11 +++++++++++ fs/btrfs/inode.c | 15 ++------------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 47e72d72f7d0..32131a5d321b 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -384,6 +384,17 @@ static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation) return ret; } +/* + * Check if the inode has flags compatible with compression + */ +static inline bool btrfs_inode_can_compress(const struct btrfs_inode *inode) +{ + if (inode->flags & BTRFS_INODE_NODATACOW || + inode->flags & BTRFS_INODE_NODATASUM) + return false; + return true; +} + struct btrfs_dio_private { struct inode *inode; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8bac68d8e96f..673b9259f6c0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -480,17 +480,6 @@ static noinline int add_async_extent(struct async_chunk *cow, return 0; } -/* - * Check if the inode has flags compatible with compression - */ -static inline bool inode_can_compress(struct btrfs_inode *inode) -{ - if (inode->flags & BTRFS_INODE_NODATACOW || - inode->flags & BTRFS_INODE_NODATASUM) - return false; - return true; -} - /* * Check if the inode needs to be submitted to compression, based on mount * options, defragmentation, properties or heuristics. @@ -500,7 +489,7 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start, { struct btrfs_fs_info *fs_info = inode->root->fs_info; - if (!inode_can_compress(inode)) { + if (!btrfs_inode_can_compress(inode)) { WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), KERN_ERR "BTRFS: unexpected compression for ino %llu\n", btrfs_ino(inode)); @@ -2019,7 +2008,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root)); ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, nr_written); - } else if (!inode_can_compress(inode) || + } else if (!btrfs_inode_can_compress(inode) || !inode_need_compress(inode, start, end)) { if (zoned) ret = run_delalloc_zoned(inode, locked_page, start, end, From 0e852ab8974cd2b5946766b2d9baf82c78ace03d Mon Sep 17 00:00:00 2001 From: Chung-Chiang Cheng Date: Fri, 15 Apr 2022 16:04:06 +0800 Subject: [PATCH 377/803] btrfs: do not allow compression on nodatacow files Compression and nodatacow are mutually exclusive. A similar issue was fixed by commit f37c563bab429 ("btrfs: add missing check for nocow and compression inode flags"). Besides ioctl, there is another way to enable/disable/reset compression directly via xattr. The following steps will result in a invalid combination. $ touch bar $ chattr +C bar $ lsattr bar ---------------C-- bar $ setfattr -n btrfs.compression -v zstd bar $ lsattr bar --------c------C-- bar To align with the logic in check_fsflags, nocompress will also be unacceptable after this patch, to prevent mix any compression-related options with nodatacow. $ touch bar $ chattr +C bar $ lsattr bar ---------------C-- bar $ setfattr -n btrfs.compression -v zstd bar setfattr: bar: Invalid argument $ setfattr -n btrfs.compression -v no bar setfattr: bar: Invalid argument When both compression and nodatacow are enabled, then btrfs_run_delalloc_range prefers nodatacow and no compression happens. Reported-by: Jayce Lin CC: stable@vger.kernel.org # 5.10.x: e6f9d6964802: btrfs: export a helper for compression hard check CC: stable@vger.kernel.org # 5.10.x Reviewed-by: Filipe Manana Signed-off-by: Chung-Chiang Cheng Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/props.c | 16 +++++++++++----- fs/btrfs/props.h | 3 ++- fs/btrfs/xattr.c | 2 +- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 1a6d2d5b4b33..5a6f87744c28 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -17,7 +17,8 @@ static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS); struct prop_handler { struct hlist_node node; const char *xattr_name; - int (*validate)(const char *value, size_t len); + int (*validate)(const struct btrfs_inode *inode, const char *value, + size_t len); int (*apply)(struct inode *inode, const char *value, size_t len); const char *(*extract)(struct inode *inode); int inheritable; @@ -55,7 +56,8 @@ find_prop_handler(const char *name, return NULL; } -int btrfs_validate_prop(const char *name, const char *value, size_t value_len) +int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name, + const char *value, size_t value_len) { const struct prop_handler *handler; @@ -69,7 +71,7 @@ int btrfs_validate_prop(const char *name, const char *value, size_t value_len) if (value_len == 0) return 0; - return handler->validate(value, value_len); + return handler->validate(inode, value, value_len); } int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, @@ -252,8 +254,12 @@ int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path) return ret; } -static int prop_compression_validate(const char *value, size_t len) +static int prop_compression_validate(const struct btrfs_inode *inode, + const char *value, size_t len) { + if (!btrfs_inode_can_compress(inode)) + return -EINVAL; + if (!value) return 0; @@ -364,7 +370,7 @@ static int inherit_props(struct btrfs_trans_handle *trans, * This is not strictly necessary as the property should be * valid, but in case it isn't, don't propagate it further. */ - ret = h->validate(value, strlen(value)); + ret = h->validate(BTRFS_I(inode), value, strlen(value)); if (ret) continue; diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h index 40b2c65b518c..2b2ac15ab788 100644 --- a/fs/btrfs/props.h +++ b/fs/btrfs/props.h @@ -13,7 +13,8 @@ void __init btrfs_props_init(void); int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, const char *name, const char *value, size_t value_len, int flags); -int btrfs_validate_prop(const char *name, const char *value, size_t value_len); +int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name, + const char *value, size_t value_len); int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 99abf41b89b9..9632d0ff2038 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -403,7 +403,7 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, struct btrfs_root *root = BTRFS_I(inode)->root; name = xattr_full_name(handler, name); - ret = btrfs_validate_prop(name, value, size); + ret = btrfs_validate_prop(BTRFS_I(inode), name, value, size); if (ret) return ret; From d0e64a981fd841cb0f28fcd6afcac55e6f1e6994 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 21 Apr 2022 10:56:39 +0100 Subject: [PATCH 378/803] btrfs: always log symlinks in full mode On Linux, empty symlinks are invalid, and attempting to create one with the system call symlink(2) results in an -ENOENT error and this is explicitly documented in the man page. If we rename a symlink that was created in the current transaction and its parent directory was logged before, we actually end up logging the symlink without logging its content, which is stored in an inline extent. That means that after a power failure we can end up with an empty symlink, having no content and an i_size of 0 bytes. It can be easily reproduced like this: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ mkdir /mnt/testdir $ sync # Create a file inside the directory and fsync the directory. $ touch /mnt/testdir/foo $ xfs_io -c "fsync" /mnt/testdir # Create a symlink inside the directory and then rename the symlink. $ ln -s /mnt/testdir/foo /mnt/testdir/bar $ mv /mnt/testdir/bar /mnt/testdir/baz # Now fsync again the directory, this persist the log tree. $ xfs_io -c "fsync" /mnt/testdir $ mount /dev/sdc /mnt $ stat -c %s /mnt/testdir/baz 0 $ readlink /mnt/testdir/baz $ Fix this by always logging symlinks in full mode (LOG_INODE_ALL), so that their content is also logged. A test case for fstests will follow. CC: stable@vger.kernel.org # 4.9+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 09e4f1a04e6f..11399c8eed87 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5804,6 +5804,18 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, mutex_lock(&inode->log_mutex); } + /* + * For symlinks, we must always log their content, which is stored in an + * inline extent, otherwise we could end up with an empty symlink after + * log replay, which is invalid on linux (symlink(2) returns -ENOENT if + * one attempts to create an empty symlink). + * We don't need to worry about flushing delalloc, because when we create + * the inline extent when the symlink is created (we never have delalloc + * for symlinks). + */ + if (S_ISLNK(inode->vfs_inode.i_mode)) + inode_only = LOG_INODE_ALL; + /* * Before logging the inode item, cache the value returned by * inode_logged(), because after that we have the need to figure out if @@ -6182,7 +6194,7 @@ again: } ctx->log_new_dentries = false; - if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK) + if (type == BTRFS_FT_DIR) log_mode = LOG_INODE_ALL; ret = btrfs_log_inode(trans, BTRFS_I(di_inode), log_mode, ctx); From 193b4e83986d7ee6caa8ceefb5ee9f58240fbee0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 21 Apr 2022 11:03:09 +0100 Subject: [PATCH 379/803] btrfs: do not BUG_ON() on failure to update inode when setting xattr We are doing a BUG_ON() if we fail to update an inode after setting (or clearing) a xattr, but there's really no reason to not instead simply abort the transaction and return the error to the caller. This should be a rare error because we have previously reserved enough metadata space to update the inode and the delayed inode should have already been setup, so an -ENOSPC or -ENOMEM, which are the possible errors, are very unlikely to happen. So replace the BUG_ON()s with a transaction abort. CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Qu Wenruo Reviewed-by: Anand Jain Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/xattr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 9632d0ff2038..367bb87b66df 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -262,7 +262,8 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name, inode_inc_iversion(inode); inode->i_ctime = current_time(inode); ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); - BUG_ON(ret); + if (ret) + btrfs_abort_transaction(trans, ret); out: if (start_trans) btrfs_end_transaction(trans); @@ -416,7 +417,8 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, inode_inc_iversion(inode); inode->i_ctime = current_time(inode); ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); - BUG_ON(ret); + if (ret) + btrfs_abort_transaction(trans, ret); } btrfs_end_transaction(trans); From 4b73c55fdebd8939f0f6000921075f7f6fa41397 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 21 Apr 2022 11:01:22 +0100 Subject: [PATCH 380/803] btrfs: skip compression property for anything other than files and dirs The compression property only has effect on regular files and directories (so that it's propagated to files and subdirectories created inside a directory). For any other inode type (symlink, fifo, device, socket), it's pointless to set the compression property because it does nothing and ends up unnecessarily wasting leaf space due to the pointless xattr (75 or 76 bytes, depending on the compression value). Symlinks in particular are very common (for example, I have almost 10k symlinks under /etc, /usr and /var alone) and therefore it's worth to avoid wasting leaf space with the compression xattr. For example, the compression property can end up on a symlink or character device implicitly, through inheritance from a parent directory $ mkdir /mnt/testdir $ btrfs property set /mnt/testdir compression lzo $ ln -s yadayada /mnt/testdir/lnk $ mknod /mnt/testdir/dev c 0 0 Or explicitly like this: $ ln -s yadayda /mnt/lnk $ setfattr -h -n btrfs.compression -v lzo /mnt/lnk So skip the compression property on inodes that are neither a regular file nor a directory. CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/props.c | 43 +++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/props.h | 1 + fs/btrfs/xattr.c | 3 +++ 3 files changed, 47 insertions(+) diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 5a6f87744c28..1b31481f9e72 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -21,6 +21,7 @@ struct prop_handler { size_t len); int (*apply)(struct inode *inode, const char *value, size_t len); const char *(*extract)(struct inode *inode); + bool (*ignore)(const struct btrfs_inode *inode); int inheritable; }; @@ -74,6 +75,28 @@ int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name, return handler->validate(inode, value, value_len); } +/* + * Check if a property should be ignored (not set) for an inode. + * + * @inode: The target inode. + * @name: The property's name. + * + * The caller must be sure the given property name is valid, for example by + * having previously called btrfs_validate_prop(). + * + * Returns: true if the property should be ignored for the given inode + * false if the property must not be ignored for the given inode + */ +bool btrfs_ignore_prop(const struct btrfs_inode *inode, const char *name) +{ + const struct prop_handler *handler; + + handler = find_prop_handler(name, NULL); + ASSERT(handler != NULL); + + return handler->ignore(inode); +} + int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, const char *name, const char *value, size_t value_len, int flags) @@ -316,6 +339,22 @@ static int prop_compression_apply(struct inode *inode, const char *value, return 0; } +static bool prop_compression_ignore(const struct btrfs_inode *inode) +{ + /* + * Compression only has effect for regular files, and for directories + * we set it just to propagate it to new files created inside them. + * Everything else (symlinks, devices, sockets, fifos) is pointless as + * it will do nothing, so don't waste metadata space on a compression + * xattr for anything that is neither a file nor a directory. + */ + if (!S_ISREG(inode->vfs_inode.i_mode) && + !S_ISDIR(inode->vfs_inode.i_mode)) + return true; + + return false; +} + static const char *prop_compression_extract(struct inode *inode) { switch (BTRFS_I(inode)->prop_compress) { @@ -336,6 +375,7 @@ static struct prop_handler prop_handlers[] = { .validate = prop_compression_validate, .apply = prop_compression_apply, .extract = prop_compression_extract, + .ignore = prop_compression_ignore, .inheritable = 1 }, }; @@ -362,6 +402,9 @@ static int inherit_props(struct btrfs_trans_handle *trans, if (!h->inheritable) continue; + if (h->ignore(BTRFS_I(inode))) + continue; + value = h->extract(parent); if (!value) continue; diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h index 2b2ac15ab788..59bea741cfcf 100644 --- a/fs/btrfs/props.h +++ b/fs/btrfs/props.h @@ -15,6 +15,7 @@ int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, int flags); int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name, const char *value, size_t value_len); +bool btrfs_ignore_prop(const struct btrfs_inode *inode, const char *name); int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 367bb87b66df..85691dc2232f 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -408,6 +408,9 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, if (ret) return ret; + if (btrfs_ignore_prop(BTRFS_I(inode), name)) + return 0; + trans = btrfs_start_transaction(root, 2); if (IS_ERR(trans)) return PTR_ERR(trans); From 31fa985b4196f8a66f027672e9bf2b81fea0417c Mon Sep 17 00:00:00 2001 From: Zqiang Date: Wed, 27 Apr 2022 12:41:56 -0700 Subject: [PATCH 381/803] kasan: prevent cpu_quarantine corruption when CPU offline and cache shrink occur at same time kasan_quarantine_remove_cache() is called in kmem_cache_shrink()/ destroy(). The kasan_quarantine_remove_cache() call is protected by cpuslock in kmem_cache_destroy() to ensure serialization with kasan_cpu_offline(). However the kasan_quarantine_remove_cache() call is not protected by cpuslock in kmem_cache_shrink(). When a CPU is going offline and cache shrink occurs at same time, the cpu_quarantine may be corrupted by interrupt (per_cpu_remove_cache operation). So add a cpu_quarantine offline flags check in per_cpu_remove_cache(). [akpm@linux-foundation.org: add comment, per Zqiang] Link: https://lkml.kernel.org/r/20220414025925.2423818-1-qiang1.zhang@intel.com Signed-off-by: Zqiang Reviewed-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/quarantine.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 08291ed33e93..0a9def8ce5e8 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -315,6 +315,13 @@ static void per_cpu_remove_cache(void *arg) struct qlist_head *q; q = this_cpu_ptr(&cpu_quarantine); + /* + * Ensure the ordering between the writing to q->offline and + * per_cpu_remove_cache. Prevent cpu_quarantine from being corrupted + * by interrupt. + */ + if (READ_ONCE(q->offline)) + return; qlist_move_cache(q, &to_free, cache); qlist_free_all(&to_free, cache); } From 5603f9bdea68406f54132125b6fdddeeb5c0d2e4 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Wed, 27 Apr 2022 12:41:59 -0700 Subject: [PATCH 382/803] docs: vm/page_owner: use literal blocks for param description Sphinx generates hard-to-read lists of parameters at the bottom of the page. Fix them by putting literal-block markers of "::" in front of them. Link: https://lkml.kernel.org/r/cfd3bcc0-b51d-0c68-c065-ca1c4c202447@gmail.com Signed-off-by: Akira Yokosawa Fixes: 57f2b54a9379 ("Documentation/vm/page_owner.rst: update the documentation") Cc: Shenghong Han Cc: Haowen Bai Cc: Jonathan Corbet Cc: Alex Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/page_owner.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/vm/page_owner.rst b/Documentation/vm/page_owner.rst index 65204d7f004f..7e0c3f574e78 100644 --- a/Documentation/vm/page_owner.rst +++ b/Documentation/vm/page_owner.rst @@ -110,7 +110,7 @@ Usage If you want to sort by the page nums of buf, use the ``-m`` parameter. The detailed parameters are: - fundamental function: + fundamental function:: Sort: -a Sort by memory allocation time. @@ -122,7 +122,7 @@ Usage -s Sort by stack trace. -t Sort by times (default). - additional function: + additional function:: Cull: --cull @@ -153,6 +153,7 @@ Usage STANDARD FORMAT SPECIFIERS ========================== +:: KEY LONG DESCRIPTION p pid process ID From 2a50fc5fd09798cc154b587acd4f4ee261ea19be Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 27 Apr 2022 18:13:32 +0100 Subject: [PATCH 383/803] KVM: arm64: Handle host stage-2 faults from 32-bit EL0 When pKVM is enabled, host memory accesses are translated by an identity mapping at stage-2, which is populated lazily in response to synchronous exceptions from 64-bit EL1 and EL0. Extend this handling to cover exceptions originating from 32-bit EL0 as well. Although these are very unlikely to occur in practice, as the kernel typically ensures that user pages are initialised before mapping them in, drivers could still map previously untouched device pages into userspace and expect things to work rather than panic the system. Cc: Quentin Perret Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220427171332.13635-1-will@kernel.org --- arch/arm64/kvm/hyp/nvhe/host.S | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 3d613e721a75..727c979b2b69 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -198,15 +198,15 @@ SYM_CODE_START(__kvm_hyp_host_vector) invalid_host_el2_vect // FIQ EL2h invalid_host_el2_vect // Error EL2h - host_el1_sync_vect // Synchronous 64-bit EL1 - invalid_host_el1_vect // IRQ 64-bit EL1 - invalid_host_el1_vect // FIQ 64-bit EL1 - invalid_host_el1_vect // Error 64-bit EL1 + host_el1_sync_vect // Synchronous 64-bit EL1/EL0 + invalid_host_el1_vect // IRQ 64-bit EL1/EL0 + invalid_host_el1_vect // FIQ 64-bit EL1/EL0 + invalid_host_el1_vect // Error 64-bit EL1/EL0 - invalid_host_el1_vect // Synchronous 32-bit EL1 - invalid_host_el1_vect // IRQ 32-bit EL1 - invalid_host_el1_vect // FIQ 32-bit EL1 - invalid_host_el1_vect // Error 32-bit EL1 + host_el1_sync_vect // Synchronous 32-bit EL1/EL0 + invalid_host_el1_vect // IRQ 32-bit EL1/EL0 + invalid_host_el1_vect // FIQ 32-bit EL1/EL0 + invalid_host_el1_vect // Error 32-bit EL1/EL0 SYM_CODE_END(__kvm_hyp_host_vector) /* From 8f6379e207e7d834065a080f407a60d67349d961 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Mon, 25 Apr 2022 15:55:30 +0100 Subject: [PATCH 384/803] KVM/arm64: Don't emulate a PMU for 32-bit guests if feature not set kvm->arch.arm_pmu is set when userspace attempts to set the first PMU attribute. As certain attributes are mandatory, arm_pmu ends up always being set to a valid arm_pmu, otherwise KVM will refuse to run the VCPU. However, this only happens if the VCPU has the PMU feature. If the VCPU doesn't have the feature bit set, kvm->arch.arm_pmu will be left uninitialized and equal to NULL. KVM doesn't do ID register emulation for 32-bit guests and accesses to the PMU registers aren't gated by the pmu_visibility() function. This is done to prevent injecting unexpected undefined exceptions in guests which have detected the presence of a hardware PMU. But even though the VCPU feature is missing, KVM still attempts to emulate certain aspects of the PMU when PMU registers are accessed. This leads to a NULL pointer dereference like this one, which happens on an odroid-c4 board when running the kvm-unit-tests pmu-cycle-counter test with kvmtool and without the PMU feature being set: [ 454.402699] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000150 [ 454.405865] Mem abort info: [ 454.408596] ESR = 0x96000004 [ 454.411638] EC = 0x25: DABT (current EL), IL = 32 bits [ 454.416901] SET = 0, FnV = 0 [ 454.419909] EA = 0, S1PTW = 0 [ 454.423010] FSC = 0x04: level 0 translation fault [ 454.427841] Data abort info: [ 454.430687] ISV = 0, ISS = 0x00000004 [ 454.434484] CM = 0, WnR = 0 [ 454.437404] user pgtable: 4k pages, 48-bit VAs, pgdp=000000000c924000 [ 454.443800] [0000000000000150] pgd=0000000000000000, p4d=0000000000000000 [ 454.450528] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 454.456036] Modules linked in: [ 454.459053] CPU: 1 PID: 267 Comm: kvm-vcpu-0 Not tainted 5.18.0-rc4 #113 [ 454.465697] Hardware name: Hardkernel ODROID-C4 (DT) [ 454.470612] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 454.477512] pc : kvm_pmu_event_mask.isra.0+0x14/0x74 [ 454.482427] lr : kvm_pmu_set_counter_event_type+0x2c/0x80 [ 454.487775] sp : ffff80000a9839c0 [ 454.491050] x29: ffff80000a9839c0 x28: ffff000000a83a00 x27: 0000000000000000 [ 454.498127] x26: 0000000000000000 x25: 0000000000000000 x24: ffff00000a510000 [ 454.505198] x23: ffff000000a83a00 x22: ffff000003b01000 x21: 0000000000000000 [ 454.512271] x20: 000000000000001f x19: 00000000000003ff x18: 0000000000000000 [ 454.519343] x17: 000000008003fe98 x16: 0000000000000000 x15: 0000000000000000 [ 454.526416] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 [ 454.533489] x11: 000000008003fdbc x10: 0000000000009d20 x9 : 000000000000001b [ 454.540561] x8 : 0000000000000000 x7 : 0000000000000d00 x6 : 0000000000009d00 [ 454.547633] x5 : 0000000000000037 x4 : 0000000000009d00 x3 : 0d09000000000000 [ 454.554705] x2 : 000000000000001f x1 : 0000000000000000 x0 : 0000000000000000 [ 454.561779] Call trace: [ 454.564191] kvm_pmu_event_mask.isra.0+0x14/0x74 [ 454.568764] kvm_pmu_set_counter_event_type+0x2c/0x80 [ 454.573766] access_pmu_evtyper+0x128/0x170 [ 454.577905] perform_access+0x34/0x80 [ 454.581527] kvm_handle_cp_32+0x13c/0x160 [ 454.585495] kvm_handle_cp15_32+0x1c/0x30 [ 454.589462] handle_exit+0x70/0x180 [ 454.592912] kvm_arch_vcpu_ioctl_run+0x1c4/0x5e0 [ 454.597485] kvm_vcpu_ioctl+0x23c/0x940 [ 454.601280] __arm64_sys_ioctl+0xa8/0xf0 [ 454.605160] invoke_syscall+0x48/0x114 [ 454.608869] el0_svc_common.constprop.0+0xd4/0xfc [ 454.613527] do_el0_svc+0x28/0x90 [ 454.616803] el0_svc+0x34/0xb0 [ 454.619822] el0t_64_sync_handler+0xa4/0x130 [ 454.624049] el0t_64_sync+0x18c/0x190 [ 454.627675] Code: a9be7bfd 910003fd f9000bf3 52807ff3 (b9415001) [ 454.633714] ---[ end trace 0000000000000000 ]--- In this particular case, Linux hasn't detected the presence of a hardware PMU because the PMU node is missing from the DTB, so userspace would have been unable to set the VCPU PMU feature even if it attempted it. What happens is that the 32-bit guest reads ID_DFR0, which advertises the presence of the PMU, and when it tries to program a counter, it triggers the NULL pointer dereference because kvm->arch.arm_pmu is NULL. kvm-arch.arm_pmu was introduced by commit 46b187821472 ("KVM: arm64: Keep a per-VM pointer to the default PMU"). Until that commit, this error would be triggered instead: [ 73.388140] ------------[ cut here ]------------ [ 73.388189] Unknown PMU version 0 [ 73.390420] WARNING: CPU: 1 PID: 264 at arch/arm64/kvm/pmu-emul.c:36 kvm_pmu_event_mask.isra.0+0x6c/0x74 [ 73.399821] Modules linked in: [ 73.402835] CPU: 1 PID: 264 Comm: kvm-vcpu-0 Not tainted 5.17.0 #114 [ 73.409132] Hardware name: Hardkernel ODROID-C4 (DT) [ 73.414048] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 73.420948] pc : kvm_pmu_event_mask.isra.0+0x6c/0x74 [ 73.425863] lr : kvm_pmu_event_mask.isra.0+0x6c/0x74 [ 73.430779] sp : ffff80000a8db9b0 [ 73.434055] x29: ffff80000a8db9b0 x28: ffff000000dbaac0 x27: 0000000000000000 [ 73.441131] x26: ffff000000dbaac0 x25: 00000000c600000d x24: 0000000000180720 [ 73.448203] x23: ffff800009ffbe10 x22: ffff00000b612000 x21: 0000000000000000 [ 73.455276] x20: 000000000000001f x19: 0000000000000000 x18: ffffffffffffffff [ 73.462348] x17: 000000008003fe98 x16: 0000000000000000 x15: 0720072007200720 [ 73.469420] x14: 0720072007200720 x13: ffff800009d32488 x12: 00000000000004e6 [ 73.476493] x11: 00000000000001a2 x10: ffff800009d32488 x9 : ffff800009d32488 [ 73.483565] x8 : 00000000ffffefff x7 : ffff800009d8a488 x6 : ffff800009d8a488 [ 73.490638] x5 : ffff0000f461a9d8 x4 : 0000000000000000 x3 : 0000000000000001 [ 73.497710] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000000dbaac0 [ 73.504784] Call trace: [ 73.507195] kvm_pmu_event_mask.isra.0+0x6c/0x74 [ 73.511768] kvm_pmu_set_counter_event_type+0x2c/0x80 [ 73.516770] access_pmu_evtyper+0x128/0x16c [ 73.520910] perform_access+0x34/0x80 [ 73.524532] kvm_handle_cp_32+0x13c/0x160 [ 73.528500] kvm_handle_cp15_32+0x1c/0x30 [ 73.532467] handle_exit+0x70/0x180 [ 73.535917] kvm_arch_vcpu_ioctl_run+0x20c/0x6e0 [ 73.540489] kvm_vcpu_ioctl+0x2b8/0x9e0 [ 73.544283] __arm64_sys_ioctl+0xa8/0xf0 [ 73.548165] invoke_syscall+0x48/0x114 [ 73.551874] el0_svc_common.constprop.0+0xd4/0xfc [ 73.556531] do_el0_svc+0x28/0x90 [ 73.559808] el0_svc+0x28/0x80 [ 73.562826] el0t_64_sync_handler+0xa4/0x130 [ 73.567054] el0t_64_sync+0x1a0/0x1a4 [ 73.570676] ---[ end trace 0000000000000000 ]--- [ 73.575382] kvm: pmu event creation failed -2 The root cause remains the same: kvm->arch.pmuver was never set to something sensible because the VCPU feature itself was never set. The odroid-c4 is somewhat of a special case, because Linux doesn't probe the PMU. But the above errors can easily be reproduced on any hardware, with or without a PMU driver, as long as userspace doesn't set the PMU feature. Work around the fact that KVM advertises a PMU even when the VCPU feature is not set by gating all PMU emulation on the feature. The guest can still access the registers without KVM injecting an undefined exception. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220425145530.723858-1-alexandru.elisei@arm.com --- arch/arm64/kvm/pmu-emul.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 78fdc443adc7..3dc990ac4f44 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -177,6 +177,9 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc = &pmu->pmc[select_idx]; + if (!kvm_vcpu_has_pmu(vcpu)) + return 0; + counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); if (kvm_pmu_pmc_is_chained(pmc) && @@ -198,6 +201,9 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) { u64 reg; + if (!kvm_vcpu_has_pmu(vcpu)) + return; + reg = (select_idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); @@ -322,6 +328,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc; + if (!kvm_vcpu_has_pmu(vcpu)) + return; + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) return; @@ -357,7 +366,7 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc; - if (!val) + if (!kvm_vcpu_has_pmu(vcpu) || !val) return; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { @@ -527,6 +536,9 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) struct kvm_pmu *pmu = &vcpu->arch.pmu; int i; + if (!kvm_vcpu_has_pmu(vcpu)) + return; + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) return; @@ -576,6 +588,9 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) { int i; + if (!kvm_vcpu_has_pmu(vcpu)) + return; + if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); @@ -739,6 +754,9 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, { u64 reg, mask; + if (!kvm_vcpu_has_pmu(vcpu)) + return; + mask = ARMV8_PMU_EVTYPE_MASK; mask &= ~ARMV8_PMU_EVTYPE_EVENT; mask |= kvm_pmu_event_mask(vcpu->kvm); @@ -827,6 +845,9 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) u64 val, mask = 0; int base, i, nr_events; + if (!kvm_vcpu_has_pmu(vcpu)) + return 0; + if (!pmceid1) { val = read_sysreg(pmceid0_el0); base = 0; From 7c6b6e18c890f30965b0589b0a57645e1dbccfde Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Mon, 18 Apr 2022 11:55:58 -0400 Subject: [PATCH 385/803] drm/amdkfd: Fix GWS queue count dqm->gws_queue_count and pdd->qpd.mapped_gws_queue need to be updated each time the queue gets evicted. Fixes: b8020b0304c8 ("drm/amdkfd: Enable over-subscription with >1 GWS queue") Signed-off-by: David Yat Sin Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 83 +++++++++---------- 1 file changed, 37 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index acf4f7975850..198672264492 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -130,19 +130,33 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, } static void increment_queue_count(struct device_queue_manager *dqm, - enum kfd_queue_type type) + struct qcm_process_device *qpd, + struct queue *q) { dqm->active_queue_count++; - if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || + q->properties.type == KFD_QUEUE_TYPE_DIQ) dqm->active_cp_queue_count++; + + if (q->properties.is_gws) { + dqm->gws_queue_count++; + qpd->mapped_gws_queue = true; + } } static void decrement_queue_count(struct device_queue_manager *dqm, - enum kfd_queue_type type) + struct qcm_process_device *qpd, + struct queue *q) { dqm->active_queue_count--; - if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || + q->properties.type == KFD_QUEUE_TYPE_DIQ) dqm->active_cp_queue_count--; + + if (q->properties.is_gws) { + dqm->gws_queue_count--; + qpd->mapped_gws_queue = false; + } } /* @@ -412,7 +426,7 @@ add_queue_to_list: list_add(&q->list, &qpd->queues_list); qpd->queue_count++; if (q->properties.is_active) - increment_queue_count(dqm, q->properties.type); + increment_queue_count(dqm, qpd, q); /* * Unconditionally increment this counter, regardless of the queue's @@ -601,13 +615,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, deallocate_vmid(dqm, qpd, q); } qpd->queue_count--; - if (q->properties.is_active) { - decrement_queue_count(dqm, q->properties.type); - if (q->properties.is_gws) { - dqm->gws_queue_count--; - qpd->mapped_gws_queue = false; - } - } + if (q->properties.is_active) + decrement_queue_count(dqm, qpd, q); return retval; } @@ -700,12 +709,11 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, * dqm->active_queue_count to determine whether a new runlist must be * uploaded. */ - if (q->properties.is_active && !prev_active) - increment_queue_count(dqm, q->properties.type); - else if (!q->properties.is_active && prev_active) - decrement_queue_count(dqm, q->properties.type); - - if (q->gws && !q->properties.is_gws) { + if (q->properties.is_active && !prev_active) { + increment_queue_count(dqm, &pdd->qpd, q); + } else if (!q->properties.is_active && prev_active) { + decrement_queue_count(dqm, &pdd->qpd, q); + } else if (q->gws && !q->properties.is_gws) { if (q->properties.is_active) { dqm->gws_queue_count++; pdd->qpd.mapped_gws_queue = true; @@ -767,11 +775,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; q->properties.is_active = false; - decrement_queue_count(dqm, q->properties.type); - if (q->properties.is_gws) { - dqm->gws_queue_count--; - qpd->mapped_gws_queue = false; - } + decrement_queue_count(dqm, qpd, q); if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) continue; @@ -817,7 +821,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, continue; q->properties.is_active = false; - decrement_queue_count(dqm, q->properties.type); + decrement_queue_count(dqm, qpd, q); } pdd->last_evict_timestamp = get_jiffies_64(); retval = execute_queues_cpsch(dqm, @@ -888,11 +892,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; q->properties.is_active = true; - increment_queue_count(dqm, q->properties.type); - if (q->properties.is_gws) { - dqm->gws_queue_count++; - qpd->mapped_gws_queue = true; - } + increment_queue_count(dqm, qpd, q); if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) continue; @@ -950,7 +950,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, continue; q->properties.is_active = true; - increment_queue_count(dqm, q->properties.type); + increment_queue_count(dqm, &pdd->qpd, q); } retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); @@ -1378,7 +1378,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, dqm->total_queue_count); list_add(&kq->list, &qpd->priv_queue_list); - increment_queue_count(dqm, kq->queue->properties.type); + increment_queue_count(dqm, qpd, kq->queue); qpd->is_debug = true; execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); dqm_unlock(dqm); @@ -1392,7 +1392,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, { dqm_lock(dqm); list_del(&kq->list); - decrement_queue_count(dqm, kq->queue->properties.type); + decrement_queue_count(dqm, qpd, kq->queue); qpd->is_debug = false; execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); /* @@ -1467,7 +1467,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, qpd->queue_count++; if (q->properties.is_active) { - increment_queue_count(dqm, q->properties.type); + increment_queue_count(dqm, qpd, q); execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); @@ -1683,15 +1683,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, list_del(&q->list); qpd->queue_count--; if (q->properties.is_active) { - decrement_queue_count(dqm, q->properties.type); + decrement_queue_count(dqm, qpd, q); retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); if (retval == -ETIME) qpd->reset_wavefronts = true; - if (q->properties.is_gws) { - dqm->gws_queue_count--; - qpd->mapped_gws_queue = false; - } } /* @@ -1932,7 +1928,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, /* Clean all kernel queues */ list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { list_del(&kq->list); - decrement_queue_count(dqm, kq->queue->properties.type); + decrement_queue_count(dqm, qpd, kq->queue); qpd->is_debug = false; dqm->total_queue_count--; filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; @@ -1945,13 +1941,8 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) deallocate_sdma_queue(dqm, q); - if (q->properties.is_active) { - decrement_queue_count(dqm, q->properties.type); - if (q->properties.is_gws) { - dqm->gws_queue_count--; - qpd->mapped_gws_queue = false; - } - } + if (q->properties.is_active) + decrement_queue_count(dqm, qpd, q); dqm->total_queue_count--; } From f567656f8ab82e43815d8d071d9864941b613a82 Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Wed, 13 Apr 2022 11:37:53 -0400 Subject: [PATCH 386/803] drm/amdkfd: CRIU add support for GWS queues Add support to checkpoint/restore GWS (Global Wave Sync) queues. Signed-off-by: David Yat Sin Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9967a73d5b0f..8f58fc491b28 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1103,7 +1103,7 @@ struct kfd_criu_queue_priv_data { uint32_t priority; uint32_t q_percent; uint32_t doorbell_id; - uint32_t is_gws; + uint32_t gws; uint32_t sdma_id; uint32_t eop_ring_buffer_size; uint32_t ctx_save_restore_area_size; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 6eca9509f2e3..4f58e671d39b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -636,6 +636,8 @@ static int criu_checkpoint_queue(struct kfd_process_device *pdd, q_data->ctx_save_restore_area_size = q->properties.ctx_save_restore_area_size; + q_data->gws = !!q->gws; + ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack); if (ret) { pr_err("Failed checkpoint queue_mqd (%d)\n", ret); @@ -743,7 +745,6 @@ static void set_queue_properties_from_criu(struct queue_properties *qp, struct kfd_criu_queue_priv_data *q_data) { qp->is_interop = false; - qp->is_gws = q_data->is_gws; qp->queue_percent = q_data->q_percent; qp->priority = q_data->priority; qp->queue_address = q_data->q_address; @@ -826,12 +827,15 @@ int kfd_criu_restore_queue(struct kfd_process *p, NULL); if (ret) { pr_err("Failed to create new queue err:%d\n", ret); - ret = -EINVAL; + goto exit; } + if (q_data->gws) + ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws); + exit: if (ret) - pr_err("Failed to create queue (%d)\n", ret); + pr_err("Failed to restore queue (%d)\n", ret); else pr_debug("Queue id %d was restored successfully\n", queue_id); From f95af4a9236695caed24fe6401256bb974e8f2a7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 28 Dec 2021 17:26:24 -0500 Subject: [PATCH 387/803] drm/amdgpu: don't runtime suspend if there are displays attached (v3) We normally runtime suspend when there are displays attached if they are in the DPMS off state, however, if something wakes the GPU we send a hotplug event on resume (in case any displays were connected while the GPU was in suspend) which can cause userspace to light up the displays again soon after they were turned off. Prior to commit 087451f372bf76 ("drm/amdgpu: use generic fb helpers instead of setting up AMD own's."), the driver took a runtime pm reference when the fbdev emulation was enabled because we didn't implement proper shadowing support for vram access when the device was off so the device never runtime suspended when there was a console bound. Once that commit landed, we now utilize the core fb helper implementation which properly handles the emulation, so runtime pm now suspends in cases where it did not before. Ultimately, we need to sort out why runtime suspend in not working in this case for some users, but this should restore similar behavior to before. v2: move check into runtime_suspend v3: wake ups -> wakeups in comment, retain pm_runtime behavior in runtime_idle callback Fixes: 087451f372bf76 ("drm/amdgpu: use generic fb helpers instead of setting up AMD own's.") Link: https://lore.kernel.org/r/20220403132322.51c90903@darkstar.example.org/ Tested-by: Michele Ballabio Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 105 ++++++++++++++++-------- 1 file changed, 70 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 29e9419a914b..7fd0277b2805 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2395,6 +2395,71 @@ static int amdgpu_pmops_restore(struct device *dev) return amdgpu_device_resume(drm_dev, true); } +static int amdgpu_runtime_idle_check_display(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + + if (adev->mode_info.num_crtc) { + struct drm_connector *list_connector; + struct drm_connector_list_iter iter; + int ret = 0; + + /* XXX: Return busy if any displays are connected to avoid + * possible display wakeups after runtime resume due to + * hotplug events in case any displays were connected while + * the GPU was in suspend. Remove this once that is fixed. + */ + mutex_lock(&drm_dev->mode_config.mutex); + drm_connector_list_iter_begin(drm_dev, &iter); + drm_for_each_connector_iter(list_connector, &iter) { + if (list_connector->status == connector_status_connected) { + ret = -EBUSY; + break; + } + } + drm_connector_list_iter_end(&iter); + mutex_unlock(&drm_dev->mode_config.mutex); + + if (ret) + return ret; + + if (amdgpu_device_has_dc_support(adev)) { + struct drm_crtc *crtc; + + drm_for_each_crtc(crtc, drm_dev) { + drm_modeset_lock(&crtc->mutex, NULL); + if (crtc->state->active) + ret = -EBUSY; + drm_modeset_unlock(&crtc->mutex); + if (ret < 0) + break; + } + } else { + mutex_lock(&drm_dev->mode_config.mutex); + drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL); + + drm_connector_list_iter_begin(drm_dev, &iter); + drm_for_each_connector_iter(list_connector, &iter) { + if (list_connector->dpms == DRM_MODE_DPMS_ON) { + ret = -EBUSY; + break; + } + } + + drm_connector_list_iter_end(&iter); + + drm_modeset_unlock(&drm_dev->mode_config.connection_mutex); + mutex_unlock(&drm_dev->mode_config.mutex); + } + if (ret) + return ret; + } + + return 0; +} + static int amdgpu_pmops_runtime_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -2407,6 +2472,10 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) return -EBUSY; } + ret = amdgpu_runtime_idle_check_display(dev); + if (ret) + return ret; + /* wait for all rings to drain before suspending */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; @@ -2516,41 +2585,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) return -EBUSY; } - if (amdgpu_device_has_dc_support(adev)) { - struct drm_crtc *crtc; - - drm_for_each_crtc(crtc, drm_dev) { - drm_modeset_lock(&crtc->mutex, NULL); - if (crtc->state->active) - ret = -EBUSY; - drm_modeset_unlock(&crtc->mutex); - if (ret < 0) - break; - } - - } else { - struct drm_connector *list_connector; - struct drm_connector_list_iter iter; - - mutex_lock(&drm_dev->mode_config.mutex); - drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL); - - drm_connector_list_iter_begin(drm_dev, &iter); - drm_for_each_connector_iter(list_connector, &iter) { - if (list_connector->dpms == DRM_MODE_DPMS_ON) { - ret = -EBUSY; - break; - } - } - - drm_connector_list_iter_end(&iter); - - drm_modeset_unlock(&drm_dev->mode_config.connection_mutex); - mutex_unlock(&drm_dev->mode_config.mutex); - } - - if (ret == -EBUSY) - DRM_DEBUG_DRIVER("failing to power off - crtc active\n"); + ret = amdgpu_runtime_idle_check_display(dev); pm_runtime_mark_last_busy(dev); pm_runtime_autosuspend(dev); From 65e54987508b6f0771f56bdfa3ee1926d52785ae Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 21 Apr 2022 17:03:09 +0800 Subject: [PATCH 388/803] drm/amd/display: Fix memory leak in dcn21_clock_source_create When dcn20_clk_src_construct() fails, we need to release clk_src. Fixes: 6f4e6361c3ff ("drm/amd/display: Add Renoir resource (v2)") Signed-off-by: Miaoqian Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 3fe4bfbb98a0..faab59508d82 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -997,6 +997,7 @@ static struct clock_source *dcn21_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } From a71849cdeaec4579696e5e1c45d9279f7b7484bd Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 8 Apr 2022 19:51:34 +0800 Subject: [PATCH 389/803] drm/amd/pm: fix the deadlock issue observed on SI The adev->pm.mutx is already held at the beginning of amdgpu_dpm_compute_clocks/amdgpu_dpm_enable_uvd/amdgpu_dpm_enable_vce. But on their calling path, amdgpu_display_bandwidth_update will be called and thus its sub functions amdgpu_dpm_get_sclk/mclk. They will then try to acquire the same adev->pm.mutex and deadlock will occur. By placing amdgpu_display_bandwidth_update outside of adev->pm.mutex protection(considering logically they do not need such protection) and restructuring the call flow accordingly, we can eliminate the deadlock issue. This comes with no real logics change. Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in amdgpu_dpm.c") Reported-by: Paul Menzel Reported-by: Arthur Marsh Link: https://lore.kernel.org/all/9e689fea-6c69-f4b0-8dee-32c4cf7d8f9c@molgen.mpg.de/ BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1957 Signed-off-by: Evan Quan Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 39 +++++++++++++++++++ .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c | 10 ----- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 35 ----------------- .../gpu/drm/amd/pm/powerplay/amd_powerplay.c | 10 ----- 4 files changed, 39 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 5504d81c77b7..72e7b5d40af6 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + int i; if (!adev->pm.dpm_enabled) return; @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) if (!pp_funcs->pm_compute_clocks) return; + if (adev->mode_info.num_crtc) + amdgpu_display_bandwidth_update(adev); + + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + if (ring && ring->sched.ready) + amdgpu_fence_wait_empty(ring); + } + mutex_lock(&adev->pm.mutex); pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle); mutex_unlock(&adev->pm.mutex); @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) { int ret = 0; + if (adev->family == AMDGPU_FAMILY_SI) { + mutex_lock(&adev->pm.mutex); + if (enable) { + adev->pm.dpm.uvd_active = true; + adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD; + } else { + adev->pm.dpm.uvd_active = false; + } + mutex_unlock(&adev->pm.mutex); + + amdgpu_dpm_compute_clocks(adev); + return; + } + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); if (ret) DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", @@ -453,6 +477,21 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) { int ret = 0; + if (adev->family == AMDGPU_FAMILY_SI) { + mutex_lock(&adev->pm.mutex); + if (enable) { + adev->pm.dpm.vce_active = true; + /* XXX select vce level based on ring/task */ + adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; + } else { + adev->pm.dpm.vce_active = false; + } + mutex_unlock(&adev->pm.mutex); + + amdgpu_dpm_compute_clocks(adev); + return; + } + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); if (ret) DRM_ERROR("Dpm %s vce failed, ret = %d. \n", diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c index 9613c6181c17..d3fe149d8476 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c @@ -1028,16 +1028,6 @@ static int amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) void amdgpu_legacy_dpm_compute_clocks(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i = 0; - - if (adev->mode_info.num_crtc) - amdgpu_display_bandwidth_update(adev); - - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - if (ring && ring->sched.ready) - amdgpu_fence_wait_empty(ring); - } amdgpu_dpm_get_active_displays(adev); diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index caae54487f9c..633dab14f51c 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct amdgpu_device *adev) } #endif -static int si_set_powergating_by_smu(void *handle, - uint32_t block_type, - bool gate) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - switch (block_type) { - case AMD_IP_BLOCK_TYPE_UVD: - if (!gate) { - adev->pm.dpm.uvd_active = true; - adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD; - } else { - adev->pm.dpm.uvd_active = false; - } - - amdgpu_legacy_dpm_compute_clocks(handle); - break; - case AMD_IP_BLOCK_TYPE_VCE: - if (!gate) { - adev->pm.dpm.vce_active = true; - /* XXX select vce level based on ring/task */ - adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; - } else { - adev->pm.dpm.vce_active = false; - } - - amdgpu_legacy_dpm_compute_clocks(handle); - break; - default: - break; - } - return 0; -} - static int si_set_sw_state(struct amdgpu_device *adev) { return (amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ? @@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs = { .print_power_state = &si_dpm_print_power_state, .debugfs_print_current_performance_level = &si_dpm_debugfs_print_current_performance_level, .force_performance_level = &si_dpm_force_performance_level, - .set_powergating_by_smu = &si_set_powergating_by_smu, .vblank_too_short = &si_dpm_vblank_too_short, .set_fan_control_mode = &si_dpm_set_fan_control_mode, .get_fan_control_mode = &si_dpm_get_fan_control_mode, diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index a2da46bf3985..71e9c6ce6b1a 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -1487,16 +1487,6 @@ static void pp_pm_compute_clocks(void *handle) { struct pp_hwmgr *hwmgr = handle; struct amdgpu_device *adev = hwmgr->adev; - int i = 0; - - if (adev->mode_info.num_crtc) - amdgpu_display_bandwidth_update(adev); - - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - if (ring && ring->sched.ready) - amdgpu_fence_wait_empty(ring); - } if (!amdgpu_device_has_dc_support(adev)) { amdgpu_dpm_get_active_displays(adev); From fb8cc3318e47e1a0ced4025ef614317b541147e7 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Tue, 19 Apr 2022 17:22:34 +0800 Subject: [PATCH 390/803] drm/amdgpu: keep mmhub clock gating being enabled during s2idle suspend Without MMHUB clock gating being enabled then MMHUB will not disconnect from DF and will result in DF C-state entry can't be accessed during S2idle suspend, and eventually s0ix entry will be blocked. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 5228421b0f72..7c956cf21bc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -1151,6 +1151,16 @@ static int gmc_v10_0_set_clockgating_state(void *handle, int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * The issue mmhub can't disconnect from DF with MMHUB clock gating being disabled + * is a new problem observed at DF 3.0.3, however with the same suspend sequence not + * seen any issue on the DF 3.0.2 series platform. + */ + if (adev->in_s0ix && adev->ip_versions[DF_HWIP][0] > IP_VERSION(3, 0, 2)) { + dev_dbg(adev->dev, "keep mmhub clock gating being enabled for s0ix\n"); + return 0; + } + r = adev->mmhub.funcs->set_clockgating(adev, state); if (r) return r; From 85ea6b1ec915c9dd90caf3674b203999d8c7e062 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 21 Apr 2022 15:38:10 +0100 Subject: [PATCH 391/803] KVM: arm64: Inject exception on out-of-IPA-range translation fault When taking a translation fault for an IPA that is outside of the range defined by the hypervisor (between the HW PARange and the IPA range), we stupidly treat it as an IO and forward the access to userspace. Of course, userspace can't do much with it, and things end badly. Arguably, the guest is braindead, but we should at least catch the case and inject an exception. Check the faulting IPA against: - the sanitised PARange: inject an address size fault - the IPA size: inject an abort Reported-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 1 + arch/arm64/kvm/inject_fault.c | 28 ++++++++++++++++++++++++++++ arch/arm64/kvm/mmu.c | 19 +++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 7496deab025a..f71358271b71 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -40,6 +40,7 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_vabt(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +void kvm_inject_size_fault(struct kvm_vcpu *vcpu); void kvm_vcpu_wfi(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index b47df73e98d7..ba20405d2dc2 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -145,6 +145,34 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) inject_abt64(vcpu, true, addr); } +void kvm_inject_size_fault(struct kvm_vcpu *vcpu) +{ + unsigned long addr, esr; + + addr = kvm_vcpu_get_fault_ipa(vcpu); + addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + + if (kvm_vcpu_trap_is_iabt(vcpu)) + kvm_inject_pabt(vcpu, addr); + else + kvm_inject_dabt(vcpu, addr); + + /* + * If AArch64 or LPAE, set FSC to 0 to indicate an Address + * Size Fault at level 0, as if exceeding PARange. + * + * Non-LPAE guests will only get the external abort, as there + * is no way to to describe the ASF. + */ + if (vcpu_el1_is_32bit(vcpu) && + !(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE)) + return; + + esr = vcpu_read_sys_reg(vcpu, ESR_EL1); + esr &= ~GENMASK_ULL(5, 0); + vcpu_write_sys_reg(vcpu, esr, ESR_EL1); +} + /** * kvm_inject_undefined - inject an undefined instruction into the guest * @vcpu: The vCPU in which to inject the exception diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 53ae2c0640bc..5400fc020164 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1337,6 +1337,25 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); is_iabt = kvm_vcpu_trap_is_iabt(vcpu); + if (fault_status == FSC_FAULT) { + /* Beyond sanitised PARange (which is the IPA limit) */ + if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) { + kvm_inject_size_fault(vcpu); + return 1; + } + + /* Falls between the IPA range and the PARange? */ + if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) { + fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + + if (is_iabt) + kvm_inject_pabt(vcpu, fault_ipa); + else + kvm_inject_dabt(vcpu, fault_ipa); + return 1; + } + } + /* Synchronous External Abort? */ if (kvm_vcpu_abt_issea(vcpu)) { /* From a0df71948e9548de819a6f1da68f5f1742258a52 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 26 Apr 2022 18:49:49 +0300 Subject: [PATCH 392/803] tls: Skip tls_append_frag on zero copy size Calling tls_append_frag when max_open_record_len == record->len might add an empty fragment to the TLS record if the call happens to be on the page boundary. Normally tls_append_frag coalesces the zero-sized fragment to the previous one, but not if it's on page boundary. If a resync happens then, the mlx5 driver posts dump WQEs in tx_post_resync_dump, and the empty fragment may become a data segment with byte_count == 0, which will confuse the NIC and lead to a CQE error. This commit fixes the described issue by skipping tls_append_frag on zero size to avoid adding empty fragments. The fix is not in the driver, because an empty fragment is hardly the desired behavior. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/20220426154949.159055-1-maximmi@nvidia.com Signed-off-by: Jakub Kicinski --- net/tls/tls_device.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 12f7b56771d9..af875ad4a822 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -483,11 +483,13 @@ handle_error: copy = min_t(size_t, size, (pfrag->size - pfrag->offset)); copy = min_t(size_t, copy, (max_open_record_len - record->len)); - rc = tls_device_copy_data(page_address(pfrag->page) + - pfrag->offset, copy, msg_iter); - if (rc) - goto handle_error; - tls_append_frag(record, pfrag, copy); + if (copy) { + rc = tls_device_copy_data(page_address(pfrag->page) + + pfrag->offset, copy, msg_iter); + if (rc) + goto handle_error; + tls_append_frag(record, pfrag, copy); + } size -= copy; if (!size) { From af68656d66eda219b7f55ce8313a1da0312c79e1 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Tue, 26 Apr 2022 08:39:13 -0700 Subject: [PATCH 393/803] bnx2x: fix napi API usage sequence While handling PCI errors (AER flow) driver tries to disable NAPI [napi_disable()] after NAPI is deleted [__netif_napi_del()] which causes unexpected system hang/crash. System message log shows the following: ======================================= [ 3222.537510] EEH: Detected PCI bus error on PHB#384-PE#800000 [ 3222.537511] EEH: This PCI device has failed 2 times in the last hour and will be permanently disabled after 5 failures. [ 3222.537512] EEH: Notify device drivers to shutdown [ 3222.537513] EEH: Beginning: 'error_detected(IO frozen)' [ 3222.537514] EEH: PE#800000 (PCI 0384:80:00.0): Invoking bnx2x->error_detected(IO frozen) [ 3222.537516] bnx2x: [bnx2x_io_error_detected:14236(eth14)]IO error detected [ 3222.537650] EEH: PE#800000 (PCI 0384:80:00.0): bnx2x driver reports: 'need reset' [ 3222.537651] EEH: PE#800000 (PCI 0384:80:00.1): Invoking bnx2x->error_detected(IO frozen) [ 3222.537651] bnx2x: [bnx2x_io_error_detected:14236(eth13)]IO error detected [ 3222.537729] EEH: PE#800000 (PCI 0384:80:00.1): bnx2x driver reports: 'need reset' [ 3222.537729] EEH: Finished:'error_detected(IO frozen)' with aggregate recovery state:'need reset' [ 3222.537890] EEH: Collect temporary log [ 3222.583481] EEH: of node=0384:80:00.0 [ 3222.583519] EEH: PCI device/vendor: 168e14e4 [ 3222.583557] EEH: PCI cmd/status register: 00100140 [ 3222.583557] EEH: PCI-E capabilities and status follow: [ 3222.583744] EEH: PCI-E 00: 00020010 012c8da2 00095d5e 00455c82 [ 3222.583892] EEH: PCI-E 10: 10820000 00000000 00000000 00000000 [ 3222.583893] EEH: PCI-E 20: 00000000 [ 3222.583893] EEH: PCI-E AER capability register set follows: [ 3222.584079] EEH: PCI-E AER 00: 13c10001 00000000 00000000 00062030 [ 3222.584230] EEH: PCI-E AER 10: 00002000 000031c0 000001e0 00000000 [ 3222.584378] EEH: PCI-E AER 20: 00000000 00000000 00000000 00000000 [ 3222.584416] EEH: PCI-E AER 30: 00000000 00000000 [ 3222.584416] EEH: of node=0384:80:00.1 [ 3222.584454] EEH: PCI device/vendor: 168e14e4 [ 3222.584491] EEH: PCI cmd/status register: 00100140 [ 3222.584492] EEH: PCI-E capabilities and status follow: [ 3222.584677] EEH: PCI-E 00: 00020010 012c8da2 00095d5e 00455c82 [ 3222.584825] EEH: PCI-E 10: 10820000 00000000 00000000 00000000 [ 3222.584826] EEH: PCI-E 20: 00000000 [ 3222.584826] EEH: PCI-E AER capability register set follows: [ 3222.585011] EEH: PCI-E AER 00: 13c10001 00000000 00000000 00062030 [ 3222.585160] EEH: PCI-E AER 10: 00002000 000031c0 000001e0 00000000 [ 3222.585309] EEH: PCI-E AER 20: 00000000 00000000 00000000 00000000 [ 3222.585347] EEH: PCI-E AER 30: 00000000 00000000 [ 3222.586872] RTAS: event: 5, Type: Platform Error (224), Severity: 2 [ 3222.586873] EEH: Reset without hotplug activity [ 3224.762767] EEH: Beginning: 'slot_reset' [ 3224.762770] EEH: PE#800000 (PCI 0384:80:00.0): Invoking bnx2x->slot_reset() [ 3224.762771] bnx2x: [bnx2x_io_slot_reset:14271(eth14)]IO slot reset initializing... [ 3224.762887] bnx2x 0384:80:00.0: enabling device (0140 -> 0142) [ 3224.768157] bnx2x: [bnx2x_io_slot_reset:14287(eth14)]IO slot reset --> driver unload Uninterruptible tasks ===================== crash> ps | grep UN 213 2 11 c000000004c89e00 UN 0.0 0 0 [eehd] 215 2 0 c000000004c80000 UN 0.0 0 0 [kworker/0:2] 2196 1 28 c000000004504f00 UN 0.1 15936 11136 wickedd 4287 1 9 c00000020d076800 UN 0.0 4032 3008 agetty 4289 1 20 c00000020d056680 UN 0.0 7232 3840 agetty 32423 2 26 c00000020038c580 UN 0.0 0 0 [kworker/26:3] 32871 4241 27 c0000002609ddd00 UN 0.1 18624 11648 sshd 32920 10130 16 c00000027284a100 UN 0.1 48512 12608 sendmail 33092 32987 0 c000000205218b00 UN 0.1 48512 12608 sendmail 33154 4567 16 c000000260e51780 UN 0.1 48832 12864 pickup 33209 4241 36 c000000270cb6500 UN 0.1 18624 11712 sshd 33473 33283 0 c000000205211480 UN 0.1 48512 12672 sendmail 33531 4241 37 c00000023c902780 UN 0.1 18624 11648 sshd EEH handler hung while bnx2x sleeping and holding RTNL lock =========================================================== crash> bt 213 PID: 213 TASK: c000000004c89e00 CPU: 11 COMMAND: "eehd" #0 [c000000004d477e0] __schedule at c000000000c70808 #1 [c000000004d478b0] schedule at c000000000c70ee0 #2 [c000000004d478e0] schedule_timeout at c000000000c76dec #3 [c000000004d479c0] msleep at c0000000002120cc #4 [c000000004d479f0] napi_disable at c000000000a06448 ^^^^^^^^^^^^^^^^ #5 [c000000004d47a30] bnx2x_netif_stop at c0080000018dba94 [bnx2x] #6 [c000000004d47a60] bnx2x_io_slot_reset at c0080000018a551c [bnx2x] #7 [c000000004d47b20] eeh_report_reset at c00000000004c9bc #8 [c000000004d47b90] eeh_pe_report at c00000000004d1a8 #9 [c000000004d47c40] eeh_handle_normal_event at c00000000004da64 And the sleeping source code ============================ crash> dis -ls c000000000a06448 FILE: ../net/core/dev.c LINE: 6702 6697 { 6698 might_sleep(); 6699 set_bit(NAPI_STATE_DISABLE, &n->state); 6700 6701 while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) * 6702 msleep(1); 6703 while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state)) 6704 msleep(1); 6705 6706 hrtimer_cancel(&n->timer); 6707 6708 clear_bit(NAPI_STATE_DISABLE, &n->state); 6709 } EEH calls into bnx2x twice based on the system log above, first through bnx2x_io_error_detected() and then bnx2x_io_slot_reset(), and executes the following call chains: bnx2x_io_error_detected() +-> bnx2x_eeh_nic_unload() +-> bnx2x_del_all_napi() +-> __netif_napi_del() bnx2x_io_slot_reset() +-> bnx2x_netif_stop() +-> bnx2x_napi_disable() +->napi_disable() Fix this by correcting the sequence of NAPI APIs usage, that is delete the NAPI after disabling it. Fixes: 7fa6f34081f1 ("bnx2x: AER revised") Reported-by: David Christensen Tested-by: David Christensen Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Link: https://lore.kernel.org/r/20220426153913.6966-1-manishc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index c19b072f3a23..962253db25b8 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -14153,10 +14153,6 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp) /* Stop Tx */ bnx2x_tx_disable(bp); - /* Delete all NAPI objects */ - bnx2x_del_all_napi(bp); - if (CNIC_LOADED(bp)) - bnx2x_del_all_napi_cnic(bp); netdev_reset_tc(bp->dev); del_timer_sync(&bp->timer); @@ -14261,6 +14257,11 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev) bnx2x_drain_tx_queues(bp); bnx2x_send_unload_req(bp, UNLOAD_RECOVERY); bnx2x_netif_stop(bp, 1); + bnx2x_del_all_napi(bp); + + if (CNIC_LOADED(bp)) + bnx2x_del_all_napi_cnic(bp); + bnx2x_free_irq(bp); /* Report UNLOAD_DONE to MCP */ From d2b52ec056d5bddb055c8f21d7489a23548d0838 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 26 Apr 2022 20:52:31 +0800 Subject: [PATCH 394/803] net: fec: add missing of_node_put() in fec_enet_init_stop_mode() Put device node in error path in fec_enet_init_stop_mode(). Fixes: 8a448bf832af ("net: ethernet: fec: move GPR register offset and bit into DT") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220426125231.375688-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 11227f51404c..9f33ec838b52 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3731,7 +3731,7 @@ static int fec_enet_init_stop_mode(struct fec_enet_private *fep, ARRAY_SIZE(out_val)); if (ret) { dev_dbg(&fep->pdev->dev, "no stop mode property\n"); - return ret; + goto out; } fep->stop_gpr.gpr = syscon_node_to_regmap(gpr_np); From 8be70a842f70c0fe8e00fd488b1966344fa10ff4 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Wed, 27 Apr 2022 14:32:50 -0400 Subject: [PATCH 395/803] scsi: target: pscsi: Set SCF_TREAT_READ_AS_NORMAL flag only if there is valid data With tape devices, the SCF_TREAT_READ_AS_NORMAL flag is used by the target subsystem to mark commands which have both data to return as well as sense data. But with pscsi, SCF_TREAT_READ_AS_NORMAL can be set even if there is no data to return. The SCF_TREAT_READ_AS_NORMAL flag causes the target core to call iscsit data-in callbacks even if there is no data, which iscsit does not support. This results in iscsit going into an error state requiring recovery and being unable to complete the command to the initiator. This issue can be resolved by fixing pscsi to only set SCF_TREAT_READ_AS_NORMAL if there is valid data to return alongside the sense data. Link: https://lore.kernel.org/r/20220427183250.291881-1-djeffery@redhat.com Fixes: bd81372065fa ("scsi: target: transport should handle st FM/EOM/ILI reads") Reported-by: Scott Hamilton Tested-by: Laurence Oberman Reviewed-by: Laurence Oberman Signed-off-by: David Jeffery Signed-off-by: Martin K. Petersen --- drivers/target/target_core_pscsi.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index ff292b75e23f..60dafe4c581b 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -588,7 +588,7 @@ static void pscsi_destroy_device(struct se_device *dev) } static void pscsi_complete_cmd(struct se_cmd *cmd, u8 scsi_status, - unsigned char *req_sense) + unsigned char *req_sense, int valid_data) { struct pscsi_dev_virt *pdv = PSCSI_DEV(cmd->se_dev); struct scsi_device *sd = pdv->pdv_sd; @@ -681,7 +681,7 @@ after_mode_select: * back despite framework assumption that a * check condition means there is no data */ - if (sd->type == TYPE_TAPE && + if (sd->type == TYPE_TAPE && valid_data && cmd->data_direction == DMA_FROM_DEVICE) { /* * is sense data valid, fixed format, @@ -1032,6 +1032,7 @@ static void pscsi_req_done(struct request *req, blk_status_t status) struct se_cmd *cmd = req->end_io_data; struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); enum sam_status scsi_status = scmd->result & 0xff; + int valid_data = cmd->data_length - scmd->resid_len; u8 *cdb = cmd->priv; if (scsi_status != SAM_STAT_GOOD) { @@ -1039,12 +1040,11 @@ static void pscsi_req_done(struct request *req, blk_status_t status) " 0x%02x Result: 0x%08x\n", cmd, cdb[0], scmd->result); } - pscsi_complete_cmd(cmd, scsi_status, scmd->sense_buffer); + pscsi_complete_cmd(cmd, scsi_status, scmd->sense_buffer, valid_data); switch (host_byte(scmd->result)) { case DID_OK: - target_complete_cmd_with_length(cmd, scsi_status, - cmd->data_length - scmd->resid_len); + target_complete_cmd_with_length(cmd, scsi_status, valid_data); break; default: pr_debug("PSCSI Host Byte exception at cmd: %p CDB:" From cae8d1f5e34e5b2604a52d705218a6b2d288365f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 2 Apr 2022 12:08:38 +0200 Subject: [PATCH 396/803] iommu/fsl_pamu: Prepare cleanup of powerpc's asm/prom.h powerpc's asm/prom.h brings some headers that it doesn't need itself. In order to clean it up, first add missing headers in users of asm/prom.h Signed-off-by: Christophe Leroy Link: https://lore.kernel.org/r/06862cca930068e8fa4fdd0b20d74872d3b929d6.1648833431.git.christophe.leroy@csgroup.eu Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu.c | 3 +++ drivers/iommu/fsl_pamu_domain.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index fc38b1fba7cf..0d03f837a5d4 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -11,6 +11,9 @@ #include #include #include +#include +#include +#include #include diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 69a4a62dc3b9..94b4589dc67c 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -9,6 +9,7 @@ #include "fsl_pamu_domain.h" +#include #include /* From 121660bba631104154b7c15e88f208c48c8c3297 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 4 Apr 2022 15:47:22 -0500 Subject: [PATCH 397/803] iommu/amd: Enable swiotlb in all cases Previously the AMD IOMMU would only enable SWIOTLB in certain circumstances: * IOMMU in passthrough mode * SME enabled This logic however doesn't work when an untrusted device is plugged in that doesn't do page aligned DMA transactions. The expectation is that a bounce buffer is used for those transactions. This fails like this: swiotlb buffer is full (sz: 4096 bytes), total 0 (slots), used 0 (slots) That happens because the bounce buffers have been allocated, followed by freed during startup but the bounce buffering code expects that all IOMMUs have left it enabled. Remove the criteria to set up bounce buffers on AMD systems to ensure they're always available for supporting untrusted devices. Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers") Suggested-by: Christoph Hellwig Signed-off-by: Mario Limonciello Reviewed-by: Robin Murphy Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220404204723.9767-2-mario.limonciello@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index a1ada7bff44e..079694f894b8 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1838,17 +1838,10 @@ void amd_iommu_domain_update(struct protection_domain *domain) amd_iommu_domain_flush_complete(domain); } -static void __init amd_iommu_init_dma_ops(void) -{ - swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0; -} - int __init amd_iommu_init_api(void) { int err; - amd_iommu_init_dma_ops(); - err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops); if (err) return err; From f316ba0a8814f4c91e80a435da3421baf0ddd24c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 4 Apr 2022 15:47:23 -0500 Subject: [PATCH 398/803] dma-iommu: Check that swiotlb is active before trying to use it If the IOMMU is in use and an untrusted device is connected to an external facing port but the address requested isn't page aligned will cause the kernel to attempt to use bounce buffers. If for some reason the bounce buffers have not been allocated this is a problem that should be made apparent to the user. Signed-off-by: Mario Limonciello Reviewed-by: Robin Murphy Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220404204723.9767-3-mario.limonciello@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 09f6e1c0f9c0..1ca85d37eeab 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -971,6 +971,11 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, void *padding_start; size_t padding_size, aligned_size; + if (!is_swiotlb_active(dev)) { + dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n"); + return DMA_MAPPING_ERROR; + } + aligned_size = iova_align(iovad, size); phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size, iova_mask(iovad), dir, attrs); From ed36d04e8f8d7b00db451b0fa56a54e8e02ec43e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 25 Apr 2022 13:42:02 +0100 Subject: [PATCH 399/803] iommu: Introduce device_iommu_capable() iommu_capable() only really works for systems where all IOMMU instances are completely homogeneous, and all devices are IOMMU-mapped. Implement the new variant which will be able to give a more accurate answer for whichever device the caller is actually interested in, and even more so once all the external users have been converted and we can reliably pass the device pointer through the internal driver interface too. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/8407eb9586677995b7a9fd70d0fd82d85929a9bb.1650878781.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 23 +++++++++++++++++++++++ include/linux/iommu.h | 6 ++++++ 2 files changed, 29 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index f2c45b85b9fc..780c11734979 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1906,6 +1906,29 @@ bool iommu_present(struct bus_type *bus) } EXPORT_SYMBOL_GPL(iommu_present); +/** + * device_iommu_capable() - check for a general IOMMU capability + * @dev: device to which the capability would be relevant, if available + * @cap: IOMMU capability + * + * Return: true if an IOMMU is present and supports the given capability + * for the given device, otherwise false. + */ +bool device_iommu_capable(struct device *dev, enum iommu_cap cap) +{ + const struct iommu_ops *ops; + + if (!dev->iommu || !dev->iommu->iommu_dev) + return false; + + ops = dev_iommu_ops(dev); + if (!ops->capable) + return false; + + return ops->capable(cap); +} +EXPORT_SYMBOL_GPL(device_iommu_capable); + bool iommu_capable(struct bus_type *bus, enum iommu_cap cap) { if (!bus->iommu_ops || !bus->iommu_ops->capable) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9208eca4b0d1..e26cf84e5d82 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -417,6 +417,7 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops); extern int bus_iommu_probe(struct bus_type *bus); extern bool iommu_present(struct bus_type *bus); +extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); extern bool iommu_capable(struct bus_type *bus, enum iommu_cap cap); extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus); extern struct iommu_group *iommu_group_get_by_id(int id); @@ -689,6 +690,11 @@ static inline bool iommu_present(struct bus_type *bus) return false; } +static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap) +{ + return false; +} + static inline bool iommu_capable(struct bus_type *bus, enum iommu_cap cap) { return false; From d0be55fbeb6ac694d15af5d1aad19cdec8cd64e5 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 25 Apr 2022 13:42:03 +0100 Subject: [PATCH 400/803] iommu: Add capability for pre-boot DMA protection VT-d's dmar_platform_optin() actually represents a combination of properties fairly well standardised by Microsoft as "Pre-boot DMA Protection" and "Kernel DMA Protection"[1]. As such, we can provide interested consumers with an abstracted capability rather than driver-specific interfaces that won't scale. We name it for the former aspect since that's what external callers are most likely to be interested in; the latter is for the IOMMU layer to handle itself. [1] https://docs.microsoft.com/en-us/windows-hardware/design/device-experiences/oem-kernel-dma-protection Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Lu Baolu Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/d6218dff2702472da80db6aec2c9589010684551.1650878781.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 2 ++ include/linux/iommu.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index df5c62ecf942..0edf6084dc14 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4551,6 +4551,8 @@ static bool intel_iommu_capable(enum iommu_cap cap) return domain_update_iommu_snooping(NULL); if (cap == IOMMU_CAP_INTR_REMAP) return irq_remapping_enabled == 1; + if (cap == IOMMU_CAP_PRE_BOOT_PROTECTION) + return dmar_platform_optin(); return false; } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e26cf84e5d82..4123693ae319 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -107,6 +107,8 @@ enum iommu_cap { transactions */ IOMMU_CAP_INTR_REMAP, /* IOMMU supports interrupt isolation */ IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */ + IOMMU_CAP_PRE_BOOT_PROTECTION, /* Firmware says it used the IOMMU for + DMA protection and we should too */ }; /* These are the possible reserved region types */ From 86eaf4a5b4312bea8676fb79399d9e08b53d8e71 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 25 Apr 2022 13:42:04 +0100 Subject: [PATCH 401/803] thunderbolt: Make iommu_dma_protection more accurate Between me trying to get rid of iommu_present() and Mario wanting to support the AMD equivalent of DMAR_PLATFORM_OPT_IN, scrutiny has shown that the iommu_dma_protection attribute is being far too optimistic. Even if an IOMMU might be present for some PCI segment in the system, that doesn't necessarily mean it provides translation for the device(s) we care about. Furthermore, all that DMAR_PLATFORM_OPT_IN really does is tell us that memory was protected before the kernel was loaded, and prevent the user from disabling the intel-iommu driver entirely. While that lets us assume kernel integrity, what matters for actual runtime DMA protection is whether we trust individual devices, based on the "external facing" property that we expect firmware to describe for Thunderbolt ports. It's proven challenging to determine the appropriate ports accurately given the variety of possible topologies, so while still not getting a perfect answer, by putting enough faith in firmware we can at least get a good bit closer. If we can see that any device near a Thunderbolt NHI has all the requisites for Kernel DMA Protection, chances are that it *is* a relevant port, but moreover that implies that firmware is playing the game overall, so we'll use that to assume that all Thunderbolt ports should be correctly marked and thus will end up fully protected. CC: Mario Limonciello Reviewed-by: Christoph Hellwig Acked-by: Mika Westerberg Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/b153f208bc9eafab5105bad0358b77366509d2d4.1650878781.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/thunderbolt/domain.c | 12 +++------- drivers/thunderbolt/nhi.c | 44 ++++++++++++++++++++++++++++++++++++ include/linux/thunderbolt.h | 2 ++ 3 files changed, 49 insertions(+), 9 deletions(-) diff --git a/drivers/thunderbolt/domain.c b/drivers/thunderbolt/domain.c index 7018d959f775..2889a214dadc 100644 --- a/drivers/thunderbolt/domain.c +++ b/drivers/thunderbolt/domain.c @@ -7,9 +7,7 @@ */ #include -#include #include -#include #include #include #include @@ -257,13 +255,9 @@ static ssize_t iommu_dma_protection_show(struct device *dev, struct device_attribute *attr, char *buf) { - /* - * Kernel DMA protection is a feature where Thunderbolt security is - * handled natively using IOMMU. It is enabled when IOMMU is - * enabled and ACPI DMAR table has DMAR_PLATFORM_OPT_IN set. - */ - return sprintf(buf, "%d\n", - iommu_present(&pci_bus_type) && dmar_platform_optin()); + struct tb *tb = container_of(dev, struct tb, dev); + + return sysfs_emit(buf, "%d\n", tb->nhi->iommu_dma_protection); } static DEVICE_ATTR_RO(iommu_dma_protection); diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 4a582183f675..4bc87b0f003a 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -15,9 +15,11 @@ #include #include #include +#include #include #include #include +#include #include "nhi.h" #include "nhi_regs.h" @@ -1103,6 +1105,47 @@ static void nhi_check_quirks(struct tb_nhi *nhi) nhi->quirks |= QUIRK_AUTO_CLEAR_INT; } +static int nhi_check_iommu_pdev(struct pci_dev *pdev, void *data) +{ + if (!pdev->external_facing || + !device_iommu_capable(&pdev->dev, IOMMU_CAP_PRE_BOOT_PROTECTION)) + return 0; + *(bool *)data = true; + return 1; /* Stop walking */ +} + +static void nhi_check_iommu(struct tb_nhi *nhi) +{ + struct pci_bus *bus = nhi->pdev->bus; + bool port_ok = false; + + /* + * Ideally what we'd do here is grab every PCI device that + * represents a tunnelling adapter for this NHI and check their + * status directly, but unfortunately USB4 seems to make it + * obnoxiously difficult to reliably make any correlation. + * + * So for now we'll have to bodge it... Hoping that the system + * is at least sane enough that an adapter is in the same PCI + * segment as its NHI, if we can find *something* on that segment + * which meets the requirements for Kernel DMA Protection, we'll + * take that to imply that firmware is aware and has (hopefully) + * done the right thing in general. We need to know that the PCI + * layer has seen the ExternalFacingPort property which will then + * inform the IOMMU layer to enforce the complete "untrusted DMA" + * flow, but also that the IOMMU driver itself can be trusted not + * to have been subverted by a pre-boot DMA attack. + */ + while (bus->parent) + bus = bus->parent; + + pci_walk_bus(bus, nhi_check_iommu_pdev, &port_ok); + + nhi->iommu_dma_protection = port_ok; + dev_dbg(&nhi->pdev->dev, "IOMMU DMA protection is %s\n", + str_enabled_disabled(port_ok)); +} + static int nhi_init_msi(struct tb_nhi *nhi) { struct pci_dev *pdev = nhi->pdev; @@ -1220,6 +1263,7 @@ static int nhi_probe(struct pci_dev *pdev, const struct pci_device_id *id) return -ENOMEM; nhi_check_quirks(nhi); + nhi_check_iommu(nhi); res = nhi_init_msi(nhi); if (res) { diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 124e13cb1469..7a8ad984e651 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -465,6 +465,7 @@ static inline struct tb_xdomain *tb_service_parent(struct tb_service *svc) * @msix_ida: Used to allocate MSI-X vectors for rings * @going_away: The host controller device is about to disappear so when * this flag is set, avoid touching the hardware anymore. + * @iommu_dma_protection: An IOMMU will isolate external-facing ports. * @interrupt_work: Work scheduled to handle ring interrupt when no * MSI-X is used. * @hop_count: Number of rings (end point hops) supported by NHI. @@ -479,6 +480,7 @@ struct tb_nhi { struct tb_ring **rx_rings; struct ida msix_ida; bool going_away; + bool iommu_dma_protection; struct work_struct interrupt_work; u32 hop_count; unsigned long quirks; From f1ca70717bcb4525e29da422f3d280acbddb36fe Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 25 Apr 2022 13:42:05 +0100 Subject: [PATCH 402/803] iommu/amd: Indicate whether DMA remap support is enabled Bit 1 of the IVFS IVInfo field indicates that IOMMU has been used for pre-boot DMA protection. Export this capability to allow other places in the kernel to be able to check for it on AMD systems. Link: https://www.amd.com/system/files/TechDocs/48882_IOMMU.pdf Reviewed-by: Christoph Hellwig Signed-off-by: Mario Limonciello Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/ce7627fa1c596878ca6515dd9d4381a45b6ee38c.1650878781.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 4 ++++ drivers/iommu/amd/init.c | 3 +++ drivers/iommu/amd/iommu.c | 2 ++ 3 files changed, 9 insertions(+) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 47108ed44fbb..72d0f5e2f651 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -407,6 +407,7 @@ /* IOMMU IVINFO */ #define IOMMU_IVINFO_OFFSET 36 #define IOMMU_IVINFO_EFRSUP BIT(0) +#define IOMMU_IVINFO_DMA_REMAP BIT(1) /* IOMMU Feature Reporting Field (for IVHD type 10h */ #define IOMMU_FEAT_GASUP_SHIFT 6 @@ -449,6 +450,9 @@ extern struct irq_remap_table **irq_lookup_table; /* Interrupt remapping feature used? */ extern bool amd_iommu_irq_remap; +/* IVRS indicates that pre-boot remapping was enabled */ +extern bool amdr_ivrs_remap_support; + /* kmem_cache to get tables with 128 byte alignement */ extern struct kmem_cache *amd_iommu_irq_cache; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index b4a798c7b347..0467918bf7fd 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -182,6 +182,7 @@ u32 amd_iommu_max_pasid __read_mostly = ~0; bool amd_iommu_v2_present __read_mostly; static bool amd_iommu_pc_present __read_mostly; +bool amdr_ivrs_remap_support __read_mostly; bool amd_iommu_force_isolation __read_mostly; @@ -326,6 +327,8 @@ static void __init early_iommu_features_init(struct amd_iommu *iommu, { if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) iommu->features = h->efr_reg; + if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP) + amdr_ivrs_remap_support = true; } /* Access to l1 and l2 indexed register spaces */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 079694f894b8..038e104b922c 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2155,6 +2155,8 @@ static bool amd_iommu_capable(enum iommu_cap cap) return (irq_remapping_enabled == 1); case IOMMU_CAP_NOEXEC: return false; + case IOMMU_CAP_PRE_BOOT_PROTECTION: + return amdr_ivrs_remap_support; default: break; } From 59bf3557cf2f8a469a554aea1e3d2c8e72a579f7 Mon Sep 17 00:00:00 2001 From: David Stevens Date: Sun, 10 Apr 2022 09:35:33 +0800 Subject: [PATCH 403/803] iommu/vt-d: Calculate mask for non-aligned flushes Calculate the appropriate mask for non-size-aligned page selective invalidation. Since psi uses the mask value to mask out the lower order bits of the target address, properly flushing the iotlb requires using a mask value such that [pfn, pfn+pages) all lie within the flushed size-aligned region. This is not normally an issue because iova.c always allocates iovas that are aligned to their size. However, iovas which come from other sources (e.g. userspace via VFIO) may not be aligned. To properly flush the IOTLB, both the start and end pfns need to be equal after applying the mask. That means that the most efficient mask to use is the index of the lowest bit that is equal where all higher bits are also equal. For example, if pfn=0x17f and pages=3, then end_pfn=0x181, so the smallest mask we can use is 8. Any differences above the highest bit of pages are due to carrying, so by xnor'ing pfn and end_pfn and then masking out the lower order bits based on pages, we get 0xffffff00, where the first set bit is the mask we want to use. Fixes: 6fe1010d6d9c ("vfio/type1: DMA unmap chunking") Cc: stable@vger.kernel.org Signed-off-by: David Stevens Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220401022430.1262215-1-stevensd@google.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20220410013533.3959168-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index df5c62ecf942..0ea47e17b379 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1588,7 +1588,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, unsigned long pfn, unsigned int pages, int ih, int map) { - unsigned int mask = ilog2(__roundup_pow_of_two(pages)); + unsigned int aligned_pages = __roundup_pow_of_two(pages); + unsigned int mask = ilog2(aligned_pages); uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; u16 did = domain->iommu_did[iommu->seq_id]; @@ -1600,10 +1601,30 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, if (domain_use_first_level(domain)) { qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih); } else { + unsigned long bitmask = aligned_pages - 1; + + /* + * PSI masks the low order bits of the base address. If the + * address isn't aligned to the mask, then compute a mask value + * needed to ensure the target range is flushed. + */ + if (unlikely(bitmask & pfn)) { + unsigned long end_pfn = pfn + pages - 1, shared_bits; + + /* + * Since end_pfn <= pfn + bitmask, the only way bits + * higher than bitmask can differ in pfn and end_pfn is + * by carrying. This means after masking out bitmask, + * high bits starting with the first set bit in + * shared_bits are all equal in both pfn and end_pfn. + */ + shared_bits = ~(pfn ^ end_pfn) & ~bitmask; + mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; + } + /* * Fallback to domain selective flush if no PSI support or - * the size is too big. PSI requires page size to be 2 ^ x, - * and the base address is naturally aligned to the size. + * the size is too big. */ if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap)) From da8669ff41fa31573375c9a4180f5c080677204b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 23 Apr 2022 16:23:30 +0800 Subject: [PATCH 404/803] iommu/vt-d: Drop stop marker messages The page fault handling framework in the IOMMU core explicitly states that it doesn't handle PCI PASID Stop Marker and the IOMMU drivers must discard them before reporting faults. This handles Stop Marker messages in prq_event_thread() before reporting events to the core. The VT-d driver explicitly drains the pending page requests when a CPU page table (represented by a mm struct) is unbound from a PASID according to the procedures defined in the VT-d spec. The Stop Marker messages do not need a response. Hence, it is safe to drop the Stop Marker messages silently if any of them is found in the page request queue. Fixes: d5b9e4bfe0d88 ("iommu/vt-d: Report prq to io-pgfault framework") Signed-off-by: Lu Baolu Reviewed-by: Jacob Pan Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220421113558.3504874-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220423082330.3897867-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 23a38763c1d1..7ee37d996e15 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -757,6 +757,10 @@ bad_req: goto bad_req; } + /* Drop Stop Marker message. No need for a response. */ + if (unlikely(req->lpig && !req->rd_req && !req->wr_req)) + goto prq_advance; + if (!svm || svm->pasid != req->pasid) { /* * It can't go away, because the driver is not permitted From a15932f4377062364d22096afe25bc579134a1c3 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 25 Apr 2022 17:08:26 +0800 Subject: [PATCH 405/803] iommu/dart: check return value after calling platform_get_resource() It will cause null-ptr-deref in resource_size(), if platform_get_resource() returns NULL, move calling resource_size() after devm_ioremap_resource() that will check 'res' to avoid null-ptr-deref. And use devm_platform_get_and_ioremap_resource() to simplify code. Fixes: 46d1fb072e76 ("iommu/dart: Add DART iommu driver") Signed-off-by: Yang Yingliang Reviewed-by: Sven Peter Link: https://lore.kernel.org/r/20220425090826.2532165-1-yangyingliang@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index decafb07ad08..15b77f16cfa3 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -859,16 +859,15 @@ static int apple_dart_probe(struct platform_device *pdev) dart->dev = dev; spin_lock_init(&dart->lock); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + dart->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(dart->regs)) + return PTR_ERR(dart->regs); + if (resource_size(res) < 0x4000) { dev_err(dev, "MMIO region too small (%pr)\n", res); return -EINVAL; } - dart->regs = devm_ioremap_resource(dev, res); - if (IS_ERR(dart->regs)) - return PTR_ERR(dart->regs); - dart->irq = platform_get_irq(pdev, 0); if (dart->irq < 0) return -ENODEV; From bb5bdc5ab7f133ba6fd32657d2ac90039c561e48 Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Thu, 28 Apr 2022 16:52:39 +0800 Subject: [PATCH 406/803] iommu/msm: Add a check for the return of kzalloc() kzalloc() is a memory allocation function which can return NULL when some internal memory errors happen. So it is better to check it to prevent potential wrong memory access. Besides, to propagate the error to the caller, the type of insert_iommu_master() is changed to `int`. Several instructions related to it are also updated. Signed-off-by: Xiaoke Wang Link: https://lore.kernel.org/r/tencent_EDB94B1C7E14B4E1974A66FF4D2029CC6D08@qq.com Signed-off-by: Joerg Roedel --- drivers/iommu/msm_iommu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 50f57624610f..3d9bd2043738 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -583,7 +583,7 @@ static void print_ctx_regs(void __iomem *base, int ctx) GET_SCTLR(base, ctx), GET_ACTLR(base, ctx)); } -static void insert_iommu_master(struct device *dev, +static int insert_iommu_master(struct device *dev, struct msm_iommu_dev **iommu, struct of_phandle_args *spec) { @@ -592,6 +592,10 @@ static void insert_iommu_master(struct device *dev, if (list_empty(&(*iommu)->ctx_list)) { master = kzalloc(sizeof(*master), GFP_ATOMIC); + if (!master) { + dev_err(dev, "Failed to allocate iommu_master\n"); + return -ENOMEM; + } master->of_node = dev->of_node; list_add(&master->list, &(*iommu)->ctx_list); dev_iommu_priv_set(dev, master); @@ -601,10 +605,11 @@ static void insert_iommu_master(struct device *dev, if (master->mids[sid] == spec->args[0]) { dev_warn(dev, "Stream ID 0x%hx repeated; ignoring\n", sid); - return; + return 0; } master->mids[master->num_mids++] = spec->args[0]; + return 0; } static int qcom_iommu_of_xlate(struct device *dev, @@ -624,7 +629,7 @@ static int qcom_iommu_of_xlate(struct device *dev, goto fail; } - insert_iommu_master(dev, &iommu, spec); + ret = insert_iommu_master(dev, &iommu, spec); fail: spin_unlock_irqrestore(&msm_iommu_lock, flags); From c35fe2a68f29a0bda15ae994154cacaae5f69791 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 25 Apr 2022 16:18:33 +0100 Subject: [PATCH 407/803] elf: Fix the arm64 MTE ELF segment name and value Unfortunately, the name/value choice for the MTE ELF segment type (PT_ARM_MEMTAG_MTE) was pretty poor: LOPROC+1 is already in use by PT_AARCH64_UNWIND, as defined in the AArch64 ELF ABI (https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst). Update the ELF segment type value to LOPROC+2 and also change the define to PT_AARCH64_MEMTAG_MTE to match the AArch64 ELF ABI namespace. The AArch64 ELF ABI document is updating accordingly (segment type not previously mentioned in the document). Signed-off-by: Catalin Marinas Fixes: 761b9b366cec ("elf: Introduce the ARM MTE ELF segment type") Cc: Will Deacon Cc: Jonathan Corbet Cc: Eric Biederman Cc: Kees Cook Cc: Luis Machado Cc: Richard Earnshaw Link: https://lore.kernel.org/r/20220425151833.2603830-1-catalin.marinas@arm.com Signed-off-by: Will Deacon --- Documentation/arm64/memory-tagging-extension.rst | 4 ++-- arch/arm64/kernel/elfcore.c | 2 +- include/uapi/linux/elf.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst index dd27f78d7608..dbae47bba25e 100644 --- a/Documentation/arm64/memory-tagging-extension.rst +++ b/Documentation/arm64/memory-tagging-extension.rst @@ -228,10 +228,10 @@ Core dump support ----------------- The allocation tags for user memory mapped with ``PROT_MTE`` are dumped -in the core file as additional ``PT_ARM_MEMTAG_MTE`` segments. The +in the core file as additional ``PT_AARCH64_MEMTAG_MTE`` segments. The program header for such segment is defined as: -:``p_type``: ``PT_ARM_MEMTAG_MTE`` +:``p_type``: ``PT_AARCH64_MEMTAG_MTE`` :``p_flags``: 0 :``p_offset``: segment file offset :``p_vaddr``: segment virtual address, same as the corresponding diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 2b3f3d0544b9..98d67444a5b6 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -95,7 +95,7 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) for_each_mte_vma(current, vma) { struct elf_phdr phdr; - phdr.p_type = PT_ARM_MEMTAG_MTE; + phdr.p_type = PT_AARCH64_MEMTAG_MTE; phdr.p_offset = offset; phdr.p_vaddr = vma->vm_start; phdr.p_paddr = 0; diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 787c657bfae8..7ce993e6786c 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -42,7 +42,7 @@ typedef __s64 Elf64_Sxword; /* ARM MTE memory tag segment type */ -#define PT_ARM_MEMTAG_MTE (PT_LOPROC + 0x1) +#define PT_AARCH64_MEMTAG_MTE (PT_LOPROC + 0x2) /* * Extended Numbering From 296abc0d91d8b65d42224dd33452ace14491ad08 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 28 Apr 2022 14:51:33 +0200 Subject: [PATCH 408/803] gfs2: No short reads or writes upon glock contention Commit 00bfe02f4796 ("gfs2: Fix mmap + page fault deadlocks for buffered I/O") changed gfs2_file_read_iter() and gfs2_file_buffered_write() to allow dropping the inode glock while faulting in user buffers. When the lock was dropped, a short result was returned to indicate that the operation was interrupted. As pointed out by Linus (see the link below), this behavior is broken and the operations should always re-acquire the inode glock and resume the operation instead. Link: https://lore.kernel.org/lkml/CAHk-=whaz-g_nOOoo8RRiWNjnv2R+h6_xk2F1J4TuSRxk1MtLw@mail.gmail.com/ Fixes: 00bfe02f4796 ("gfs2: Fix mmap + page fault deadlocks for buffered I/O") Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 8d889235afcd..48f01323c37c 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -991,8 +991,6 @@ retry_under_glock: if (leftover != window_size) { if (gfs2_holder_queued(&gh)) goto retry_under_glock; - if (written) - goto out_uninit; goto retry; } } @@ -1069,8 +1067,6 @@ retry_under_glock: from->count = min(from->count, window_size - leftover); if (gfs2_holder_queued(gh)) goto retry_under_glock; - if (read && !(iocb->ki_flags & IOCB_DIRECT)) - goto out_uninit; goto retry; } } From de8fd138430ccac4b8f7b812e5c6f8963b5ccf07 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 28 Apr 2022 12:31:09 +0300 Subject: [PATCH 409/803] perf intel-pt: Fix timeless decoding with perf.data directory Intel PT does not capture data in separate directories, so do not use separate directory processing because it doesn't work for timeless decoding. It also looks like it doesn't support one_mmap handling. Example: Before: # perf record --kcore -a -e intel_pt/tsc=0/k sleep 0.1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.799 MB perf.data ] # perf script --itrace=bep | head # After: # perf script --itrace=bep | head perf 21073 [000] psb: psb offs: 0 ffffffffaa68faf4 native_write_msr+0x4 ([kernel.kallsyms]) perf 21073 [000] cbr: cbr: 45 freq: 4505 MHz (161%) ffffffffaa68faf4 native_write_msr+0x4 ([kernel.kallsyms]) perf 21073 [000] 1 branches:k: 0 [unknown] ([unknown]) => ffffffffaa68faf6 native_write_msr+0x6 ([kernel.kallsyms]) perf 21073 [000] 1 branches:k: ffffffffaa68faf8 native_write_msr+0x8 ([kernel.kallsyms]) => ffffffffaa61aab0 pt_config_start+0x60 ([kernel.kallsyms]) perf 21073 [000] 1 branches:k: ffffffffaa61aabd pt_config_start+0x6d ([kernel.kallsyms]) => ffffffffaa61b8ad pt_event_start+0x27d ([kernel.kallsyms]) perf 21073 [000] 1 branches:k: ffffffffaa61b8bb pt_event_start+0x28b ([kernel.kallsyms]) => ffffffffaa61ba60 pt_event_add+0x40 ([kernel.kallsyms]) perf 21073 [000] 1 branches:k: ffffffffaa61ba76 pt_event_add+0x56 ([kernel.kallsyms]) => ffffffffaa880e86 event_sched_in+0xc6 ([kernel.kallsyms]) perf 21073 [000] 1 branches:k: ffffffffaa880e9b event_sched_in+0xdb ([kernel.kallsyms]) => ffffffffaa880ea5 event_sched_in+0xe5 ([kernel.kallsyms]) perf 21073 [000] 1 branches:k: ffffffffaa880eba event_sched_in+0xfa ([kernel.kallsyms]) => ffffffffaa880f96 event_sched_in+0x1d6 ([kernel.kallsyms]) perf 21073 [000] 1 branches:k: ffffffffaa880fc8 event_sched_in+0x208 ([kernel.kallsyms]) => ffffffffaa880ec0 event_sched_in+0x100 ([kernel.kallsyms]) Fixes: bb6be405c4a2a5 ("perf session: Load data directory files for analysis") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Signed-off-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20220428093109.274641-1-adrian.hunter@intel.com Cc: Ian Rogers Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: linux-kernel@vger.kernel.org --- tools/perf/util/session.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 45a30040ec8d..f9a320694b85 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2576,7 +2576,7 @@ int perf_session__process_events(struct perf_session *session) if (perf_data__is_pipe(session->data)) return __perf_session__process_pipe_events(session); - if (perf_data__is_dir(session->data)) + if (perf_data__is_dir(session->data) && session->data->dir.nr) return __perf_session__process_dir_events(session); return __perf_session__process_events(session); From 1ea2a07a532b0e22aabe7e8483f935c672b9e7ed Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:49:50 +0800 Subject: [PATCH 410/803] iommu: Add DMA ownership management interfaces Multiple devices may be placed in the same IOMMU group because they cannot be isolated from each other. These devices must either be entirely under kernel control or userspace control, never a mixture. This adds dma ownership management in iommu core and exposes several interfaces for the device drivers and the device userspace assignment framework (i.e. VFIO), so that any conflict between user and kernel controlled dma could be detected at the beginning. The device driver oriented interfaces are, int iommu_device_use_default_domain(struct device *dev); void iommu_device_unuse_default_domain(struct device *dev); By calling iommu_device_use_default_domain(), the device driver tells the iommu layer that the device dma is handled through the kernel DMA APIs. The iommu layer will manage the IOVA and use the default domain for DMA address translation. The device user-space assignment framework oriented interfaces are, int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner); void iommu_group_release_dma_owner(struct iommu_group *group); bool iommu_group_dma_owner_claimed(struct iommu_group *group); The device userspace assignment must be disallowed if the DMA owner claiming interface returns failure. Signed-off-by: Jason Gunthorpe Signed-off-by: Kevin Tian Signed-off-by: Lu Baolu Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20220418005000.897664-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 153 +++++++++++++++++++++++++++++++++++++++++- include/linux/iommu.h | 31 +++++++++ 2 files changed, 181 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index f2c45b85b9fc..eba8e8ccf19d 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -48,6 +48,8 @@ struct iommu_group { struct iommu_domain *default_domain; struct iommu_domain *domain; struct list_head entry; + unsigned int owner_cnt; + void *owner; }; struct group_device { @@ -294,7 +296,11 @@ int iommu_probe_device(struct device *dev) mutex_lock(&group->mutex); iommu_alloc_default_domain(group, dev); - if (group->default_domain) { + /* + * If device joined an existing group which has been claimed, don't + * attach the default domain. + */ + if (group->default_domain && !group->owner) { ret = __iommu_attach_device(group->default_domain, dev); if (ret) { mutex_unlock(&group->mutex); @@ -2109,7 +2115,7 @@ static int __iommu_attach_group(struct iommu_domain *domain, { int ret; - if (group->default_domain && group->domain != group->default_domain) + if (group->domain && group->domain != group->default_domain) return -EBUSY; ret = __iommu_group_for_each_dev(group, domain, @@ -2146,7 +2152,11 @@ static void __iommu_detach_group(struct iommu_domain *domain, { int ret; - if (!group->default_domain) { + /* + * If the group has been claimed already, do not re-attach the default + * domain. + */ + if (!group->default_domain || group->owner) { __iommu_group_for_each_dev(group, domain, iommu_group_do_detach_device); group->domain = NULL; @@ -3095,3 +3105,140 @@ out: return ret; } + +/** + * iommu_device_use_default_domain() - Device driver wants to handle device + * DMA through the kernel DMA API. + * @dev: The device. + * + * The device driver about to bind @dev wants to do DMA through the kernel + * DMA API. Return 0 if it is allowed, otherwise an error. + */ +int iommu_device_use_default_domain(struct device *dev) +{ + struct iommu_group *group = iommu_group_get(dev); + int ret = 0; + + if (!group) + return 0; + + mutex_lock(&group->mutex); + if (group->owner_cnt) { + if (group->domain != group->default_domain || + group->owner) { + ret = -EBUSY; + goto unlock_out; + } + } + + group->owner_cnt++; + +unlock_out: + mutex_unlock(&group->mutex); + iommu_group_put(group); + + return ret; +} + +/** + * iommu_device_unuse_default_domain() - Device driver stops handling device + * DMA through the kernel DMA API. + * @dev: The device. + * + * The device driver doesn't want to do DMA through kernel DMA API anymore. + * It must be called after iommu_device_use_default_domain(). + */ +void iommu_device_unuse_default_domain(struct device *dev) +{ + struct iommu_group *group = iommu_group_get(dev); + + if (!group) + return; + + mutex_lock(&group->mutex); + if (!WARN_ON(!group->owner_cnt)) + group->owner_cnt--; + + mutex_unlock(&group->mutex); + iommu_group_put(group); +} + +/** + * iommu_group_claim_dma_owner() - Set DMA ownership of a group + * @group: The group. + * @owner: Caller specified pointer. Used for exclusive ownership. + * + * This is to support backward compatibility for vfio which manages + * the dma ownership in iommu_group level. New invocations on this + * interface should be prohibited. + */ +int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) +{ + int ret = 0; + + mutex_lock(&group->mutex); + if (group->owner_cnt) { + ret = -EPERM; + goto unlock_out; + } else { + if (group->domain && group->domain != group->default_domain) { + ret = -EBUSY; + goto unlock_out; + } + + group->owner = owner; + if (group->domain) + __iommu_detach_group(group->domain, group); + } + + group->owner_cnt++; +unlock_out: + mutex_unlock(&group->mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); + +/** + * iommu_group_release_dma_owner() - Release DMA ownership of a group + * @group: The group. + * + * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). + */ +void iommu_group_release_dma_owner(struct iommu_group *group) +{ + mutex_lock(&group->mutex); + if (WARN_ON(!group->owner_cnt || !group->owner)) + goto unlock_out; + + group->owner_cnt = 0; + /* + * The UNMANAGED domain should be detached before all USER + * owners have been released. + */ + if (!WARN_ON(group->domain) && group->default_domain) + __iommu_attach_group(group->default_domain, group); + group->owner = NULL; +unlock_out: + mutex_unlock(&group->mutex); +} +EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); + +/** + * iommu_group_dma_owner_claimed() - Query group dma ownership status + * @group: The group. + * + * This provides status query on a given group. It is racy and only for + * non-binding status reporting. + */ +bool iommu_group_dma_owner_claimed(struct iommu_group *group) +{ + unsigned int user; + + mutex_lock(&group->mutex); + user = group->owner_cnt; + mutex_unlock(&group->mutex); + + return user; +} +EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9208eca4b0d1..77972ef978b5 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -675,6 +675,13 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, void iommu_sva_unbind_device(struct iommu_sva *handle); u32 iommu_sva_get_pasid(struct iommu_sva *handle); +int iommu_device_use_default_domain(struct device *dev); +void iommu_device_unuse_default_domain(struct device *dev); + +int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner); +void iommu_group_release_dma_owner(struct iommu_group *group); +bool iommu_group_dma_owner_claimed(struct iommu_group *group); + #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; @@ -1031,6 +1038,30 @@ static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { return NULL; } + +static inline int iommu_device_use_default_domain(struct device *dev) +{ + return 0; +} + +static inline void iommu_device_unuse_default_domain(struct device *dev) +{ +} + +static inline int +iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) +{ + return -ENODEV; +} + +static inline void iommu_group_release_dma_owner(struct iommu_group *group) +{ +} + +static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group) +{ + return false; +} #endif /* CONFIG_IOMMU_API */ /** From 25f3bcfc54bcf7b0e45d140ec8bfbbf10ba11869 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:49:51 +0800 Subject: [PATCH 411/803] driver core: Add dma_cleanup callback in bus_type The bus_type structure defines dma_configure() callback for bus drivers to configure DMA on the devices. This adds the paired dma_cleanup() callback and calls it during driver unbinding so that bus drivers can do some cleanup work. One use case for this paired DMA callbacks is for the bus driver to check for DMA ownership conflicts during driver binding, where multiple devices belonging to a same IOMMU group (the minimum granularity of isolation and protection) may be assigned to kernel drivers or user space respectively. Without this change, for example, the vfio driver has to listen to a bus BOUND_DRIVER event and then BUG_ON() in case of dma ownership conflict. This leads to bad user experience since careless driver binding operation may crash the system if the admin overlooks the group restriction. Aside from bad design, this leads to a security problem as a root user, even with lockdown=integrity, can force the kernel to BUG. With this change, the bus driver could check and set the DMA ownership in driver binding process and fail on ownership conflicts. The DMA ownership should be released during driver unbinding. Signed-off-by: Lu Baolu Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220418005000.897664-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/base/dd.c | 5 +++++ include/linux/device/bus.h | 3 +++ 2 files changed, 8 insertions(+) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 3fc3b5940bb3..94b7ac9bf459 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -671,6 +671,8 @@ sysfs_failed: if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DRIVER_NOT_BOUND, dev); + if (dev->bus && dev->bus->dma_cleanup) + dev->bus->dma_cleanup(dev); pinctrl_bind_failed: device_links_no_driver(dev); device_unbind_cleanup(dev); @@ -1199,6 +1201,9 @@ static void __device_release_driver(struct device *dev, struct device *parent) device_remove(dev); + if (dev->bus && dev->bus->dma_cleanup) + dev->bus->dma_cleanup(dev); + device_links_driver_cleanup(dev); device_unbind_cleanup(dev); diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index a039ab809753..d8b29ccd07e5 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -59,6 +59,8 @@ struct fwnode_handle; * bus supports. * @dma_configure: Called to setup DMA configuration on a device on * this bus. + * @dma_cleanup: Called to cleanup DMA configuration on a device on + * this bus. * @pm: Power management operations of this bus, callback the specific * device driver's pm-ops. * @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU @@ -103,6 +105,7 @@ struct bus_type { int (*num_vf)(struct device *dev); int (*dma_configure)(struct device *dev); + void (*dma_cleanup)(struct device *dev); const struct dev_pm_ops *pm; From 4a6d9dd564d0e7339fc15ecc5ce66db4ad842be2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:49:52 +0800 Subject: [PATCH 412/803] amba: Stop sharing platform_dma_configure() Stop sharing platform_dma_configure() helper as they are about to have their own bus dma_configure callbacks. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220418005000.897664-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/amba/bus.c | 19 ++++++++++++++++++- drivers/base/platform.c | 3 +-- include/linux/platform_device.h | 2 -- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index d3bd14aaabf6..76b52bd2c2a4 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #define to_amba_driver(d) container_of(d, struct amba_driver, drv) @@ -273,6 +275,21 @@ static void amba_shutdown(struct device *dev) drv->shutdown(to_amba_device(dev)); } +static int amba_dma_configure(struct device *dev) +{ + enum dev_dma_attr attr; + int ret = 0; + + if (dev->of_node) { + ret = of_dma_configure(dev, dev->of_node, true); + } else if (has_acpi_companion(dev)) { + attr = acpi_get_dma_attr(to_acpi_device_node(dev->fwnode)); + ret = acpi_dma_configure(dev, attr); + } + + return ret; +} + #ifdef CONFIG_PM /* * Hooks to provide runtime PM of the pclk (bus clock). It is safe to @@ -341,7 +358,7 @@ struct bus_type amba_bustype = { .probe = amba_probe, .remove = amba_remove, .shutdown = amba_shutdown, - .dma_configure = platform_dma_configure, + .dma_configure = amba_dma_configure, .pm = &amba_pm, }; EXPORT_SYMBOL_GPL(amba_bustype); diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 8cc272fd5c99..d7915734d931 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -1454,8 +1454,7 @@ static void platform_shutdown(struct device *_dev) drv->shutdown(dev); } - -int platform_dma_configure(struct device *dev) +static int platform_dma_configure(struct device *dev) { enum dev_dma_attr attr; int ret = 0; diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 7c96f169d274..17fde717df68 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -328,8 +328,6 @@ extern int platform_pm_restore(struct device *dev); #define platform_pm_restore NULL #endif -extern int platform_dma_configure(struct device *dev); - #ifdef CONFIG_PM_SLEEP #define USE_PLATFORM_PM_SLEEP_OPS \ .suspend = platform_pm_suspend, \ From 512881eacfa72c2136b27b9934b7b27504a9efc2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:49:53 +0800 Subject: [PATCH 413/803] bus: platform,amba,fsl-mc,PCI: Add device DMA ownership management The devices on platform/amba/fsl-mc/PCI buses could be bound to drivers with the device DMA managed by kernel drivers or user-space applications. Unfortunately, multiple devices may be placed in the same IOMMU group because they cannot be isolated from each other. The DMA on these devices must either be entirely under kernel control or userspace control, never a mixture. Otherwise the driver integrity is not guaranteed because they could access each other through the peer-to-peer accesses which by-pass the IOMMU protection. This checks and sets the default DMA mode during driver binding, and cleanups during driver unbinding. In the default mode, the device DMA is managed by the device driver which handles DMA operations through the kernel DMA APIs (see Documentation/core-api/dma-api.rst). For cases where the devices are assigned for userspace control through the userspace driver framework(i.e. VFIO), the drivers(for example, vfio_pci/ vfio_platfrom etc.) may set a new flag (driver_managed_dma) to skip this default setting in the assumption that the drivers know what they are doing with the device DMA. Calling iommu_device_use_default_domain() before {of,acpi}_dma_configure is currently a problem. As things stand, the IOMMU driver ignored the initial iommu_probe_device() call when the device was added, since at that point it had no fwspec yet. In this situation, {of,acpi}_iommu_configure() are retriggering iommu_probe_device() after the IOMMU driver has seen the firmware data via .of_xlate to learn that it actually responsible for the given device. As the result, before that gets fixed, iommu_use_default_domain() goes at the end, and calls arch_teardown_dma_ops() if it fails. Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Stuart Yoder Cc: Laurentiu Tudor Signed-off-by: Lu Baolu Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Reviewed-by: Robin Murphy Tested-by: Eric Auger Link: https://lore.kernel.org/r/20220418005000.897664-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/amba/bus.c | 18 ++++++++++++++++++ drivers/base/platform.c | 18 ++++++++++++++++++ drivers/bus/fsl-mc/fsl-mc-bus.c | 24 ++++++++++++++++++++++-- drivers/pci/pci-driver.c | 18 ++++++++++++++++++ include/linux/amba/bus.h | 8 ++++++++ include/linux/fsl/mc.h | 8 ++++++++ include/linux/pci.h | 8 ++++++++ include/linux/platform_device.h | 8 ++++++++ 8 files changed, 108 insertions(+), 2 deletions(-) diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 76b52bd2c2a4..a0ec61232b6c 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #define to_amba_driver(d) container_of(d, struct amba_driver, drv) @@ -277,6 +279,7 @@ static void amba_shutdown(struct device *dev) static int amba_dma_configure(struct device *dev) { + struct amba_driver *drv = to_amba_driver(dev->driver); enum dev_dma_attr attr; int ret = 0; @@ -287,9 +290,23 @@ static int amba_dma_configure(struct device *dev) ret = acpi_dma_configure(dev, attr); } + if (!ret && !drv->driver_managed_dma) { + ret = iommu_device_use_default_domain(dev); + if (ret) + arch_teardown_dma_ops(dev); + } + return ret; } +static void amba_dma_cleanup(struct device *dev) +{ + struct amba_driver *drv = to_amba_driver(dev->driver); + + if (!drv->driver_managed_dma) + iommu_device_unuse_default_domain(dev); +} + #ifdef CONFIG_PM /* * Hooks to provide runtime PM of the pclk (bus clock). It is safe to @@ -359,6 +376,7 @@ struct bus_type amba_bustype = { .remove = amba_remove, .shutdown = amba_shutdown, .dma_configure = amba_dma_configure, + .dma_cleanup = amba_dma_cleanup, .pm = &amba_pm, }; EXPORT_SYMBOL_GPL(amba_bustype); diff --git a/drivers/base/platform.c b/drivers/base/platform.c index d7915734d931..70bc30cf575c 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include "base.h" #include "power/power.h" @@ -1456,6 +1458,7 @@ static void platform_shutdown(struct device *_dev) static int platform_dma_configure(struct device *dev) { + struct platform_driver *drv = to_platform_driver(dev->driver); enum dev_dma_attr attr; int ret = 0; @@ -1466,9 +1469,23 @@ static int platform_dma_configure(struct device *dev) ret = acpi_dma_configure(dev, attr); } + if (!ret && !drv->driver_managed_dma) { + ret = iommu_device_use_default_domain(dev); + if (ret) + arch_teardown_dma_ops(dev); + } + return ret; } +static void platform_dma_cleanup(struct device *dev) +{ + struct platform_driver *drv = to_platform_driver(dev->driver); + + if (!drv->driver_managed_dma) + iommu_device_unuse_default_domain(dev); +} + static const struct dev_pm_ops platform_dev_pm_ops = { SET_RUNTIME_PM_OPS(pm_generic_runtime_suspend, pm_generic_runtime_resume, NULL) USE_PLATFORM_PM_SLEEP_OPS @@ -1483,6 +1500,7 @@ struct bus_type platform_bus_type = { .remove = platform_remove, .shutdown = platform_shutdown, .dma_configure = platform_dma_configure, + .dma_cleanup = platform_dma_cleanup, .pm = &platform_dev_pm_ops, }; EXPORT_SYMBOL_GPL(platform_bus_type); diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index 8fd4a356a86e..76648c4fdaf4 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "fsl-mc-private.h" @@ -140,15 +141,33 @@ static int fsl_mc_dma_configure(struct device *dev) { struct device *dma_dev = dev; struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev); + struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(dev->driver); u32 input_id = mc_dev->icid; + int ret; while (dev_is_fsl_mc(dma_dev)) dma_dev = dma_dev->parent; if (dev_of_node(dma_dev)) - return of_dma_configure_id(dev, dma_dev->of_node, 0, &input_id); + ret = of_dma_configure_id(dev, dma_dev->of_node, 0, &input_id); + else + ret = acpi_dma_configure_id(dev, DEV_DMA_COHERENT, &input_id); - return acpi_dma_configure_id(dev, DEV_DMA_COHERENT, &input_id); + if (!ret && !mc_drv->driver_managed_dma) { + ret = iommu_device_use_default_domain(dev); + if (ret) + arch_teardown_dma_ops(dev); + } + + return ret; +} + +static void fsl_mc_dma_cleanup(struct device *dev) +{ + struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(dev->driver); + + if (!mc_drv->driver_managed_dma) + iommu_device_unuse_default_domain(dev); } static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, @@ -312,6 +331,7 @@ struct bus_type fsl_mc_bus_type = { .match = fsl_mc_bus_match, .uevent = fsl_mc_bus_uevent, .dma_configure = fsl_mc_dma_configure, + .dma_cleanup = fsl_mc_dma_cleanup, .dev_groups = fsl_mc_dev_groups, .bus_groups = fsl_mc_bus_groups, }; diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 4ceeb75fc899..f83f7fbac68f 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "pci.h" #include "pcie/portdrv.h" @@ -1601,6 +1602,7 @@ static int pci_bus_num_vf(struct device *dev) */ static int pci_dma_configure(struct device *dev) { + struct pci_driver *driver = to_pci_driver(dev->driver); struct device *bridge; int ret = 0; @@ -1616,9 +1618,24 @@ static int pci_dma_configure(struct device *dev) } pci_put_host_bridge_device(bridge); + + if (!ret && !driver->driver_managed_dma) { + ret = iommu_device_use_default_domain(dev); + if (ret) + arch_teardown_dma_ops(dev); + } + return ret; } +static void pci_dma_cleanup(struct device *dev) +{ + struct pci_driver *driver = to_pci_driver(dev->driver); + + if (!driver->driver_managed_dma) + iommu_device_unuse_default_domain(dev); +} + struct bus_type pci_bus_type = { .name = "pci", .match = pci_bus_match, @@ -1632,6 +1649,7 @@ struct bus_type pci_bus_type = { .pm = PCI_PM_OPS_PTR, .num_vf = pci_bus_num_vf, .dma_configure = pci_dma_configure, + .dma_cleanup = pci_dma_cleanup, }; EXPORT_SYMBOL(pci_bus_type); diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 6562f543c3e0..2ddce9bcd00e 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -79,6 +79,14 @@ struct amba_driver { void (*remove)(struct amba_device *); void (*shutdown)(struct amba_device *); const struct amba_id *id_table; + /* + * For most device drivers, no need to care about this flag as long as + * all DMAs are handled through the kernel DMA API. For some special + * ones, for example VFIO drivers, they know how to manage the DMA + * themselves and set this flag so that the IOMMU layer will allow them + * to setup and manage their own I/O address space. + */ + bool driver_managed_dma; }; /* diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 7b6c42bfb660..27efef8affb1 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -32,6 +32,13 @@ struct fsl_mc_io; * @shutdown: Function called at shutdown time to quiesce the device * @suspend: Function called when a device is stopped * @resume: Function called when a device is resumed + * @driver_managed_dma: Device driver doesn't use kernel DMA API for DMA. + * For most device drivers, no need to care about this flag + * as long as all DMAs are handled through the kernel DMA API. + * For some special ones, for example VFIO drivers, they know + * how to manage the DMA themselves and set this flag so that + * the IOMMU layer will allow them to setup and manage their + * own I/O address space. * * Generic DPAA device driver object for device drivers that are registered * with a DPRC bus. This structure is to be embedded in each device-specific @@ -45,6 +52,7 @@ struct fsl_mc_driver { void (*shutdown)(struct fsl_mc_device *dev); int (*suspend)(struct fsl_mc_device *dev, pm_message_t state); int (*resume)(struct fsl_mc_device *dev); + bool driver_managed_dma; }; #define to_fsl_mc_driver(_drv) \ diff --git a/include/linux/pci.h b/include/linux/pci.h index 60adf42460ab..b933d2b08d4d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -895,6 +895,13 @@ struct module; * created once it is bound to the driver. * @driver: Driver model structure. * @dynids: List of dynamically added device IDs. + * @driver_managed_dma: Device driver doesn't use kernel DMA API for DMA. + * For most device drivers, no need to care about this flag + * as long as all DMAs are handled through the kernel DMA API. + * For some special ones, for example VFIO drivers, they know + * how to manage the DMA themselves and set this flag so that + * the IOMMU layer will allow them to setup and manage their + * own I/O address space. */ struct pci_driver { struct list_head node; @@ -913,6 +920,7 @@ struct pci_driver { const struct attribute_group **dev_groups; struct device_driver driver; struct pci_dynids dynids; + bool driver_managed_dma; }; static inline struct pci_driver *to_pci_driver(struct device_driver *drv) diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 17fde717df68..b3d9c744f1e5 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -210,6 +210,14 @@ struct platform_driver { struct device_driver driver; const struct platform_device_id *id_table; bool prevent_deferred_probe; + /* + * For most device drivers, no need to care about this flag as long as + * all DMAs are handled through the kernel DMA API. For some special + * ones, for example VFIO drivers, they know how to manage the DMA + * themselves and set this flag so that the IOMMU layer will allow them + * to setup and manage their own I/O address space. + */ + bool driver_managed_dma; }; #define to_platform_driver(drv) (container_of((drv), struct platform_driver, \ From 18c7a349d072a222ff80598373035820c194747b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:49:54 +0800 Subject: [PATCH 414/803] PCI: pci_stub: Set driver_managed_dma The current VFIO implementation allows pci-stub driver to be bound to a PCI device with other devices in the same IOMMU group being assigned to userspace. The pci-stub driver has no dependencies on DMA or the IOVA mapping of the device, but it does prevent the user from having direct access to the device, which is useful in some circumstances. The pci_dma_configure() marks the iommu_group as containing only devices with kernel drivers that manage DMA. For compatibility with the VFIO usage, avoid this default behavior for the pci_stub. This allows the pci_stub still able to be used by the admin to block driver binding after applying the DMA ownership to VFIO. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20220418005000.897664-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/pci/pci-stub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/pci-stub.c b/drivers/pci/pci-stub.c index e408099fea52..d1f4c1ce7bd1 100644 --- a/drivers/pci/pci-stub.c +++ b/drivers/pci/pci-stub.c @@ -36,6 +36,7 @@ static struct pci_driver stub_driver = { .name = "pci-stub", .id_table = NULL, /* only dynamic id's */ .probe = pci_stub_probe, + .driver_managed_dma = true, }; static int __init pci_stub_init(void) From c7d469849747ce380785536173d08b30a820a83c Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:49:55 +0800 Subject: [PATCH 415/803] PCI: portdrv: Set driver_managed_dma If a switch lacks ACS P2P Request Redirect, a device below the switch can bypass the IOMMU and DMA directly to other devices below the switch, so all the downstream devices must be in the same IOMMU group as the switch itself. The existing VFIO framework allows the portdrv driver to be bound to the bridge while its downstream devices are assigned to user space. The pci_dma_configure() marks the IOMMU group as containing only devices with kernel drivers that manage DMA. Avoid this default behavior for the portdrv driver in order for compatibility with the current VFIO usage. We achieve this by setting ".driver_managed_dma = true" in pci_driver structure. It is safe because the portdrv driver meets below criteria: - This driver doesn't use DMA, as you can't find any related calls like pci_set_master() or any kernel DMA API (dma_map_*() and etc.). - It doesn't use MMIO as you can't find ioremap() or similar calls. It's tolerant to userspace possibly also touching the same MMIO registers via P2P DMA access. Suggested-by: Jason Gunthorpe Suggested-by: Kevin Tian Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20220418005000.897664-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/pci/pcie/portdrv_pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 4b8801656ffb..7f8788a970ae 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -202,6 +202,8 @@ static struct pci_driver pcie_portdriver = { .err_handler = &pcie_portdrv_err_handler, + .driver_managed_dma = true, + .driver.pm = PCIE_PORTDRV_PM_OPS, }; From 70693f470848f05fae04144675e6e41b105d45fe Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:49:56 +0800 Subject: [PATCH 416/803] vfio: Set DMA ownership for VFIO devices Claim group dma ownership when an IOMMU group is set to a container, and release the dma ownership once the iommu group is unset from the container. This change disallows some unsafe bridge drivers to bind to non-ACS bridges while devices under them are assigned to user space. This is an intentional enhancement and possibly breaks some existing configurations. The recommendation to such an affected user would be that the previously allowed host bridge driver was unsafe for this use case and to continue to enable assignment of devices within that group, the driver should be unbound from the bridge device or replaced with the pci-stub driver. For any bridge driver, we consider it unsafe if it satisfies any of the following conditions: 1) The bridge driver uses DMA. Calling pci_set_master() or calling any kernel DMA API (dma_map_*() and etc.) is an indicate that the driver is doing DMA. 2) If the bridge driver uses MMIO, it should be tolerant to hostile userspace also touching the same MMIO registers via P2P DMA attacks. If the bridge driver turns out to be a safe one, it could be used as before by setting the driver's .driver_managed_dma field, just like what we have done in the pcieport driver. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Acked-by: Alex Williamson Link: https://lore.kernel.org/r/20220418005000.897664-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/vfio/fsl-mc/vfio_fsl_mc.c | 1 + drivers/vfio/pci/vfio_pci.c | 1 + drivers/vfio/platform/vfio_amba.c | 1 + drivers/vfio/platform/vfio_platform.c | 1 + drivers/vfio/vfio.c | 10 +++++++++- 5 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index 6e2e62c6f47a..3feff729f3ce 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -588,6 +588,7 @@ static struct fsl_mc_driver vfio_fsl_mc_driver = { .name = "vfio-fsl-mc", .owner = THIS_MODULE, }, + .driver_managed_dma = true, }; static int __init vfio_fsl_mc_driver_init(void) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 2b047469e02f..58839206d1ca 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -194,6 +194,7 @@ static struct pci_driver vfio_pci_driver = { .remove = vfio_pci_remove, .sriov_configure = vfio_pci_sriov_configure, .err_handler = &vfio_pci_core_err_handlers, + .driver_managed_dma = true, }; static void __init vfio_pci_fill_ids(void) diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c index badfffea14fb..1aaa4f721bd2 100644 --- a/drivers/vfio/platform/vfio_amba.c +++ b/drivers/vfio/platform/vfio_amba.c @@ -95,6 +95,7 @@ static struct amba_driver vfio_amba_driver = { .name = "vfio-amba", .owner = THIS_MODULE, }, + .driver_managed_dma = true, }; module_amba_driver(vfio_amba_driver); diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c index 68a1c87066d7..04f40c5acfd6 100644 --- a/drivers/vfio/platform/vfio_platform.c +++ b/drivers/vfio/platform/vfio_platform.c @@ -76,6 +76,7 @@ static struct platform_driver vfio_platform_driver = { .driver = { .name = "vfio-platform", }, + .driver_managed_dma = true, }; module_platform_driver(vfio_platform_driver); diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index a4555014bd1e..56e741cbccce 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1198,6 +1198,8 @@ static void __vfio_group_unset_container(struct vfio_group *group) driver->ops->detach_group(container->iommu_data, group->iommu_group); + iommu_group_release_dma_owner(group->iommu_group); + group->container = NULL; wake_up(&group->container_q); list_del(&group->container_next); @@ -1282,13 +1284,19 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) goto unlock_out; } + ret = iommu_group_claim_dma_owner(group->iommu_group, f.file); + if (ret) + goto unlock_out; + driver = container->iommu_driver; if (driver) { ret = driver->ops->attach_group(container->iommu_data, group->iommu_group, group->type); - if (ret) + if (ret) { + iommu_group_release_dma_owner(group->iommu_group); goto unlock_out; + } } group->container = container; From 31076af0cb001f698138c8158c798ec9f4a8bbae Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:49:57 +0800 Subject: [PATCH 417/803] vfio: Remove use of vfio_group_viable() As DMA ownership is claimed for the iommu group when a VFIO group is added to a VFIO container, the VFIO group viability is guaranteed as long as group->container_users > 0. Remove those unnecessary group viability checks which are only hit when group->container_users is not zero. The only remaining reference is in GROUP_GET_STATUS, which could be called at any time when group fd is valid. Here we just replace the vfio_group_viable() by directly calling IOMMU core to get viability status. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Acked-by: Alex Williamson Link: https://lore.kernel.org/r/20220418005000.897664-9-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/vfio/vfio.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 56e741cbccce..8a9347f732a5 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1313,12 +1313,6 @@ unlock_out: return ret; } -static bool vfio_group_viable(struct vfio_group *group) -{ - return (iommu_group_for_each_dev(group->iommu_group, - group, vfio_dev_viable) == 0); -} - static int vfio_group_add_container_user(struct vfio_group *group) { if (!atomic_inc_not_zero(&group->container_users)) @@ -1328,7 +1322,7 @@ static int vfio_group_add_container_user(struct vfio_group *group) atomic_dec(&group->container_users); return -EPERM; } - if (!group->container->iommu_driver || !vfio_group_viable(group)) { + if (!group->container->iommu_driver) { atomic_dec(&group->container_users); return -EINVAL; } @@ -1346,7 +1340,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) int ret = 0; if (0 == atomic_read(&group->container_users) || - !group->container->iommu_driver || !vfio_group_viable(group)) + !group->container->iommu_driver) return -EINVAL; if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) @@ -1438,11 +1432,11 @@ static long vfio_group_fops_unl_ioctl(struct file *filep, status.flags = 0; - if (vfio_group_viable(group)) - status.flags |= VFIO_GROUP_FLAGS_VIABLE; - if (group->container) - status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET; + status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET | + VFIO_GROUP_FLAGS_VIABLE; + else if (!iommu_group_dma_owner_claimed(group->iommu_group)) + status.flags |= VFIO_GROUP_FLAGS_VIABLE; if (copy_to_user((void __user *)arg, &status, minsz)) return -EFAULT; From 93219ea94388b5195d1eeadb0bda6ed1471fba56 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 18 Apr 2022 08:49:58 +0800 Subject: [PATCH 418/803] vfio: Delete the unbound_list commit 60720a0fc646 ("vfio: Add device tracking during unbind") added the unbound list to plug a problem with KVM where KVM_DEV_VFIO_GROUP_DEL relied on vfio_group_get_external_user() succeeding to return the vfio_group from a group file descriptor. The unbound list allowed vfio_group_get_external_user() to continue to succeed in edge cases. However commit 5d6dee80a1e9 ("vfio: New external user group/file match") deleted the call to vfio_group_get_external_user() during KVM_DEV_VFIO_GROUP_DEL. Instead vfio_external_group_match_file() is used to directly match the file descriptor to the group pointer. This in turn avoids the call down to vfio_dev_viable() during KVM_DEV_VFIO_GROUP_DEL and also avoids the trouble the first commit was trying to fix. There are no other users of vfio_dev_viable() that care about the time after vfio_unregister_group_dev() returns, so simply delete the unbound_list entirely. Reviewed-by: Chaitanya Kulkarni Signed-off-by: Jason Gunthorpe Signed-off-by: Lu Baolu Acked-by: Alex Williamson Link: https://lore.kernel.org/r/20220418005000.897664-10-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/vfio/vfio.c | 74 ++------------------------------------------- 1 file changed, 2 insertions(+), 72 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 8a9347f732a5..b2f19d17d0c3 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -62,11 +62,6 @@ struct vfio_container { bool noiommu; }; -struct vfio_unbound_dev { - struct device *dev; - struct list_head unbound_next; -}; - struct vfio_group { struct device dev; struct cdev cdev; @@ -79,8 +74,6 @@ struct vfio_group { struct notifier_block nb; struct list_head vfio_next; struct list_head container_next; - struct list_head unbound_list; - struct mutex unbound_lock; atomic_t opened; wait_queue_head_t container_q; enum vfio_group_type type; @@ -340,16 +333,8 @@ vfio_group_get_from_iommu(struct iommu_group *iommu_group) static void vfio_group_release(struct device *dev) { struct vfio_group *group = container_of(dev, struct vfio_group, dev); - struct vfio_unbound_dev *unbound, *tmp; - - list_for_each_entry_safe(unbound, tmp, - &group->unbound_list, unbound_next) { - list_del(&unbound->unbound_next); - kfree(unbound); - } mutex_destroy(&group->device_lock); - mutex_destroy(&group->unbound_lock); iommu_group_put(group->iommu_group); ida_free(&vfio.group_ida, MINOR(group->dev.devt)); kfree(group); @@ -381,8 +366,6 @@ static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group, refcount_set(&group->users, 1); INIT_LIST_HEAD(&group->device_list); mutex_init(&group->device_lock); - INIT_LIST_HEAD(&group->unbound_list); - mutex_init(&group->unbound_lock); init_waitqueue_head(&group->container_q); group->iommu_group = iommu_group; /* put in vfio_group_release() */ @@ -571,19 +554,8 @@ static int vfio_dev_viable(struct device *dev, void *data) struct vfio_group *group = data; struct vfio_device *device; struct device_driver *drv = READ_ONCE(dev->driver); - struct vfio_unbound_dev *unbound; - int ret = -EINVAL; - mutex_lock(&group->unbound_lock); - list_for_each_entry(unbound, &group->unbound_list, unbound_next) { - if (dev == unbound->dev) { - ret = 0; - break; - } - } - mutex_unlock(&group->unbound_lock); - - if (!ret || !drv || vfio_dev_driver_allowed(dev, drv)) + if (!drv || vfio_dev_driver_allowed(dev, drv)) return 0; device = vfio_group_get_device(group, dev); @@ -592,7 +564,7 @@ static int vfio_dev_viable(struct device *dev, void *data) return 0; } - return ret; + return -EINVAL; } /* @@ -634,7 +606,6 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb, { struct vfio_group *group = container_of(nb, struct vfio_group, nb); struct device *dev = data; - struct vfio_unbound_dev *unbound; switch (action) { case IOMMU_GROUP_NOTIFY_ADD_DEVICE: @@ -663,28 +634,6 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb, __func__, iommu_group_id(group->iommu_group), dev->driver->name); break; - case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER: - dev_dbg(dev, "%s: group %d unbound from driver\n", __func__, - iommu_group_id(group->iommu_group)); - /* - * XXX An unbound device in a live group is ok, but we'd - * really like to avoid the above BUG_ON by preventing other - * drivers from binding to it. Once that occurs, we have to - * stop the system to maintain isolation. At a minimum, we'd - * want a toggle to disable driver auto probe for this device. - */ - - mutex_lock(&group->unbound_lock); - list_for_each_entry(unbound, - &group->unbound_list, unbound_next) { - if (dev == unbound->dev) { - list_del(&unbound->unbound_next); - kfree(unbound); - break; - } - } - mutex_unlock(&group->unbound_lock); - break; } return NOTIFY_OK; } @@ -889,29 +838,10 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, void vfio_unregister_group_dev(struct vfio_device *device) { struct vfio_group *group = device->group; - struct vfio_unbound_dev *unbound; unsigned int i = 0; bool interrupted = false; long rc; - /* - * When the device is removed from the group, the group suddenly - * becomes non-viable; the device has a driver (until the unbind - * completes), but it's not present in the group. This is bad news - * for any external users that need to re-acquire a group reference - * in order to match and release their existing reference. To - * solve this, we track such devices on the unbound_list to bridge - * the gap until they're fully unbound. - */ - unbound = kzalloc(sizeof(*unbound), GFP_KERNEL); - if (unbound) { - unbound->dev = device->dev; - mutex_lock(&group->unbound_lock); - list_add(&unbound->unbound_next, &group->unbound_list); - mutex_unlock(&group->unbound_lock); - } - WARN_ON(!unbound); - vfio_device_put(device); rc = try_wait_for_completion(&device->comp); while (rc <= 0) { From 3b86f317c9c71c9fe17409db5743b6a27114f248 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:49:59 +0800 Subject: [PATCH 419/803] vfio: Remove iommu group notifier The iommu core and driver core have been enhanced to avoid unsafe driver binding to a live group after iommu_group_set_dma_owner(PRIVATE_USER) has been called. There's no need to register iommu group notifier. This removes the iommu group notifer which contains BUG_ON() and WARN(). Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Acked-by: Alex Williamson Link: https://lore.kernel.org/r/20220418005000.897664-11-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/vfio/vfio.c | 147 -------------------------------------------- 1 file changed, 147 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index b2f19d17d0c3..0c766384cee0 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -71,7 +71,6 @@ struct vfio_group { struct vfio_container *container; struct list_head device_list; struct mutex device_lock; - struct notifier_block nb; struct list_head vfio_next; struct list_head container_next; atomic_t opened; @@ -274,8 +273,6 @@ void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops) } EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); -static int vfio_iommu_group_notifier(struct notifier_block *nb, - unsigned long action, void *data); static void vfio_group_get(struct vfio_group *group); /* @@ -395,13 +392,6 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, goto err_put; } - group->nb.notifier_call = vfio_iommu_group_notifier; - err = iommu_group_register_notifier(iommu_group, &group->nb); - if (err) { - ret = ERR_PTR(err); - goto err_put; - } - mutex_lock(&vfio.group_lock); /* Did we race creating this group? */ @@ -422,7 +412,6 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, err_unlock: mutex_unlock(&vfio.group_lock); - iommu_group_unregister_notifier(group->iommu_group, &group->nb); err_put: put_device(&group->dev); return ret; @@ -447,7 +436,6 @@ static void vfio_group_put(struct vfio_group *group) cdev_device_del(&group->cdev, &group->dev); mutex_unlock(&vfio.group_lock); - iommu_group_unregister_notifier(group->iommu_group, &group->nb); put_device(&group->dev); } @@ -503,141 +491,6 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group, return NULL; } -/* - * Some drivers, like pci-stub, are only used to prevent other drivers from - * claiming a device and are therefore perfectly legitimate for a user owned - * group. The pci-stub driver has no dependencies on DMA or the IOVA mapping - * of the device, but it does prevent the user from having direct access to - * the device, which is useful in some circumstances. - * - * We also assume that we can include PCI interconnect devices, ie. bridges. - * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge - * then all of the downstream devices will be part of the same IOMMU group as - * the bridge. Thus, if placing the bridge into the user owned IOVA space - * breaks anything, it only does so for user owned devices downstream. Note - * that error notification via MSI can be affected for platforms that handle - * MSI within the same IOVA space as DMA. - */ -static const char * const vfio_driver_allowed[] = { "pci-stub" }; - -static bool vfio_dev_driver_allowed(struct device *dev, - struct device_driver *drv) -{ - if (dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(dev); - - if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) - return true; - } - - return match_string(vfio_driver_allowed, - ARRAY_SIZE(vfio_driver_allowed), - drv->name) >= 0; -} - -/* - * A vfio group is viable for use by userspace if all devices are in - * one of the following states: - * - driver-less - * - bound to a vfio driver - * - bound to an otherwise allowed driver - * - a PCI interconnect device - * - * We use two methods to determine whether a device is bound to a vfio - * driver. The first is to test whether the device exists in the vfio - * group. The second is to test if the device exists on the group - * unbound_list, indicating it's in the middle of transitioning from - * a vfio driver to driver-less. - */ -static int vfio_dev_viable(struct device *dev, void *data) -{ - struct vfio_group *group = data; - struct vfio_device *device; - struct device_driver *drv = READ_ONCE(dev->driver); - - if (!drv || vfio_dev_driver_allowed(dev, drv)) - return 0; - - device = vfio_group_get_device(group, dev); - if (device) { - vfio_device_put(device); - return 0; - } - - return -EINVAL; -} - -/* - * Async device support - */ -static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev) -{ - struct vfio_device *device; - - /* Do we already know about it? We shouldn't */ - device = vfio_group_get_device(group, dev); - if (WARN_ON_ONCE(device)) { - vfio_device_put(device); - return 0; - } - - /* Nothing to do for idle groups */ - if (!atomic_read(&group->container_users)) - return 0; - - /* TODO Prevent device auto probing */ - dev_WARN(dev, "Device added to live group %d!\n", - iommu_group_id(group->iommu_group)); - - return 0; -} - -static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev) -{ - /* We don't care what happens when the group isn't in use */ - if (!atomic_read(&group->container_users)) - return 0; - - return vfio_dev_viable(dev, group); -} - -static int vfio_iommu_group_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct vfio_group *group = container_of(nb, struct vfio_group, nb); - struct device *dev = data; - - switch (action) { - case IOMMU_GROUP_NOTIFY_ADD_DEVICE: - vfio_group_nb_add_dev(group, dev); - break; - case IOMMU_GROUP_NOTIFY_DEL_DEVICE: - /* - * Nothing to do here. If the device is in use, then the - * vfio sub-driver should block the remove callback until - * it is unused. If the device is unused or attached to a - * stub driver, then it should be released and we don't - * care that it will be going away. - */ - break; - case IOMMU_GROUP_NOTIFY_BIND_DRIVER: - dev_dbg(dev, "%s: group %d binding to driver\n", __func__, - iommu_group_id(group->iommu_group)); - break; - case IOMMU_GROUP_NOTIFY_BOUND_DRIVER: - dev_dbg(dev, "%s: group %d bound to driver %s\n", __func__, - iommu_group_id(group->iommu_group), dev->driver->name); - BUG_ON(vfio_group_nb_verify(group, dev)); - break; - case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER: - dev_dbg(dev, "%s: group %d unbinding from driver %s\n", - __func__, iommu_group_id(group->iommu_group), - dev->driver->name); - break; - } - return NOTIFY_OK; -} - /* * VFIO driver API */ From a5f1bd1afacd7b1e088f93f66af5453df0d8be9a Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:50:00 +0800 Subject: [PATCH 420/803] iommu: Remove iommu group changes notifier The iommu group changes notifer is not referenced in the tree. Remove it to avoid dead code. Suggested-by: Christoph Hellwig Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220418005000.897664-12-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 75 ------------------------------------------- include/linux/iommu.h | 23 ------------- 2 files changed, 98 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index eba8e8ccf19d..0c42ece25854 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -40,7 +39,6 @@ struct iommu_group { struct kobject *devices_kobj; struct list_head devices; struct mutex mutex; - struct blocking_notifier_head notifier; void *iommu_data; void (*iommu_data_release)(void *iommu_data); char *name; @@ -632,7 +630,6 @@ struct iommu_group *iommu_group_alloc(void) mutex_init(&group->mutex); INIT_LIST_HEAD(&group->devices); INIT_LIST_HEAD(&group->entry); - BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); ret = ida_simple_get(&iommu_group_ida, 0, 0, GFP_KERNEL); if (ret < 0) { @@ -905,10 +902,6 @@ rename: if (ret) goto err_put_group; - /* Notify any listeners about change to group. */ - blocking_notifier_call_chain(&group->notifier, - IOMMU_GROUP_NOTIFY_ADD_DEVICE, dev); - trace_add_device_to_group(group->id, dev); dev_info(dev, "Adding to iommu group %d\n", group->id); @@ -950,10 +943,6 @@ void iommu_group_remove_device(struct device *dev) dev_info(dev, "Removing from iommu group %d\n", group->id); - /* Pre-notify listeners that a device is being removed. */ - blocking_notifier_call_chain(&group->notifier, - IOMMU_GROUP_NOTIFY_DEL_DEVICE, dev); - mutex_lock(&group->mutex); list_for_each_entry(tmp_device, &group->devices, list) { if (tmp_device->dev == dev) { @@ -1075,36 +1064,6 @@ void iommu_group_put(struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_group_put); -/** - * iommu_group_register_notifier - Register a notifier for group changes - * @group: the group to watch - * @nb: notifier block to signal - * - * This function allows iommu group users to track changes in a group. - * See include/linux/iommu.h for actions sent via this notifier. Caller - * should hold a reference to the group throughout notifier registration. - */ -int iommu_group_register_notifier(struct iommu_group *group, - struct notifier_block *nb) -{ - return blocking_notifier_chain_register(&group->notifier, nb); -} -EXPORT_SYMBOL_GPL(iommu_group_register_notifier); - -/** - * iommu_group_unregister_notifier - Unregister a notifier - * @group: the group to watch - * @nb: notifier block to signal - * - * Unregister a previously registered group notifier block. - */ -int iommu_group_unregister_notifier(struct iommu_group *group, - struct notifier_block *nb) -{ - return blocking_notifier_chain_unregister(&group->notifier, nb); -} -EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier); - /** * iommu_register_device_fault_handler() - Register a device fault handler * @dev: the device @@ -1650,14 +1609,8 @@ static int remove_iommu_group(struct device *dev, void *data) static int iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { - unsigned long group_action = 0; struct device *dev = data; - struct iommu_group *group; - /* - * ADD/DEL call into iommu driver ops if provided, which may - * result in ADD/DEL notifiers to group->notifier - */ if (action == BUS_NOTIFY_ADD_DEVICE) { int ret; @@ -1668,34 +1621,6 @@ static int iommu_bus_notifier(struct notifier_block *nb, return NOTIFY_OK; } - /* - * Remaining BUS_NOTIFYs get filtered and republished to the - * group, if anyone is listening - */ - group = iommu_group_get(dev); - if (!group) - return 0; - - switch (action) { - case BUS_NOTIFY_BIND_DRIVER: - group_action = IOMMU_GROUP_NOTIFY_BIND_DRIVER; - break; - case BUS_NOTIFY_BOUND_DRIVER: - group_action = IOMMU_GROUP_NOTIFY_BOUND_DRIVER; - break; - case BUS_NOTIFY_UNBIND_DRIVER: - group_action = IOMMU_GROUP_NOTIFY_UNBIND_DRIVER; - break; - case BUS_NOTIFY_UNBOUND_DRIVER: - group_action = IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER; - break; - } - - if (group_action) - blocking_notifier_call_chain(&group->notifier, - group_action, dev); - - iommu_group_put(group); return 0; } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 77972ef978b5..6ef2df258673 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -407,13 +407,6 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) return dev->iommu->iommu_dev->ops; } -#define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */ -#define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */ -#define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */ -#define IOMMU_GROUP_NOTIFY_BOUND_DRIVER 4 /* Post Driver bind */ -#define IOMMU_GROUP_NOTIFY_UNBIND_DRIVER 5 /* Pre Driver unbind */ -#define IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER 6 /* Post Driver unbind */ - extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops); extern int bus_iommu_probe(struct bus_type *bus); extern bool iommu_present(struct bus_type *bus); @@ -478,10 +471,6 @@ extern int iommu_group_for_each_dev(struct iommu_group *group, void *data, extern struct iommu_group *iommu_group_get(struct device *dev); extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group); extern void iommu_group_put(struct iommu_group *group); -extern int iommu_group_register_notifier(struct iommu_group *group, - struct notifier_block *nb); -extern int iommu_group_unregister_notifier(struct iommu_group *group, - struct notifier_block *nb); extern int iommu_register_device_fault_handler(struct device *dev, iommu_dev_fault_handler_t handler, void *data); @@ -878,18 +867,6 @@ static inline void iommu_group_put(struct iommu_group *group) { } -static inline int iommu_group_register_notifier(struct iommu_group *group, - struct notifier_block *nb) -{ - return -ENODEV; -} - -static inline int iommu_group_unregister_notifier(struct iommu_group *group, - struct notifier_block *nb) -{ - return 0; -} - static inline int iommu_register_device_fault_handler(struct device *dev, iommu_dev_fault_handler_t handler, From 5b1553bf18de019388c5e9de03d9330f92507695 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 22 Apr 2022 14:21:03 -0500 Subject: [PATCH 421/803] dt-bindings: iommu: Drop client node in examples There's no need to show consumer side in provider examples. The ones used here are undocumented or undocumented in schemas which results in warnings. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220422192104.2591462-1-robh@kernel.org Signed-off-by: Joerg Roedel --- .../devicetree/bindings/iommu/mediatek,iommu.yaml | 10 ---------- .../devicetree/bindings/iommu/samsung,sysmmu.yaml | 10 ---------- 2 files changed, 20 deletions(-) diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml index 97e8c471a5e8..e0389539194f 100644 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml @@ -173,13 +173,3 @@ examples: <&larb3>, <&larb4>, <&larb5>; #iommu-cells = <1>; }; - - - | - #include - - /* Example for a client device */ - display { - compatible = "mediatek,mt8173-disp"; - iommus = <&iommu M4U_PORT_DISP_OVL0>, - <&iommu M4U_PORT_DISP_RDMA0>; - }; diff --git a/Documentation/devicetree/bindings/iommu/samsung,sysmmu.yaml b/Documentation/devicetree/bindings/iommu/samsung,sysmmu.yaml index af51b91c893e..24d8d129dc4c 100644 --- a/Documentation/devicetree/bindings/iommu/samsung,sysmmu.yaml +++ b/Documentation/devicetree/bindings/iommu/samsung,sysmmu.yaml @@ -86,16 +86,6 @@ examples: - | #include - gsc_0: scaler@13e00000 { - compatible = "samsung,exynos5-gsc"; - reg = <0x13e00000 0x1000>; - interrupts = <0 85 0>; - power-domains = <&pd_gsc>; - clocks = <&clock CLK_GSCL0>; - clock-names = "gscl"; - iommus = <&sysmmu_gsc0>; - }; - sysmmu_gsc0: iommu@13e80000 { compatible = "samsung,exynos-sysmmu"; reg = <0x13E80000 0x1000>; From 4e13f6706d5aee1a6b835a44f6cf4971a921dcb8 Mon Sep 17 00:00:00 2001 From: Timothy Hayes Date: Thu, 21 Apr 2022 17:52:03 +0100 Subject: [PATCH 422/803] perf arm-spe: Fix addresses of synthesized SPE events This patch corrects a bug whereby synthesized events from SPE samples are missing virtual addresses. Fixes: 54f7815efef7fad9 ("perf arm-spe: Fill address info for samples") Reviewed-by: Leo Yan Signed-off-by: Timothy Hayes Cc: Alexander Shishkin Cc: bpf@vger.kernel.org Cc: Jiri Olsa Cc: John Fastabend Cc: John Garry Cc: KP Singh Cc: Leo Yan Cc: linux-arm-kernel@lists.infradead.org Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Mathieu Poirier Cc: Namhyung Kim Cc: netdev@vger.kernel.org Cc: Song Liu Cc: Will Deacon Cc: Yonghong Song Link: https://lore.kernel.org/r/20220421165205.117662-2-timothy.hayes@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index d2b64e3f588b..151cc38a171c 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -1036,7 +1036,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC | - PERF_SAMPLE_WEIGHT; + PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR; if (spe->timeless_decoding) attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; else From 7599b70a3c85357d3a57319ae90d419adea0544b Mon Sep 17 00:00:00 2001 From: Timothy Hayes Date: Thu, 21 Apr 2022 17:52:04 +0100 Subject: [PATCH 423/803] perf arm-spe: Fix SPE events with phys addresses This patch corrects a bug whereby SPE collection is invoked with pa_enable=1 but synthesized events fail to show physical addresses. Reviewed-by: Leo Yan Signed-off-by: Timothy Hayes Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Fastabend Cc: John Garry Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Song Liu Cc: Will Deacon Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/r/20220421165205.117662-3-timothy.hayes@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/arm-spe.c | 10 ++++++++++ tools/perf/util/arm-spe.c | 3 ++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index af4d63af8072..e8b577d33e53 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -148,6 +148,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, bool privileged = perf_event_paranoid_check(-1); struct evsel *tracking_evsel; int err; + u64 bit; sper->evlist = evlist; @@ -245,6 +246,15 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, */ evsel__set_sample_bit(arm_spe_evsel, DATA_SRC); + /* + * The PHYS_ADDR flag does not affect the driver behaviour, it is used to + * inform that the resulting output's SPE samples contain physical addresses + * where applicable. + */ + bit = perf_pmu__format_bits(&arm_spe_pmu->format, "pa_enable"); + if (arm_spe_evsel->core.attr.config & bit) + evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR); + /* Add dummy event to keep tracking */ err = parse_events(evlist, "dummy:u", NULL); if (err) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 151cc38a171c..1a80151baed9 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -1033,7 +1033,8 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) memset(&attr, 0, sizeof(struct perf_event_attr)); attr.size = sizeof(struct perf_event_attr); attr.type = PERF_TYPE_HARDWARE; - attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; + attr.sample_type = evsel->core.attr.sample_type & + (PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR); attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR; From 3b9a8c8b9ac2a57df8b7f05c6ffa89652ac1ba8c Mon Sep 17 00:00:00 2001 From: Timothy Hayes Date: Thu, 21 Apr 2022 17:52:05 +0100 Subject: [PATCH 424/803] perf test: Add perf_event_attr test for Arm SPE Adds a perf_event_attr test for Arm SPE in which the presence of physical addresses are checked when SPE unit is run with pa_enable=1. Reviewed-by: Leo Yan Signed-off-by: Timothy Hayes Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Fastabend Cc: John Garry Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Song Liu Cc: Will Deacon Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/r/20220421165205.117662-4-timothy.hayes@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr/README | 1 + .../perf/tests/attr/test-record-spe-physical-address | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 tools/perf/tests/attr/test-record-spe-physical-address diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README index 454505d343fa..eb3f7d4bb324 100644 --- a/tools/perf/tests/attr/README +++ b/tools/perf/tests/attr/README @@ -60,6 +60,7 @@ Following tests are defined (with perf commands): perf record -R kill (test-record-raw) perf record -c 2 -e arm_spe_0// -- kill (test-record-spe-period) perf record -e arm_spe_0/period=3/ -- kill (test-record-spe-period-term) + perf record -e arm_spe_0/pa_enable=1/ -- kill (test-record-spe-physical-address) perf stat -e cycles kill (test-stat-basic) perf stat kill (test-stat-default) perf stat -d kill (test-stat-detailed-1) diff --git a/tools/perf/tests/attr/test-record-spe-physical-address b/tools/perf/tests/attr/test-record-spe-physical-address new file mode 100644 index 000000000000..7ebcf5012ce3 --- /dev/null +++ b/tools/perf/tests/attr/test-record-spe-physical-address @@ -0,0 +1,12 @@ +[config] +command = record +args = --no-bpf-event -e arm_spe_0/pa_enable=1/ -- kill >/dev/null 2>&1 +ret = 1 +arch = aarch64 + +[event-10:base-record-spe] +# 622727 is the decimal of IP|TID|TIME|CPU|IDENTIFIER|DATA_SRC|PHYS_ADDR +sample_type=622727 + +# dummy event +[event-1:base-record-spe] \ No newline at end of file From 838425f2defe5262906b698752d28fd2fca1aac2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 15 Apr 2022 17:40:46 -0700 Subject: [PATCH 425/803] perf symbol: Pass is_kallsyms to symbols__fixup_end() The symbol fixup is necessary for symbols in kallsyms since they don't have size info. So we use the next symbol's address to calculate the size. Now it's also used for user binaries because sometimes they miss size for hand-written asm functions. There's a arch-specific function to handle kallsyms differently but currently it cannot distinguish kallsyms from others. Pass this information explicitly to handle it properly. Note that those arch functions will be moved to the generic function so I didn't added it to the arch-functions. Fixes: 3cf6a32f3f2a4594 ("perf symbols: Fix symbol size calculation condition") Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Mathieu Poirier Cc: Michael Ellerman Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Will Deacon Cc: linux-s390@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20220416004048.1514900-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 2 +- tools/perf/util/symbol.c | 7 ++++--- tools/perf/util/symbol.h | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 31cd59a2b66e..ecd377938eea 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1290,7 +1290,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, * For misannotated, zeroed, ASM function sizes. */ if (nr > 0) { - symbols__fixup_end(&dso->symbols); + symbols__fixup_end(&dso->symbols, false); symbols__fixup_duplicate(&dso->symbols); if (kmap) { /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index dea0fc495185..1b85cc1422a9 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -217,7 +217,8 @@ again: } } -void symbols__fixup_end(struct rb_root_cached *symbols) +void symbols__fixup_end(struct rb_root_cached *symbols, + bool is_kallsyms __maybe_unused) { struct rb_node *nd, *prevnd = rb_first_cached(symbols); struct symbol *curr, *prev; @@ -1467,7 +1468,7 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename, if (kallsyms__delta(kmap, filename, &delta)) return -1; - symbols__fixup_end(&dso->symbols); + symbols__fixup_end(&dso->symbols, true); symbols__fixup_duplicate(&dso->symbols); if (dso->kernel == DSO_SPACE__KERNEL_GUEST) @@ -1659,7 +1660,7 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) #undef bfd_asymbol_section #endif - symbols__fixup_end(&dso->symbols); + symbols__fixup_end(&dso->symbols, false); symbols__fixup_duplicate(&dso->symbols); dso->adjust_symbols = 1; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index fbf866d82dcc..5fcdd1f94c56 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -203,7 +203,7 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym, bool kernel); void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym); void symbols__fixup_duplicate(struct rb_root_cached *symbols); -void symbols__fixup_end(struct rb_root_cached *symbols); +void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms); void maps__fixup_end(struct maps *maps); typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); From 8799ebce84d672aae1dc3170510f6a3e66f96b11 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 15 Apr 2022 17:40:47 -0700 Subject: [PATCH 426/803] perf symbol: Update symbols__fixup_end() Now arch-specific functions all do the same thing. When it fixes the symbol address it needs to check the boundary between the kernel image and modules. For the last symbol in the previous region, it cannot know the exact size as it's discarded already. Thus it just uses a small page size (4096) and rounds it up like the last symbol. Fixes: 3cf6a32f3f2a4594 ("perf symbols: Fix symbol size calculation condition") Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Mathieu Poirier Cc: Michael Ellerman Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Will Deacon Cc: linux-s390@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20220416004048.1514900-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1b85cc1422a9..623094e866fd 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -217,8 +217,8 @@ again: } } -void symbols__fixup_end(struct rb_root_cached *symbols, - bool is_kallsyms __maybe_unused) +/* Update zero-sized symbols using the address of the next symbol */ +void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms) { struct rb_node *nd, *prevnd = rb_first_cached(symbols); struct symbol *curr, *prev; @@ -232,8 +232,29 @@ void symbols__fixup_end(struct rb_root_cached *symbols, prev = curr; curr = rb_entry(nd, struct symbol, rb_node); - if (prev->end == prev->start || prev->end != curr->start) - arch__symbols__fixup_end(prev, curr); + /* + * On some architecture kernel text segment start is located at + * some low memory address, while modules are located at high + * memory addresses (or vice versa). The gap between end of + * kernel text segment and beginning of first module's text + * segment is very big. Therefore do not fill this gap and do + * not assign it to the kernel dso map (kallsyms). + * + * In kallsyms, it determines module symbols using '[' character + * like in: + * ffffffffc1937000 T hdmi_driver_init [snd_hda_codec_hdmi] + */ + if (prev->end == prev->start) { + /* Last kernel/module symbol mapped to end of page */ + if (is_kallsyms && (!strchr(prev->name, '[') != + !strchr(curr->name, '['))) + prev->end = roundup(prev->end + 4096, 4096); + else + prev->end = curr->start; + + pr_debug4("%s sym:%s end:%#" PRIx64 "\n", + __func__, prev->name, prev->end); + } } /* Last entry */ From a5d20d42a2f2dc2b2f9e9361912062732414090d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 15 Apr 2022 17:40:48 -0700 Subject: [PATCH 427/803] perf symbol: Remove arch__symbols__fixup_end() Now the generic code can handle kallsyms fixup properly so no need to keep the arch-functions anymore. Fixes: 3cf6a32f3f2a4594 ("perf symbols: Fix symbol size calculation condition") Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Mathieu Poirier Cc: Michael Ellerman Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Will Deacon Cc: linux-s390@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20220416004048.1514900-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/machine.c | 21 --------------------- tools/perf/arch/powerpc/util/Build | 1 - tools/perf/arch/powerpc/util/machine.c | 25 ------------------------- tools/perf/arch/s390/util/machine.c | 16 ---------------- tools/perf/util/symbol.c | 5 ----- tools/perf/util/symbol.h | 1 - 6 files changed, 69 deletions(-) delete mode 100644 tools/perf/arch/powerpc/util/machine.c diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c index d2ce31e28cd7..41c1596e5207 100644 --- a/tools/perf/arch/arm64/util/machine.c +++ b/tools/perf/arch/arm64/util/machine.c @@ -8,27 +8,6 @@ #include "callchain.h" #include "record.h" -/* On arm64, kernel text segment starts at high memory address, - * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory - * address, like 0xffff 0000 00ax xxxx. When only small amount of - * memory is used by modules, gap between end of module's text segment - * and start of kernel text segment may reach 2G. - * Therefore do not fill this gap and do not assign it to the kernel dso map. - */ - -#define SYMBOL_LIMIT (1 << 12) /* 4K */ - -void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) -{ - if ((strchr(p->name, '[') && strchr(c->name, '[') == NULL) || - (strchr(p->name, '[') == NULL && strchr(c->name, '['))) - /* Limit range of last symbol in module and kernel */ - p->end += SYMBOL_LIMIT; - else - p->end = c->start; - pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end); -} - void arch__add_leaf_frame_record_opts(struct record_opts *opts) { opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask; diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 8a79c4126e5b..0115f3166568 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,5 +1,4 @@ perf-y += header.o -perf-y += machine.o perf-y += kvm-stat.o perf-y += perf_regs.o perf-y += mem-events.o diff --git a/tools/perf/arch/powerpc/util/machine.c b/tools/perf/arch/powerpc/util/machine.c deleted file mode 100644 index e652a1aa8132..000000000000 --- a/tools/perf/arch/powerpc/util/machine.c +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include -#include -#include // page_size -#include "debug.h" -#include "symbol.h" - -/* On powerpc kernel text segment start at memory addresses, 0xc000000000000000 - * whereas the modules are located at very high memory addresses, - * for example 0xc00800000xxxxxxx. The gap between end of kernel text segment - * and beginning of first module's text segment is very high. - * Therefore do not fill this gap and do not assign it to the kernel dso map. - */ - -void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) -{ - if (strchr(p->name, '[') == NULL && strchr(c->name, '[')) - /* Limit the range of last kernel symbol */ - p->end += page_size; - else - p->end = c->start; - pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end); -} diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c index 7644a4f6d4a4..98bc3f39d5f3 100644 --- a/tools/perf/arch/s390/util/machine.c +++ b/tools/perf/arch/s390/util/machine.c @@ -35,19 +35,3 @@ int arch__fix_module_text_start(u64 *start, u64 *size, const char *name) return 0; } - -/* On s390 kernel text segment start is located at very low memory addresses, - * for example 0x10000. Modules are located at very high memory addresses, - * for example 0x3ff xxxx xxxx. The gap between end of kernel text segment - * and beginning of first module's text segment is very big. - * Therefore do not fill this gap and do not assign it to the kernel dso map. - */ -void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) -{ - if (strchr(p->name, '[') == NULL && strchr(c->name, '[')) - /* Last kernel symbol mapped to end of page */ - p->end = roundup(p->end, page_size); - else - p->end = c->start; - pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end); -} diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 623094e866fd..f72baf636724 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -101,11 +101,6 @@ static int prefix_underscores_count(const char *str) return tail - str; } -void __weak arch__symbols__fixup_end(struct symbol *p, struct symbol *c) -{ - p->end = c->start; -} - const char * __weak arch__normalize_symbol_name(const char *name) { return name; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 5fcdd1f94c56..0b893dcc8ea6 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -241,7 +241,6 @@ const char *arch__normalize_symbol_name(const char *name); #define SYMBOL_A 0 #define SYMBOL_B 1 -void arch__symbols__fixup_end(struct symbol *p, struct symbol *c); int arch__compare_symbol_names(const char *namea, const char *nameb); int arch__compare_symbol_names_n(const char *namea, const char *nameb, unsigned int n); From 743b83f15d4069ea57c3e40996bf4a1077e0cdc1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 28 Apr 2022 09:39:21 +0200 Subject: [PATCH 428/803] netfilter: nft_socket: only do sk lookups when indev is available Check if the incoming interface is available and NFT_BREAK in case neither skb->sk nor input device are set. Because nf_sk_lookup_slow*() assume packet headers are in the 'in' direction, use in postrouting is not going to yield a meaningful result. Same is true for the forward chain, so restrict the use to prerouting, input and output. Use in output work if a socket is already attached to the skb. Fixes: 554ced0a6e29 ("netfilter: nf_tables: add support for native socket matching") Reported-and-tested-by: Topi Miettinen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_socket.c | 52 ++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 6d9e8e0a3a7d..05ae5a338b6f 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -54,6 +54,32 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo } #endif +static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt) +{ + const struct net_device *indev = nft_in(pkt); + const struct sk_buff *skb = pkt->skb; + struct sock *sk = NULL; + + if (!indev) + return NULL; + + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev); + break; +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + case NFPROTO_IPV6: + sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev); + break; +#endif + default: + WARN_ON_ONCE(1); + break; + } + + return sk; +} + static void nft_socket_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -67,20 +93,7 @@ static void nft_socket_eval(const struct nft_expr *expr, sk = NULL; if (!sk) - switch(nft_pf(pkt)) { - case NFPROTO_IPV4: - sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt)); - break; -#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) - case NFPROTO_IPV6: - sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt)); - break; -#endif - default: - WARN_ON_ONCE(1); - regs->verdict.code = NFT_BREAK; - return; - } + sk = nft_socket_do_lookup(pkt); if (!sk) { regs->verdict.code = NFT_BREAK; @@ -224,6 +237,16 @@ static bool nft_socket_reduce(struct nft_regs_track *track, return nft_expr_reduce_bitwise(track, expr); } +static int nft_socket_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT)); +} + static struct nft_expr_type nft_socket_type; static const struct nft_expr_ops nft_socket_ops = { .type = &nft_socket_type, @@ -231,6 +254,7 @@ static const struct nft_expr_ops nft_socket_ops = { .eval = nft_socket_eval, .init = nft_socket_init, .dump = nft_socket_dump, + .validate = nft_socket_validate, .reduce = nft_socket_reduce, }; From 6043257b1de069fbb5a2a52d7211c0275bc8c0e0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 11 Apr 2022 12:16:05 -0300 Subject: [PATCH 429/803] iommu: Introduce the domain op enforce_cache_coherency() This new mechanism will replace using IOMMU_CAP_CACHE_COHERENCY and IOMMU_CACHE to control the no-snoop blocking behavior of the IOMMU. Currently only Intel and AMD IOMMUs are known to support this feature. They both implement it as an IOPTE bit, that when set, will cause PCIe TLPs to that IOVA with the no-snoop bit set to be treated as though the no-snoop bit was clear. The new API is triggered by calling enforce_cache_coherency() before mapping any IOVA to the domain which globally switches on no-snoop blocking. This allows other implementations that might block no-snoop globally and outside the IOPTE - AMD also documents such a HW capability. Leave AMD out of sync with Intel and have it block no-snoop even for in-kernel users. This can be trivially resolved in a follow up patch. Only VFIO needs to call this API because it does not have detailed control over the device to avoid requesting no-snoop behavior at the device level. Other places using domains with real kernel drivers should simply avoid asking their devices to set the no-snoop bit. Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Acked-by: Robin Murphy Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v3-2cf356649677+a32-intel_no_snoop_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 7 +++++++ drivers/iommu/intel/iommu.c | 14 +++++++++++++- include/linux/intel-iommu.h | 1 + include/linux/iommu.h | 4 ++++ 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 038e104b922c..840831d5d2ad 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2266,6 +2266,12 @@ static int amd_iommu_def_domain_type(struct device *dev) return 0; } +static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain) +{ + /* IOMMU_PTE_FC is always set */ + return true; +} + const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .domain_alloc = amd_iommu_domain_alloc, @@ -2288,6 +2294,7 @@ const struct iommu_ops amd_iommu_ops = { .flush_iotlb_all = amd_iommu_flush_iotlb_all, .iotlb_sync = amd_iommu_iotlb_sync, .free = amd_iommu_domain_free, + .enforce_cache_coherency = amd_iommu_enforce_cache_coherency, } }; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 0edf6084dc14..161199f62270 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4422,7 +4422,8 @@ static int intel_iommu_map(struct iommu_domain *domain, prot |= DMA_PTE_READ; if (iommu_prot & IOMMU_WRITE) prot |= DMA_PTE_WRITE; - if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) + if (((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) || + dmar_domain->force_snooping) prot |= DMA_PTE_SNP; max_addr = iova + size; @@ -4545,6 +4546,16 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, return phys; } +static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + + if (!dmar_domain->iommu_snooping) + return false; + dmar_domain->force_snooping = true; + return true; +} + static bool intel_iommu_capable(enum iommu_cap cap) { if (cap == IOMMU_CAP_CACHE_COHERENCY) @@ -4900,6 +4911,7 @@ const struct iommu_ops intel_iommu_ops = { .iotlb_sync = intel_iommu_tlb_sync, .iova_to_phys = intel_iommu_iova_to_phys, .free = intel_iommu_domain_free, + .enforce_cache_coherency = intel_iommu_enforce_cache_coherency, } }; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 2f9891cb3d00..4c2baf2446c2 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -540,6 +540,7 @@ struct dmar_domain { u8 has_iotlb_device: 1; u8 iommu_coherency: 1; /* indicate coherency of iommu access */ u8 iommu_snooping: 1; /* indicate snooping control feature */ + u8 force_snooping : 1; /* Create IOPTEs with snoop control */ struct list_head devices; /* all devices' list */ struct iova_domain iovad; /* iova's that belong to this domain */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 4123693ae319..c7ad6b10e261 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -274,6 +274,9 @@ struct iommu_ops { * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush * queue * @iova_to_phys: translate iova to physical address + * @enforce_cache_coherency: Prevent any kind of DMA from bypassing IOMMU_CACHE, + * including no-snoop TLPs on PCIe or other platform + * specific mechanisms. * @enable_nesting: Enable nesting * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) * @free: Release the domain after use. @@ -302,6 +305,7 @@ struct iommu_domain_ops { phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); + bool (*enforce_cache_coherency)(struct iommu_domain *domain); int (*enable_nesting)(struct iommu_domain *domain); int (*set_pgtable_quirks)(struct iommu_domain *domain, unsigned long quirks); From 71cfafda9c9bd9812cdb62ddb94daf65a1af12c1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 11 Apr 2022 12:16:06 -0300 Subject: [PATCH 430/803] vfio: Move the Intel no-snoop control off of IOMMU_CACHE IOMMU_CACHE means "normal DMA to this iommu_domain's IOVA should be cache coherent" and is used by the DMA API. The definition allows for special non-coherent DMA to exist - ie processing of the no-snoop flag in PCIe TLPs - so long as this behavior is opt-in by the device driver. The flag is mainly used by the DMA API to synchronize the IOMMU setting with the expected cache behavior of the DMA master. eg based on dev_is_dma_coherent() in some case. For Intel IOMMU IOMMU_CACHE was redefined to mean 'force all DMA to be cache coherent' which has the practical effect of causing the IOMMU to ignore the no-snoop bit in a PCIe TLP. x86 platforms are always IOMMU_CACHE, so Intel should ignore this flag. Instead use the new domain op enforce_cache_coherency() which causes every IOPTE created in the domain to have the no-snoop blocking behavior. Reconfigure VFIO to always use IOMMU_CACHE and call enforce_cache_coherency() to operate the special Intel behavior. Remove the IOMMU_CACHE test from Intel IOMMU. Ultimately VFIO plumbs the result of enforce_cache_coherency() back into the x86 platform code through kvm_arch_register_noncoherent_dma() which controls if the WBINVD instruction is available in the guest. No other archs implement kvm_arch_register_noncoherent_dma() nor are there any other known consumers of VFIO_DMA_CC_IOMMU that might be affected by the user visible result change on non-x86 archs. Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Acked-by: Alex Williamson Acked-by: Robin Murphy Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v3-2cf356649677+a32-intel_no_snoop_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 7 ++----- drivers/vfio/vfio_iommu_type1.c | 30 +++++++++++++++++++----------- include/linux/intel-iommu.h | 1 - 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 161199f62270..38441cb06c8c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -641,7 +641,6 @@ static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain) static void domain_update_iommu_cap(struct dmar_domain *domain) { domain_update_iommu_coherency(domain); - domain->iommu_snooping = domain_update_iommu_snooping(NULL); domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL); /* @@ -4283,7 +4282,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->agaw = width_to_agaw(adjust_width); domain->iommu_coherency = false; - domain->iommu_snooping = false; domain->iommu_superpage = 0; domain->max_addr = 0; @@ -4422,8 +4420,7 @@ static int intel_iommu_map(struct iommu_domain *domain, prot |= DMA_PTE_READ; if (iommu_prot & IOMMU_WRITE) prot |= DMA_PTE_WRITE; - if (((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) || - dmar_domain->force_snooping) + if (dmar_domain->force_snooping) prot |= DMA_PTE_SNP; max_addr = iova + size; @@ -4550,7 +4547,7 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); - if (!dmar_domain->iommu_snooping) + if (!domain_update_iommu_snooping(NULL)) return false; dmar_domain->force_snooping = true; return true; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 9394aa9444c1..c13b9290e357 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -84,8 +84,8 @@ struct vfio_domain { struct iommu_domain *domain; struct list_head next; struct list_head group_list; - int prot; /* IOMMU_CACHE */ - bool fgsp; /* Fine-grained super pages */ + bool fgsp : 1; /* Fine-grained super pages */ + bool enforce_cache_coherency : 1; }; struct vfio_dma { @@ -1461,7 +1461,7 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova, list_for_each_entry(d, &iommu->domain_list, next) { ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT, - npage << PAGE_SHIFT, prot | d->prot); + npage << PAGE_SHIFT, prot | IOMMU_CACHE); if (ret) goto unwind; @@ -1771,7 +1771,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, } ret = iommu_map(domain->domain, iova, phys, - size, dma->prot | domain->prot); + size, dma->prot | IOMMU_CACHE); if (ret) { if (!dma->iommu_mapped) { vfio_unpin_pages_remote(dma, iova, @@ -1859,7 +1859,7 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain) return; ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2, - IOMMU_READ | IOMMU_WRITE | domain->prot); + IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE); if (!ret) { size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE); @@ -2267,8 +2267,15 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, goto out_detach; } - if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY)) - domain->prot |= IOMMU_CACHE; + /* + * If the IOMMU can block non-coherent operations (ie PCIe TLPs with + * no-snoop set) then VFIO always turns this feature on because on Intel + * platforms it optimizes KVM to disable wbinvd emulation. + */ + if (domain->domain->ops->enforce_cache_coherency) + domain->enforce_cache_coherency = + domain->domain->ops->enforce_cache_coherency( + domain->domain); /* * Try to match an existing compatible domain. We don't want to @@ -2279,7 +2286,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, */ list_for_each_entry(d, &iommu->domain_list, next) { if (d->domain->ops == domain->domain->ops && - d->prot == domain->prot) { + d->enforce_cache_coherency == + domain->enforce_cache_coherency) { iommu_detach_group(domain->domain, group->iommu_group); if (!iommu_attach_group(d->domain, group->iommu_group)) { @@ -2611,14 +2619,14 @@ static void vfio_iommu_type1_release(void *iommu_data) kfree(iommu); } -static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) +static int vfio_domains_have_enforce_cache_coherency(struct vfio_iommu *iommu) { struct vfio_domain *domain; int ret = 1; mutex_lock(&iommu->lock); list_for_each_entry(domain, &iommu->domain_list, next) { - if (!(domain->prot & IOMMU_CACHE)) { + if (!(domain->enforce_cache_coherency)) { ret = 0; break; } @@ -2641,7 +2649,7 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, case VFIO_DMA_CC_IOMMU: if (!iommu) return 0; - return vfio_domains_have_iommu_cache(iommu); + return vfio_domains_have_enforce_cache_coherency(iommu); default: return 0; } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4c2baf2446c2..72e5d7900e71 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -539,7 +539,6 @@ struct dmar_domain { u8 has_iotlb_device: 1; u8 iommu_coherency: 1; /* indicate coherency of iommu access */ - u8 iommu_snooping: 1; /* indicate snooping control feature */ u8 force_snooping : 1; /* Create IOPTEs with snoop control */ struct list_head devices; /* all devices' list */ From f78dc1dad829e505d83e33dc0879887f074c52e1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 11 Apr 2022 12:16:07 -0300 Subject: [PATCH 431/803] iommu: Redefine IOMMU_CAP_CACHE_COHERENCY as the cap flag for IOMMU_CACHE While the comment was correct that this flag was intended to convey the block no-snoop support in the IOMMU, it has become widely implemented and used to mean the IOMMU supports IOMMU_CACHE as a map flag. Only the Intel driver was different. Now that the Intel driver is using enforce_cache_coherency() update the comment to make it clear that IOMMU_CAP_CACHE_COHERENCY is only about IOMMU_CACHE. Fix the Intel driver to return true since IOMMU_CACHE always works. The two places that test this flag, usnic and vdpa, are both assigning userspace pages to a driver controlled iommu_domain and require IOMMU_CACHE behavior as they offer no way for userspace to synchronize caches. Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Acked-by: Robin Murphy Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v3-2cf356649677+a32-intel_no_snoop_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 2 +- include/linux/iommu.h | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 38441cb06c8c..efcecfa5952a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4556,7 +4556,7 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) static bool intel_iommu_capable(enum iommu_cap cap) { if (cap == IOMMU_CAP_CACHE_COHERENCY) - return domain_update_iommu_snooping(NULL); + return true; if (cap == IOMMU_CAP_INTR_REMAP) return irq_remapping_enabled == 1; if (cap == IOMMU_CAP_PRE_BOOT_PROTECTION) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c7ad6b10e261..575ab27ede5b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -103,8 +103,7 @@ static inline bool iommu_is_dma_domain(struct iommu_domain *domain) } enum iommu_cap { - IOMMU_CAP_CACHE_COHERENCY, /* IOMMU can enforce cache coherent DMA - transactions */ + IOMMU_CAP_CACHE_COHERENCY, /* IOMMU_CACHE is supported */ IOMMU_CAP_INTR_REMAP, /* IOMMU supports interrupt isolation */ IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */ IOMMU_CAP_PRE_BOOT_PROTECTION, /* Firmware says it used the IOMMU for From e8ae0e140c05319fa47bb3bf615c3a585f404b9d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 11 Apr 2022 12:16:08 -0300 Subject: [PATCH 432/803] vfio: Require that devices support DMA cache coherence IOMMU_CACHE means that normal DMAs do not require any additional coherency mechanism and is the basic uAPI that VFIO exposes to userspace. For instance VFIO applications like DPDK will not work if additional coherency operations are required. Therefore check IOMMU_CAP_CACHE_COHERENCY like vdpa & usnic do before allowing an IOMMU backed VFIO device to be created. Reviewed-by: Kevin Tian Acked-by: Alex Williamson Acked-by: Robin Murphy Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4-v3-2cf356649677+a32-intel_no_snoop_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/vfio/vfio.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index a4555014bd1e..9edad767cfda 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -815,6 +815,13 @@ static int __vfio_register_dev(struct vfio_device *device, int vfio_register_group_dev(struct vfio_device *device) { + /* + * VFIO always sets IOMMU_CACHE because we offer no way for userspace to + * restore cache coherency. + */ + if (!iommu_capable(device->dev->bus, IOMMU_CAP_CACHE_COHERENCY)) + return -EINVAL; + return __vfio_register_dev(device, vfio_group_find_or_alloc(device->dev)); } From aad41a7d7cf6c6fa804c872a2480f8e541da37cf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 28 Apr 2022 11:08:13 -0400 Subject: [PATCH 433/803] SUNRPC: Don't leak sockets in xs_local_connect() If there is still a closed socket associated with the transport, then we need to trigger an autoclose before we can set up a new connection. Reported-by: wanghai (M) Fixes: f00432063db1 ("SUNRPC: Ensure we flush any closed sockets before xs_xprt_free()") Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8ab64ea46870..f9849b297ea3 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1950,6 +1950,9 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); int ret; + if (transport->file) + goto force_disconnect; + if (RPC_IS_ASYNC(task)) { /* * We want the AF_LOCAL connect to be resolved in the @@ -1962,11 +1965,17 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) */ task->tk_rpc_status = -ENOTCONN; rpc_exit(task, -ENOTCONN); - return; + goto out_wake; } ret = xs_local_setup_socket(transport); if (ret && !RPC_IS_SOFTCONN(task)) msleep_interruptible(15000); + return; +force_disconnect: + xprt_force_disconnect(xprt); +out_wake: + xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, -ENOTCONN); } #if IS_ENABLED(CONFIG_SUNRPC_SWAP) From 126858db81a5094d20885bc59621c3b9497f9048 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 27 Apr 2022 09:36:06 -0700 Subject: [PATCH 434/803] MAINTAINERS: Update BNXT entry with firmware files There appears to be a maintainer gap for BNXT TEE firmware files which causes some patches to be missed. Update the entry for the BNXT Ethernet controller with its companion firmware files. Signed-off-by: Florian Fainelli Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20220427163606.126154-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3c0f56b44c61..e86a8e2ec3f6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3913,7 +3913,9 @@ BROADCOM BNXT_EN 50 GIGABIT ETHERNET DRIVER M: Michael Chan L: netdev@vger.kernel.org S: Supported +F: drivers/firmware/broadcom/tee_bnxt_fw.c F: drivers/net/ethernet/broadcom/bnxt/ +F: include/linux/firmware/broadcom/tee_bnxt_fw.h BROADCOM BRCM80211 IEEE802.11n WIRELESS DRIVER M: Arend van Spriel From f049efc7f7cd2f3c419f55040928eaefb13b3636 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 27 Apr 2022 10:31:52 -0700 Subject: [PATCH 435/803] ixgbe: ensure IPsec VF<->PF compatibility The VF driver can forward any IPsec flags and such makes the function is not extendable and prone to backward/forward incompatibility. If new software runs on VF, it won't know that PF configured something completely different as it "knows" only XFRM_OFFLOAD_INBOUND flag. Fixes: eda0333ac293 ("ixgbe: add VF IPsec management") Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky Reviewed-by: Shannon Nelson Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20220427173152.443102-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index e596e1a9fc75..69d11ff7677d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -903,7 +903,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) /* Tx IPsec offload doesn't seem to work on this * device, so block these requests for now. */ - if (!(sam->flags & XFRM_OFFLOAD_INBOUND)) { + sam->flags = sam->flags & ~XFRM_OFFLOAD_IPV6; + if (sam->flags != XFRM_OFFLOAD_INBOUND) { err = -EOPNOTSUPP; goto err_out; } From 66a2f5ef68faaf950746747d790a0c95f7ec96d2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 27 Apr 2022 23:30:17 +0300 Subject: [PATCH 436/803] net: enetc: allow tc-etf offload even with NETIF_F_CSUM_MASK The Time-Specified Departure feature is indeed mutually exclusive with TX IP checksumming in ENETC, but TX checksumming in itself is broken and was removed from this driver in commit 82728b91f124 ("enetc: Remove Tx checksumming offload code"). The blamed commit declared NETIF_F_HW_CSUM in dev->features to comply with software TSO's expectations, and still did the checksumming in software by calling skb_checksum_help(). So there isn't any restriction for the Time-Specified Departure feature. However, enetc_setup_tc_txtime() doesn't understand that, and blindly looks for NETIF_F_CSUM_MASK. Instead of checking for things which can literally never happen in the current code base, just remove the check and let the driver offload tc-etf qdiscs. Fixes: acede3c5dad5 ("net: enetc: declare NETIF_F_HW_CSUM and do it in software") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220427203017.1291634-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_qos.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 79afb1d7289b..9182631856d5 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -297,10 +297,6 @@ int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data) if (tc < 0 || tc >= priv->num_tx_rings) return -EINVAL; - /* Do not support TXSTART and TX CSUM offload simutaniously */ - if (ndev->features & NETIF_F_CSUM_MASK) - return -EBUSY; - /* TSD and Qbv are mutually exclusive in hardware */ if (enetc_rd(&priv->si->hw, ENETC_QBV_PTGCR_OFFSET) & ENETC_QBV_TGE) return -EBUSY; From aeaf59b78712c7a1827c76f086acff4f586e072f Mon Sep 17 00:00:00 2001 From: Dany Madden Date: Wed, 27 Apr 2022 18:51:46 -0500 Subject: [PATCH 437/803] Revert "ibmvnic: Add ethtool private flag for driver-defined queue limits" This reverts commit 723ad916134784b317b72f3f6cf0f7ba774e5dae When client requests channel or ring size larger than what the server can support the server will cap the request to the supported max. So, the client would not be able to successfully request resources that exceed the server limit. Fixes: 723ad9161347 ("ibmvnic: Add ethtool private flag for driver-defined queue limits") Signed-off-by: Dany Madden Link: https://lore.kernel.org/r/20220427235146.23189-1-drt@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 141 +++++++++-------------------- drivers/net/ethernet/ibm/ibmvnic.h | 6 -- 2 files changed, 41 insertions(+), 106 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 77683909ca3d..5c5931dba51d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3210,13 +3210,8 @@ static void ibmvnic_get_ringparam(struct net_device *netdev, { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) { - ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq; - ring->tx_max_pending = adapter->max_tx_entries_per_subcrq; - } else { - ring->rx_max_pending = IBMVNIC_MAX_QUEUE_SZ; - ring->tx_max_pending = IBMVNIC_MAX_QUEUE_SZ; - } + ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq; + ring->tx_max_pending = adapter->max_tx_entries_per_subcrq; ring->rx_mini_max_pending = 0; ring->rx_jumbo_max_pending = 0; ring->rx_pending = adapter->req_rx_add_entries_per_subcrq; @@ -3231,23 +3226,21 @@ static int ibmvnic_set_ringparam(struct net_device *netdev, struct netlink_ext_ack *extack) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int ret; - ret = 0; + if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq || + ring->tx_pending > adapter->max_tx_entries_per_subcrq) { + netdev_err(netdev, "Invalid request.\n"); + netdev_err(netdev, "Max tx buffers = %llu\n", + adapter->max_rx_add_entries_per_subcrq); + netdev_err(netdev, "Max rx buffers = %llu\n", + adapter->max_tx_entries_per_subcrq); + return -EINVAL; + } + adapter->desired.rx_entries = ring->rx_pending; adapter->desired.tx_entries = ring->tx_pending; - ret = wait_for_reset(adapter); - - if (!ret && - (adapter->req_rx_add_entries_per_subcrq != ring->rx_pending || - adapter->req_tx_entries_per_subcrq != ring->tx_pending)) - netdev_info(netdev, - "Could not match full ringsize request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n", - ring->rx_pending, ring->tx_pending, - adapter->req_rx_add_entries_per_subcrq, - adapter->req_tx_entries_per_subcrq); - return ret; + return wait_for_reset(adapter); } static void ibmvnic_get_channels(struct net_device *netdev, @@ -3255,14 +3248,8 @@ static void ibmvnic_get_channels(struct net_device *netdev, { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) { - channels->max_rx = adapter->max_rx_queues; - channels->max_tx = adapter->max_tx_queues; - } else { - channels->max_rx = IBMVNIC_MAX_QUEUES; - channels->max_tx = IBMVNIC_MAX_QUEUES; - } - + channels->max_rx = adapter->max_rx_queues; + channels->max_tx = adapter->max_tx_queues; channels->max_other = 0; channels->max_combined = 0; channels->rx_count = adapter->req_rx_queues; @@ -3275,22 +3262,11 @@ static int ibmvnic_set_channels(struct net_device *netdev, struct ethtool_channels *channels) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int ret; - ret = 0; adapter->desired.rx_queues = channels->rx_count; adapter->desired.tx_queues = channels->tx_count; - ret = wait_for_reset(adapter); - - if (!ret && - (adapter->req_rx_queues != channels->rx_count || - adapter->req_tx_queues != channels->tx_count)) - netdev_info(netdev, - "Could not match full channels request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n", - channels->rx_count, channels->tx_count, - adapter->req_rx_queues, adapter->req_tx_queues); - return ret; + return wait_for_reset(adapter); } static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) @@ -3298,43 +3274,32 @@ static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) struct ibmvnic_adapter *adapter = netdev_priv(dev); int i; - switch (stringset) { - case ETH_SS_STATS: - for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); - i++, data += ETH_GSTRING_LEN) - memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN); - - for (i = 0; i < adapter->req_tx_queues; i++) { - snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i); - data += ETH_GSTRING_LEN; - - snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i); - data += ETH_GSTRING_LEN; - - snprintf(data, ETH_GSTRING_LEN, - "tx%d_dropped_packets", i); - data += ETH_GSTRING_LEN; - } - - for (i = 0; i < adapter->req_rx_queues; i++) { - snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i); - data += ETH_GSTRING_LEN; - - snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i); - data += ETH_GSTRING_LEN; - - snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i); - data += ETH_GSTRING_LEN; - } - break; - - case ETH_SS_PRIV_FLAGS: - for (i = 0; i < ARRAY_SIZE(ibmvnic_priv_flags); i++) - strcpy(data + i * ETH_GSTRING_LEN, - ibmvnic_priv_flags[i]); - break; - default: + if (stringset != ETH_SS_STATS) return; + + for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++, data += ETH_GSTRING_LEN) + memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN); + + for (i = 0; i < adapter->req_tx_queues; i++) { + snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i); + data += ETH_GSTRING_LEN; + + snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i); + data += ETH_GSTRING_LEN; + + snprintf(data, ETH_GSTRING_LEN, "tx%d_dropped_packets", i); + data += ETH_GSTRING_LEN; + } + + for (i = 0; i < adapter->req_rx_queues; i++) { + snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i); + data += ETH_GSTRING_LEN; + + snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i); + data += ETH_GSTRING_LEN; + + snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i); + data += ETH_GSTRING_LEN; } } @@ -3347,8 +3312,6 @@ static int ibmvnic_get_sset_count(struct net_device *dev, int sset) return ARRAY_SIZE(ibmvnic_stats) + adapter->req_tx_queues * NUM_TX_STATS + adapter->req_rx_queues * NUM_RX_STATS; - case ETH_SS_PRIV_FLAGS: - return ARRAY_SIZE(ibmvnic_priv_flags); default: return -EOPNOTSUPP; } @@ -3401,26 +3364,6 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev, } } -static u32 ibmvnic_get_priv_flags(struct net_device *netdev) -{ - struct ibmvnic_adapter *adapter = netdev_priv(netdev); - - return adapter->priv_flags; -} - -static int ibmvnic_set_priv_flags(struct net_device *netdev, u32 flags) -{ - struct ibmvnic_adapter *adapter = netdev_priv(netdev); - bool which_maxes = !!(flags & IBMVNIC_USE_SERVER_MAXES); - - if (which_maxes) - adapter->priv_flags |= IBMVNIC_USE_SERVER_MAXES; - else - adapter->priv_flags &= ~IBMVNIC_USE_SERVER_MAXES; - - return 0; -} - static const struct ethtool_ops ibmvnic_ethtool_ops = { .get_drvinfo = ibmvnic_get_drvinfo, .get_msglevel = ibmvnic_get_msglevel, @@ -3434,8 +3377,6 @@ static const struct ethtool_ops ibmvnic_ethtool_ops = { .get_sset_count = ibmvnic_get_sset_count, .get_ethtool_stats = ibmvnic_get_ethtool_stats, .get_link_ksettings = ibmvnic_get_link_ksettings, - .get_priv_flags = ibmvnic_get_priv_flags, - .set_priv_flags = ibmvnic_set_priv_flags, }; /* Routines for managing CRQs/sCRQs */ diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 8f5cefb932dd..1310c861bf83 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -41,11 +41,6 @@ #define IBMVNIC_RESET_DELAY 100 -static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = { -#define IBMVNIC_USE_SERVER_MAXES 0x1 - "use-server-maxes" -}; - struct ibmvnic_login_buffer { __be32 len; __be32 version; @@ -883,7 +878,6 @@ struct ibmvnic_adapter { struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl; dma_addr_t ip_offload_ctrl_tok; u32 msg_enable; - u32 priv_flags; /* Vital Product Data (VPD) */ struct ibmvnic_vpd *vpd; From d9157f6806d1499e173770df1f1b234763de5c79 Mon Sep 17 00:00:00 2001 From: Pengcheng Yang Date: Tue, 26 Apr 2022 18:03:39 +0800 Subject: [PATCH 438/803] tcp: fix F-RTO may not work correctly when receiving DSACK Currently DSACK is regarded as a dupack, which may cause F-RTO to incorrectly enter "loss was real" when receiving DSACK. Packetdrill to demonstrate: // Enable F-RTO and TLP 0 `sysctl -q net.ipv4.tcp_frto=2` 0 `sysctl -q net.ipv4.tcp_early_retrans=3` 0 `sysctl -q net.ipv4.tcp_congestion_control=cubic` // Establish a connection +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 // RTT 10ms, RTO 210ms +.1 < S 0:0(0) win 32792 +0 > S. 0:0(0) ack 1 <...> +.01 < . 1:1(0) ack 1 win 257 +0 accept(3, ..., ...) = 4 // Send 2 data segments +0 write(4, ..., 2000) = 2000 +0 > P. 1:2001(2000) ack 1 // TLP +.022 > P. 1001:2001(1000) ack 1 // Continue to send 8 data segments +0 write(4, ..., 10000) = 10000 +0 > P. 2001:10001(8000) ack 1 // RTO +.188 > . 1:1001(1000) ack 1 // The original data is acked and new data is sent(F-RTO step 2.b) +0 < . 1:1(0) ack 2001 win 257 +0 > P. 10001:12001(2000) ack 1 // D-SACK caused by TLP is regarded as a dupack, this results in // the incorrect judgment of "loss was real"(F-RTO step 3.a) +.022 < . 1:1(0) ack 2001 win 257 // Never-retransmitted data(3001:4001) are acked and // expect to switch to open state(F-RTO step 3.b) +0 < . 1:1(0) ack 4001 win 257 +0 %{ assert tcpi_ca_state == 0, tcpi_ca_state }% Fixes: e33099f96d99 ("tcp: implement RFC5682 F-RTO") Signed-off-by: Pengcheng Yang Acked-by: Neal Cardwell Tested-by: Neal Cardwell Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/1650967419-2150-1-git-send-email-yangpc@wangsu.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 48f607522860..60f99e9fb6d1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3867,7 +3867,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_process_tlp_ack(sk, ack, flag); if (tcp_ack_is_dubious(sk, flag)) { - if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) { + if (!(flag & (FLAG_SND_UNA_ADVANCED | + FLAG_NOT_DUP | FLAG_DSACKING_ACK))) { num_dupack = 1; /* Consider if pure acks were aggregated in tcp_add_backlog() */ if (!(flag & FLAG_DATA)) From 32452a3eb8b64e01e2be717f518c0be046975b9d Mon Sep 17 00:00:00 2001 From: Joseph Ravichandran Date: Thu, 28 Apr 2022 12:57:52 -0400 Subject: [PATCH 439/803] io_uring: fix uninitialized field in rw io_kiocb io_rw_init_file does not initialize kiocb->private, so when iocb_bio_iopoll reads kiocb->private it can contain uninitialized data. Fixes: 3e08773c3841 ("block: switch polling to be bio based") Signed-off-by: Joseph Ravichandran Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 92ac50f139cd..e3ae26ff5d1a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3783,6 +3783,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode) if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll) return -EOPNOTSUPP; + kiocb->private = NULL; kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE; kiocb->ki_complete = io_complete_rw_iopoll; req->iopoll_completed = 0; From 2c33d775ef4c25c0e1e1cc0fd5496d02f76bfa20 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Thu, 28 Apr 2022 08:24:32 +0200 Subject: [PATCH 440/803] timekeeping: Mark NMI safe time accessors as notrace Mark the CLOCK_MONOTONIC fast time accessors as notrace. These functions are used in tracing to retrieve timestamps, so they should not recurse. Fixes: 4498e7467e9e ("time: Parametrize all tk_fast_mono users") Fixes: f09cb9a1808e ("time: Introduce tk_fast_raw") Reported-by: Steven Rostedt Signed-off-by: Kurt Kanzenbach Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220426175338.3807ca4f@gandalf.local.home/ Link: https://lore.kernel.org/r/20220428062432.61063-1-kurt@linutronix.de --- kernel/time/timekeeping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index dcdcb85121e4..3b1398fbddaf 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -482,7 +482,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) * of the following timestamps. Callers need to be aware of that and * deal with it. */ -u64 ktime_get_mono_fast_ns(void) +u64 notrace ktime_get_mono_fast_ns(void) { return __ktime_get_fast_ns(&tk_fast_mono); } @@ -494,7 +494,7 @@ EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); * Contrary to ktime_get_mono_fast_ns() this is always correct because the * conversion factor is not affected by NTP/PTP correction. */ -u64 ktime_get_raw_fast_ns(void) +u64 notrace ktime_get_raw_fast_ns(void) { return __ktime_get_fast_ns(&tk_fast_raw); } From bb300130e47fcefbe938f06dbacaef0312e28416 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Wed, 27 Apr 2022 14:16:19 +0300 Subject: [PATCH 441/803] ath11k: reduce the wait time of 11d scan and hw scan while add interface (cherry picked from commit 1f682dc9fb3790aa7ec27d3d122ff32b1eda1365 in wireless-next) Currently ath11k will wait 11d scan complete while add interface in ath11k_mac_op_add_interface(), when system resume without enable wowlan, ath11k_mac_op_add_interface() is called for each resume, thus it increase the resume time of system. And ath11k_mac_op_hw_scan() after ath11k_mac_op_add_interface() also needs some time cost because the previous 11d scan need more than 5 seconds when 6 GHz is enabled, then the scan started event will indicated to ath11k after the 11d scan completed. While 11d scan/hw scan is running in firmware, if ath11k update channel list to firmware by WMI_SCAN_CHAN_LIST_CMDID, then firmware will cancel the current scan which is running, it lead the scan failed. The patch commit 9dcf6808b253 ("ath11k: add 11d scan offload support") used finish_11d_scan/finish_11d_ch_list/pending_11d to synchronize the 11d scan/hw scan/channel list between ath11k/firmware/mac80211 and to avoid the scan fail. Add wait operation before ath11k update channel list, function ath11k_reg_update_chan_list() will wait until the current 11d scan/hw scan completed. And remove the wait operation of start 11d scan and waiting channel list complete in hw scan. After these changes, resume time cost reduce about 5 seconds and also hw scan time cost reduced obviously, and scan failed not seen. The 11d scan is sent to firmware only one time for each interface added in mac.c, and it is moved after the 1st hw scan because 11d scan will cost some time and thus leads the AP scan result update to UI delay. Currently priority of ath11k's hw scan is WMI_SCAN_PRIORITY_LOW, and priority of 11d scan in firmware is WMI_SCAN_PRIORITY_MEDIUM, then the 11d scan which sent after hw scan will cancel the hw scan in firmware, so change the priority to WMI_SCAN_PRIORITY_MEDIUM for the hw scan which is in front of the 11d scan, thus it will not happen scan cancel in firmware. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3 Fixes: 9dcf6808b253 ("ath11k: add 11d scan offload support") Link: https://bugzilla.kernel.org/show_bug.cgi?id=215777 Cc: Signed-off-by: Wen Gong Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220328035832.14122-1-quic_wgong@quicinc.com Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220427111619.9758-1-kvalo@kernel.org --- drivers/net/wireless/ath/ath11k/core.c | 1 + drivers/net/wireless/ath/ath11k/core.h | 13 +++-- drivers/net/wireless/ath/ath11k/mac.c | 71 +++++++++++--------------- drivers/net/wireless/ath/ath11k/mac.h | 2 +- drivers/net/wireless/ath/ath11k/reg.c | 43 ++++++++++------ drivers/net/wireless/ath/ath11k/reg.h | 2 +- drivers/net/wireless/ath/ath11k/wmi.c | 16 +++++- 7 files changed, 84 insertions(+), 64 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index 71eb7d04c3bf..90a5df1fbdbd 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -1288,6 +1288,7 @@ static void ath11k_core_restart(struct work_struct *work) ieee80211_stop_queues(ar->hw); ath11k_mac_drain_tx(ar); + complete(&ar->completed_11d_scan); complete(&ar->scan.started); complete(&ar->scan.completed); complete(&ar->peer_assoc_done); diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index c0228e91a596..b8634eddf49a 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -38,6 +38,8 @@ extern unsigned int ath11k_frame_mode; +#define ATH11K_SCAN_TIMEOUT_HZ (20 * HZ) + #define ATH11K_MON_TIMER_INTERVAL 10 enum ath11k_supported_bw { @@ -189,6 +191,12 @@ enum ath11k_scan_state { ATH11K_SCAN_ABORTING, }; +enum ath11k_11d_state { + ATH11K_11D_IDLE, + ATH11K_11D_PREPARING, + ATH11K_11D_RUNNING, +}; + enum ath11k_dev_flags { ATH11K_CAC_RUNNING, ATH11K_FLAG_CORE_REGISTERED, @@ -607,9 +615,8 @@ struct ath11k { bool dfs_block_radar_events; struct ath11k_thermal thermal; u32 vdev_id_11d_scan; - struct completion finish_11d_scan; - struct completion finish_11d_ch_list; - bool pending_11d; + struct completion completed_11d_scan; + enum ath11k_11d_state state_11d; bool regdom_set_by_user; int hw_rate_code; u8 twt_enabled; diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index e6b34b0d61bd..58ff761393db 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -3601,26 +3601,6 @@ static int ath11k_mac_op_hw_scan(struct ieee80211_hw *hw, if (ret) goto exit; - /* Currently the pending_11d=true only happened 1 time while - * wlan interface up in ath11k_mac_11d_scan_start(), it is called by - * ath11k_mac_op_add_interface(), after wlan interface up, - * pending_11d=false always. - * If remove below wait, it always happened scan fail and lead connect - * fail while wlan interface up, because it has a 11d scan which is running - * in firmware, and lead this scan failed. - */ - if (ar->pending_11d) { - long time_left; - unsigned long timeout = 5 * HZ; - - if (ar->supports_6ghz) - timeout += 5 * HZ; - - time_left = wait_for_completion_timeout(&ar->finish_11d_ch_list, timeout); - ath11k_dbg(ar->ab, ATH11K_DBG_MAC, - "mac wait 11d channel list time left %ld\n", time_left); - } - memset(&arg, 0, sizeof(arg)); ath11k_wmi_start_scan_init(ar, &arg); arg.vdev_id = arvif->vdev_id; @@ -3686,6 +3666,10 @@ exit: kfree(arg.extraie.ptr); mutex_unlock(&ar->conf_mutex); + + if (ar->state_11d == ATH11K_11D_PREPARING) + ath11k_mac_11d_scan_start(ar, arvif->vdev_id); + return ret; } @@ -5814,7 +5798,7 @@ static int ath11k_mac_op_start(struct ieee80211_hw *hw) /* TODO: Do we need to enable ANI? */ - ath11k_reg_update_chan_list(ar); + ath11k_reg_update_chan_list(ar, false); ar->num_started_vdevs = 0; ar->num_created_vdevs = 0; @@ -5881,6 +5865,11 @@ static void ath11k_mac_op_stop(struct ieee80211_hw *hw) cancel_work_sync(&ar->ab->update_11d_work); cancel_work_sync(&ar->ab->rfkill_work); + if (ar->state_11d == ATH11K_11D_PREPARING) { + ar->state_11d = ATH11K_11D_IDLE; + complete(&ar->completed_11d_scan); + } + spin_lock_bh(&ar->data_lock); list_for_each_entry_safe(ppdu_stats, tmp, &ar->ppdu_stats_info, list) { list_del(&ppdu_stats->list); @@ -6051,7 +6040,7 @@ static bool ath11k_mac_vif_ap_active_any(struct ath11k_base *ab) return false; } -void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id, bool wait) +void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id) { struct wmi_11d_scan_start_params param; int ret; @@ -6079,28 +6068,22 @@ void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id, bool wait) ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "mac start 11d scan\n"); - if (wait) - reinit_completion(&ar->finish_11d_scan); - ret = ath11k_wmi_send_11d_scan_start_cmd(ar, ¶m); if (ret) { ath11k_warn(ar->ab, "failed to start 11d scan vdev %d ret: %d\n", vdev_id, ret); } else { ar->vdev_id_11d_scan = vdev_id; - if (wait) { - ar->pending_11d = true; - ret = wait_for_completion_timeout(&ar->finish_11d_scan, - 5 * HZ); - ath11k_dbg(ar->ab, ATH11K_DBG_MAC, - "mac 11d scan left time %d\n", ret); - - if (!ret) - ar->pending_11d = false; - } + if (ar->state_11d == ATH11K_11D_PREPARING) + ar->state_11d = ATH11K_11D_RUNNING; } fin: + if (ar->state_11d == ATH11K_11D_PREPARING) { + ar->state_11d = ATH11K_11D_IDLE; + complete(&ar->completed_11d_scan); + } + mutex_unlock(&ar->ab->vdev_id_11d_lock); } @@ -6123,12 +6106,15 @@ void ath11k_mac_11d_scan_stop(struct ath11k *ar) vdev_id = ar->vdev_id_11d_scan; ret = ath11k_wmi_send_11d_scan_stop_cmd(ar, vdev_id); - if (ret) + if (ret) { ath11k_warn(ar->ab, "failed to stopt 11d scan vdev %d ret: %d\n", vdev_id, ret); - else + } else { ar->vdev_id_11d_scan = ATH11K_11D_INVALID_VDEV_ID; + ar->state_11d = ATH11K_11D_IDLE; + complete(&ar->completed_11d_scan); + } } mutex_unlock(&ar->ab->vdev_id_11d_lock); } @@ -6324,8 +6310,10 @@ static int ath11k_mac_op_add_interface(struct ieee80211_hw *hw, goto err_peer_del; } - ath11k_mac_11d_scan_start(ar, arvif->vdev_id, true); - + if (test_bit(WMI_TLV_SERVICE_11D_OFFLOAD, ab->wmi_ab.svc_map)) { + reinit_completion(&ar->completed_11d_scan); + ar->state_11d = ATH11K_11D_PREPARING; + } break; case WMI_VDEV_TYPE_MONITOR: set_bit(ATH11K_FLAG_MONITOR_VDEV_CREATED, &ar->monitor_flags); @@ -7190,7 +7178,7 @@ ath11k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw, } if (arvif->vdev_type == WMI_VDEV_TYPE_STA) - ath11k_mac_11d_scan_start(ar, arvif->vdev_id, false); + ath11k_mac_11d_scan_start(ar, arvif->vdev_id); mutex_unlock(&ar->conf_mutex); } @@ -8671,8 +8659,7 @@ int ath11k_mac_allocate(struct ath11k_base *ab) ar->monitor_vdev_id = -1; clear_bit(ATH11K_FLAG_MONITOR_VDEV_CREATED, &ar->monitor_flags); ar->vdev_id_11d_scan = ATH11K_11D_INVALID_VDEV_ID; - init_completion(&ar->finish_11d_scan); - init_completion(&ar->finish_11d_ch_list); + init_completion(&ar->completed_11d_scan); } return 0; diff --git a/drivers/net/wireless/ath/ath11k/mac.h b/drivers/net/wireless/ath/ath11k/mac.h index 0e6c870b09c8..29b523af66dd 100644 --- a/drivers/net/wireless/ath/ath11k/mac.h +++ b/drivers/net/wireless/ath/ath11k/mac.h @@ -130,7 +130,7 @@ extern const struct htt_rx_ring_tlv_filter ath11k_mac_mon_status_filter_default; #define ATH11K_SCAN_11D_INTERVAL 600000 #define ATH11K_11D_INVALID_VDEV_ID 0xFFFF -void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id, bool wait); +void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id); void ath11k_mac_11d_scan_stop(struct ath11k *ar); void ath11k_mac_11d_scan_stop_all(struct ath11k_base *ab); diff --git a/drivers/net/wireless/ath/ath11k/reg.c b/drivers/net/wireless/ath/ath11k/reg.c index 81e11cde31d7..80a697771393 100644 --- a/drivers/net/wireless/ath/ath11k/reg.c +++ b/drivers/net/wireless/ath/ath11k/reg.c @@ -102,7 +102,7 @@ ath11k_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request) ar->regdom_set_by_user = true; } -int ath11k_reg_update_chan_list(struct ath11k *ar) +int ath11k_reg_update_chan_list(struct ath11k *ar, bool wait) { struct ieee80211_supported_band **bands; struct scan_chan_list_params *params; @@ -111,7 +111,32 @@ int ath11k_reg_update_chan_list(struct ath11k *ar) struct channel_param *ch; enum nl80211_band band; int num_channels = 0; - int i, ret; + int i, ret, left; + + if (wait && ar->state_11d != ATH11K_11D_IDLE) { + left = wait_for_completion_timeout(&ar->completed_11d_scan, + ATH11K_SCAN_TIMEOUT_HZ); + if (!left) { + ath11k_dbg(ar->ab, ATH11K_DBG_REG, + "failed to receive 11d scan complete: timed out\n"); + ar->state_11d = ATH11K_11D_IDLE; + } + ath11k_dbg(ar->ab, ATH11K_DBG_REG, + "reg 11d scan wait left time %d\n", left); + } + + if (wait && + (ar->scan.state == ATH11K_SCAN_STARTING || + ar->scan.state == ATH11K_SCAN_RUNNING)) { + left = wait_for_completion_timeout(&ar->scan.completed, + ATH11K_SCAN_TIMEOUT_HZ); + if (!left) + ath11k_dbg(ar->ab, ATH11K_DBG_REG, + "failed to receive hw scan complete: timed out\n"); + + ath11k_dbg(ar->ab, ATH11K_DBG_REG, + "reg hw scan wait left time %d\n", left); + } bands = hw->wiphy->bands; for (band = 0; band < NUM_NL80211_BANDS; band++) { @@ -193,11 +218,6 @@ int ath11k_reg_update_chan_list(struct ath11k *ar) ret = ath11k_wmi_send_scan_chan_list_cmd(ar, params); kfree(params); - if (ar->pending_11d) { - complete(&ar->finish_11d_ch_list); - ar->pending_11d = false; - } - return ret; } @@ -263,15 +283,8 @@ int ath11k_regd_update(struct ath11k *ar) goto err; } - if (ar->pending_11d) - complete(&ar->finish_11d_scan); - rtnl_lock(); wiphy_lock(ar->hw->wiphy); - - if (ar->pending_11d) - reinit_completion(&ar->finish_11d_ch_list); - ret = regulatory_set_wiphy_regd_sync(ar->hw->wiphy, regd_copy); wiphy_unlock(ar->hw->wiphy); rtnl_unlock(); @@ -282,7 +295,7 @@ int ath11k_regd_update(struct ath11k *ar) goto err; if (ar->state == ATH11K_STATE_ON) { - ret = ath11k_reg_update_chan_list(ar); + ret = ath11k_reg_update_chan_list(ar, true); if (ret) goto err; } diff --git a/drivers/net/wireless/ath/ath11k/reg.h b/drivers/net/wireless/ath/ath11k/reg.h index 5fb9dc03a74e..2f284f26378d 100644 --- a/drivers/net/wireless/ath/ath11k/reg.h +++ b/drivers/net/wireless/ath/ath11k/reg.h @@ -32,5 +32,5 @@ struct ieee80211_regdomain * ath11k_reg_build_regd(struct ath11k_base *ab, struct cur_regulatory_info *reg_info, bool intersect); int ath11k_regd_update(struct ath11k *ar); -int ath11k_reg_update_chan_list(struct ath11k *ar); +int ath11k_reg_update_chan_list(struct ath11k *ar, bool wait); #endif diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index b4f86c45d81f..2751fe8814df 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -2015,7 +2015,10 @@ void ath11k_wmi_start_scan_init(struct ath11k *ar, { /* setup commonly used values */ arg->scan_req_id = 1; - arg->scan_priority = WMI_SCAN_PRIORITY_LOW; + if (ar->state_11d == ATH11K_11D_PREPARING) + arg->scan_priority = WMI_SCAN_PRIORITY_MEDIUM; + else + arg->scan_priority = WMI_SCAN_PRIORITY_LOW; arg->dwell_time_active = 50; arg->dwell_time_active_2g = 0; arg->dwell_time_passive = 150; @@ -6350,8 +6353,10 @@ static void ath11k_wmi_op_ep_tx_credits(struct ath11k_base *ab) static int ath11k_reg_11d_new_cc_event(struct ath11k_base *ab, struct sk_buff *skb) { const struct wmi_11d_new_cc_ev *ev; + struct ath11k *ar; + struct ath11k_pdev *pdev; const void **tb; - int ret; + int ret, i; tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC); if (IS_ERR(tb)) { @@ -6377,6 +6382,13 @@ static int ath11k_reg_11d_new_cc_event(struct ath11k_base *ab, struct sk_buff *s kfree(tb); + for (i = 0; i < ab->num_radios; i++) { + pdev = &ab->pdevs[i]; + ar = pdev->ar; + ar->state_11d = ATH11K_11D_IDLE; + complete(&ar->completed_11d_scan); + } + queue_work(ab->workqueue, &ab->update_11d_work); return 0; From b4e61fc031b11dd807dffc46cebbf0e25966d3d1 Mon Sep 17 00:00:00 2001 From: Xu Yu Date: Thu, 28 Apr 2022 23:14:43 -0700 Subject: [PATCH 442/803] Revert "mm/memory-failure.c: skip huge_zero_page in memory_failure()" Patch series "mm/memory-failure: rework fix on huge_zero_page splitting". This patch (of 2): This reverts commit d173d5417fb67411e623d394aab986d847e47dad. The commit d173d5417fb6 ("mm/memory-failure.c: skip huge_zero_page in memory_failure()") explicitly skips huge_zero_page in memory_failure(), in order to avoid triggering VM_BUG_ON_PAGE on huge_zero_page in split_huge_page_to_list(). This works, but Yang Shi thinks that, Raising BUG is overkilling for splitting huge_zero_page. The huge_zero_page can't be met from normal paths other than memory failure, but memory failure is a valid caller. So I tend to replace the BUG to WARN + returning -EBUSY. If we don't care about the reason code in memory failure, we don't have to touch memory failure. And for the issue that huge_zero_page will be set PG_has_hwpoisoned, Yang Shi comments that, The anonymous page fault doesn't check if the page is poisoned or not since it typically gets a fresh allocated page and assumes the poisoned page (isolated successfully) can't be reallocated again. But huge zero page and base zero page are reused every time. So no matter what fix we pick, the issue is always there. Finally, Yang, David, Anshuman and Naoya all agree to fix the bug, i.e., to split huge_zero_page, in split_huge_page_to_list(). This reverts the commit d173d5417fb6 ("mm/memory-failure.c: skip huge_zero_page in memory_failure()"), and the original bug will be fixed by the next patch. Link: https://lkml.kernel.org/r/872cefb182ba1dd686b0e7db1e6b2ebe5a4fff87.1651039624.git.xuyu@linux.alibaba.com Fixes: d173d5417fb6 ("mm/memory-failure.c: skip huge_zero_page in memory_failure()") Fixes: 6a46079cf57a ("HWPOISON: The high level memory error handler in the VM v7") Signed-off-by: Xu Yu Suggested-by: Yang Shi Reviewed-by: Yang Shi Reviewed-by: Miaohe Lin Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton --- mm/memory-failure.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 27760c19bad7..2020944398c9 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1860,19 +1860,6 @@ try_again: } if (PageTransHuge(hpage)) { - /* - * Bail out before SetPageHasHWPoisoned() if hpage is - * huge_zero_page, although PG_has_hwpoisoned is not - * checked in set_huge_zero_page(). - * - * TODO: Handle memory failure of huge_zero_page thoroughly. - */ - if (is_huge_zero_page(hpage)) { - action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED); - res = -EBUSY; - goto unlock_mutex; - } - /* * The flag must be set after the refcount is bumped * otherwise it may race with THP split. From 478d134e9506c7e9bfe2830ed03dd85e97966313 Mon Sep 17 00:00:00 2001 From: Xu Yu Date: Thu, 28 Apr 2022 23:14:43 -0700 Subject: [PATCH 443/803] mm/huge_memory: do not overkill when splitting huge_zero_page Kernel panic when injecting memory_failure for the global huge_zero_page, when CONFIG_DEBUG_VM is enabled, as follows. Injecting memory failure for pfn 0x109ff9 at process virtual address 0x20ff9000 page:00000000fb053fc3 refcount:2 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x109e00 head:00000000fb053fc3 order:9 compound_mapcount:0 compound_pincount:0 flags: 0x17fffc000010001(locked|head|node=0|zone=2|lastcpupid=0x1ffff) raw: 017fffc000010001 0000000000000000 dead000000000122 0000000000000000 raw: 0000000000000000 0000000000000000 00000002ffffffff 0000000000000000 page dumped because: VM_BUG_ON_PAGE(is_huge_zero_page(head)) ------------[ cut here ]------------ kernel BUG at mm/huge_memory.c:2499! invalid opcode: 0000 [#1] PREEMPT SMP PTI CPU: 6 PID: 553 Comm: split_bug Not tainted 5.18.0-rc1+ #11 Hardware name: Alibaba Cloud Alibaba Cloud ECS, BIOS 3288b3c 04/01/2014 RIP: 0010:split_huge_page_to_list+0x66a/0x880 Code: 84 9b fb ff ff 48 8b 7c 24 08 31 f6 e8 9f 5d 2a 00 b8 b8 02 00 00 e9 e8 fb ff ff 48 c7 c6 e8 47 3c 82 4c b RSP: 0018:ffffc90000dcbdf8 EFLAGS: 00010246 RAX: 000000000000003c RBX: 0000000000000001 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff823e4c4f RDI: 00000000ffffffff RBP: ffff88843fffdb40 R08: 0000000000000000 R09: 00000000fffeffff R10: ffffc90000dcbc48 R11: ffffffff82d68448 R12: ffffea0004278000 R13: ffffffff823c6203 R14: 0000000000109ff9 R15: ffffea000427fe40 FS: 00007fc375a26740(0000) GS:ffff88842fd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc3757c9290 CR3: 0000000102174006 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: try_to_split_thp_page+0x3a/0x130 memory_failure+0x128/0x800 madvise_inject_error.cold+0x8b/0xa1 __x64_sys_madvise+0x54/0x60 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7fc3754f8bf9 Code: 01 00 48 81 c4 80 00 00 00 e9 f1 fe ff ff 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 8 RSP: 002b:00007ffeda93a1d8 EFLAGS: 00000217 ORIG_RAX: 000000000000001c RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fc3754f8bf9 RDX: 0000000000000064 RSI: 0000000000003000 RDI: 0000000020ff9000 RBP: 00007ffeda93a200 R08: 0000000000000000 R09: 0000000000000000 R10: 00000000ffffffff R11: 0000000000000217 R12: 0000000000400490 R13: 00007ffeda93a2e0 R14: 0000000000000000 R15: 0000000000000000 We think that raising BUG is overkilling for splitting huge_zero_page, the huge_zero_page can't be met from normal paths other than memory failure, but memory failure is a valid caller. So we tend to replace the BUG to WARN + returning -EBUSY, and thus the panic above won't happen again. Link: https://lkml.kernel.org/r/f35f8b97377d5d3ede1bc5ac3114da888c57cbce.1651052574.git.xuyu@linux.alibaba.com Fixes: d173d5417fb6 ("mm/memory-failure.c: skip huge_zero_page in memory_failure()") Fixes: 6a46079cf57a ("HWPOISON: The high level memory error handler in the VM v7") Signed-off-by: Xu Yu Suggested-by: Yang Shi Reported-by: kernel test robot Reviewed-by: Naoya Horiguchi Reviewed-by: Yang Shi Reviewed-by: Miaohe Lin Cc: Signed-off-by: Andrew Morton --- mm/huge_memory.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c468fee595ff..910a138e9859 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2495,11 +2495,16 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) struct address_space *mapping = NULL; int extra_pins, ret; pgoff_t end; + bool is_hzp; - VM_BUG_ON_PAGE(is_huge_zero_page(head), head); VM_BUG_ON_PAGE(!PageLocked(head), head); VM_BUG_ON_PAGE(!PageCompound(head), head); + is_hzp = is_huge_zero_page(head); + VM_WARN_ON_ONCE_PAGE(is_hzp, head); + if (is_hzp) + return -EBUSY; + if (PageWriteback(head)) return -EBUSY; From 1825b93b626e99eb9a0f9f50342c7b2fa201b387 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 28 Apr 2022 23:14:44 -0700 Subject: [PATCH 444/803] mm/hwpoison: use pr_err() instead of dump_page() in get_any_page() The following VM_BUG_ON_FOLIO() is triggered when memory error event happens on the (thp/folio) pages which are about to be freed: [ 1160.232771] page:00000000b36a8a0f refcount:1 mapcount:0 mapping:0000000000000000 index:0x1 pfn:0x16a000 [ 1160.236916] page:00000000b36a8a0f refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 pfn:0x16a000 [ 1160.240684] flags: 0x57ffffc0800000(hwpoison|node=1|zone=2|lastcpupid=0x1fffff) [ 1160.243458] raw: 0057ffffc0800000 dead000000000100 dead000000000122 0000000000000000 [ 1160.246268] raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000 [ 1160.249197] page dumped because: VM_BUG_ON_FOLIO(!folio_test_large(folio)) [ 1160.251815] ------------[ cut here ]------------ [ 1160.253438] kernel BUG at include/linux/mm.h:788! [ 1160.256162] invalid opcode: 0000 [#1] PREEMPT SMP PTI [ 1160.258172] CPU: 2 PID: 115368 Comm: mceinj.sh Tainted: G E 5.18.0-rc1-v5.18-rc1-220404-2353-005-g83111+ #3 [ 1160.262049] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1.fc35 04/01/2014 [ 1160.265103] RIP: 0010:dump_page.cold+0x27e/0x2bd [ 1160.266757] Code: fe ff ff 48 c7 c6 81 f1 5a 98 e9 4c fe ff ff 48 c7 c6 a1 95 59 98 e9 40 fe ff ff 48 c7 c6 50 bf 5a 98 48 89 ef e8 9d 04 6d ff <0f> 0b 41 f7 c4 ff 0f 00 00 0f 85 9f fd ff ff 49 8b 04 24 a9 00 00 [ 1160.273180] RSP: 0018:ffffaa2c4d59fd18 EFLAGS: 00010292 [ 1160.274969] RAX: 000000000000003e RBX: 0000000000000001 RCX: 0000000000000000 [ 1160.277263] RDX: 0000000000000001 RSI: ffffffff985995a1 RDI: 00000000ffffffff [ 1160.279571] RBP: ffffdc9c45a80000 R08: 0000000000000000 R09: 00000000ffffdfff [ 1160.281794] R10: ffffaa2c4d59fb08 R11: ffffffff98940d08 R12: ffffdc9c45a80000 [ 1160.283920] R13: ffffffff985b6f94 R14: 0000000000000000 R15: ffffdc9c45a80000 [ 1160.286641] FS: 00007eff54ce1740(0000) GS:ffff99c67bd00000(0000) knlGS:0000000000000000 [ 1160.289498] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1160.291106] CR2: 00005628381a5f68 CR3: 0000000104712003 CR4: 0000000000170ee0 [ 1160.293031] Call Trace: [ 1160.293724] [ 1160.294334] get_hwpoison_page+0x47d/0x570 [ 1160.295474] memory_failure+0x106/0xaa0 [ 1160.296474] ? security_capable+0x36/0x50 [ 1160.297524] hard_offline_page_store+0x43/0x80 [ 1160.298684] kernfs_fop_write_iter+0x11c/0x1b0 [ 1160.299829] new_sync_write+0xf9/0x160 [ 1160.300810] vfs_write+0x209/0x290 [ 1160.301835] ksys_write+0x4f/0xc0 [ 1160.302718] do_syscall_64+0x3b/0x90 [ 1160.303664] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 1160.304981] RIP: 0033:0x7eff54b018b7 As shown in the RIP address, this VM_BUG_ON in folio_entire_mapcount() is called from dump_page("hwpoison: unhandlable page") in get_any_page(). The below explains the mechanism of the race: CPU 0 CPU 1 memory_failure get_hwpoison_page get_any_page dump_page compound = PageCompound free_pages_prepare page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP folio_entire_mapcount VM_BUG_ON_FOLIO(!folio_test_large(folio)) So replace dump_page() with safer one, pr_err(). Link: https://lkml.kernel.org/r/20220427053220.719866-1-naoya.horiguchi@linux.dev Fixes: 74e8ee4708a8 ("mm: Turn head_compound_mapcount() into folio_entire_mapcount()") Signed-off-by: Naoya Horiguchi Reviewed-by: John Hubbard Reviewed-by: Miaohe Lin Cc: Matthew Wilcox Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: William Kucharski Cc: Signed-off-by: Andrew Morton --- mm/memory-failure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 2020944398c9..d4a4adcca01f 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1274,7 +1274,7 @@ try_again: } out: if (ret == -EIO) - dump_page(p, "hwpoison: unhandlable page"); + pr_err("Memory failure: %#lx: unhandlable page.\n", page_to_pfn(p)); return ret; } From 72ed3ee9fa0b461ad086403a8b5336154bd82234 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 22 Apr 2022 10:23:37 +0200 Subject: [PATCH 445/803] can: isotp: remove re-binding of bound socket As a carry over from the CAN_RAW socket (which allows to change the CAN interface while mantaining the filter setup) the re-binding of the CAN_ISOTP socket needs to take care about CAN ID address information and subscriptions. It turned out that this feature is so limited (e.g. the sockopts remain fix) that it finally has never been needed/used. In opposite to the stateless CAN_RAW socket the switching of the CAN ID subscriptions might additionally lead to an interrupted ongoing PDU reception. So better remove this unneeded complexity. Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") Link: https://lore.kernel.org/all/20220422082337.1676-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/net/can/isotp.c b/net/can/isotp.c index ff5d7870294e..1e7c6a460ef9 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1189,6 +1189,11 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) lock_sock(sk); + if (so->bound) { + err = -EINVAL; + goto out; + } + /* do not register frame reception for functional addressing */ if (so->opt.flags & CAN_ISOTP_SF_BROADCAST) do_rx_reg = 0; @@ -1199,10 +1204,6 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) goto out; } - if (so->bound && addr->can_ifindex == so->ifindex && - rx_id == so->rxid && tx_id == so->txid) - goto out; - dev = dev_get_by_index(net, addr->can_ifindex); if (!dev) { err = -ENODEV; @@ -1237,22 +1238,6 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) dev_put(dev); - if (so->bound && do_rx_reg) { - /* unregister old filter */ - if (so->ifindex) { - dev = dev_get_by_index(net, so->ifindex); - if (dev) { - can_rx_unregister(net, dev, so->rxid, - SINGLE_MASK(so->rxid), - isotp_rcv, sk); - can_rx_unregister(net, dev, so->txid, - SINGLE_MASK(so->txid), - isotp_rcv_echo, sk); - dev_put(dev); - } - } - } - /* switch to new settings */ so->ifindex = ifindex; so->rxid = rx_id; From 47f070a63e735bcc8d481de31be1b5a1aa62b31c Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Mon, 25 Apr 2022 12:24:00 +0800 Subject: [PATCH 446/803] can: grcan: grcan_close(): fix deadlock There are deadlocks caused by del_timer_sync(&priv->hang_timer) and del_timer_sync(&priv->rr_timer) in grcan_close(), one of the deadlocks are shown below: (Thread 1) | (Thread 2) | grcan_reset_timer() grcan_close() | mod_timer() spin_lock_irqsave() //(1) | (wait a time) ... | grcan_initiate_running_reset() del_timer_sync() | spin_lock_irqsave() //(2) (wait timer to stop) | ... We hold priv->lock in position (1) of thread 1 and use del_timer_sync() to wait timer to stop, but timer handler also need priv->lock in position (2) of thread 2. As a result, grcan_close() will block forever. This patch extracts del_timer_sync() from the protection of spin_lock_irqsave(), which could let timer handler to obtain the needed lock. Link: https://lore.kernel.org/all/20220425042400.66517-1-duoming@zju.edu.cn Fixes: 6cec9b07fe6a ("can: grcan: Add device driver for GRCAN and GRHCAN cores") Cc: stable@vger.kernel.org Signed-off-by: Duoming Zhou Reviewed-by: Andreas Larsson Signed-off-by: Marc Kleine-Budde --- drivers/net/can/grcan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index d0c5a7a60daf..1189057b5d68 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -1102,8 +1102,10 @@ static int grcan_close(struct net_device *dev) priv->closing = true; if (priv->need_txbug_workaround) { + spin_unlock_irqrestore(&priv->lock, flags); del_timer_sync(&priv->hang_timer); del_timer_sync(&priv->rr_timer); + spin_lock_irqsave(&priv->lock, flags); } netif_stop_queue(dev); grcan_stop_hardware(dev); From 101da4268626b00d16356a6bf284d66e44c46ff9 Mon Sep 17 00:00:00 2001 From: Daniel Hellstrom Date: Fri, 29 Apr 2022 10:46:54 +0200 Subject: [PATCH 447/803] can: grcan: use ofdev->dev when allocating DMA memory Use the device of the device tree node should be rather than the device of the struct net_device when allocating DMA buffers. The driver got away with it on sparc32 until commit 53b7670e5735 ("sparc: factor the dma coherent mapping into helper") after which the driver oopses. Fixes: 6cec9b07fe6a ("can: grcan: Add device driver for GRCAN and GRHCAN cores") Link: https://lore.kernel.org/all/20220429084656.29788-2-andreas@gaisler.com Cc: stable@vger.kernel.org Signed-off-by: Daniel Hellstrom Signed-off-by: Andreas Larsson Signed-off-by: Marc Kleine-Budde --- drivers/net/can/grcan.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index 1189057b5d68..f8860900575b 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -248,6 +248,7 @@ struct grcan_device_config { struct grcan_priv { struct can_priv can; /* must be the first member */ struct net_device *dev; + struct device *ofdev_dev; struct napi_struct napi; struct grcan_registers __iomem *regs; /* ioremap'ed registers */ @@ -921,7 +922,7 @@ static void grcan_free_dma_buffers(struct net_device *dev) struct grcan_priv *priv = netdev_priv(dev); struct grcan_dma *dma = &priv->dma; - dma_free_coherent(&dev->dev, dma->base_size, dma->base_buf, + dma_free_coherent(priv->ofdev_dev, dma->base_size, dma->base_buf, dma->base_handle); memset(dma, 0, sizeof(*dma)); } @@ -946,7 +947,7 @@ static int grcan_allocate_dma_buffers(struct net_device *dev, /* Extra GRCAN_BUFFER_ALIGNMENT to allow for alignment */ dma->base_size = lsize + ssize + GRCAN_BUFFER_ALIGNMENT; - dma->base_buf = dma_alloc_coherent(&dev->dev, + dma->base_buf = dma_alloc_coherent(priv->ofdev_dev, dma->base_size, &dma->base_handle, GFP_KERNEL); @@ -1589,6 +1590,7 @@ static int grcan_setup_netdev(struct platform_device *ofdev, memcpy(&priv->config, &grcan_module_config, sizeof(struct grcan_device_config)); priv->dev = dev; + priv->ofdev_dev = &ofdev->dev; priv->regs = base; priv->can.bittiming_const = &grcan_bittiming_const; priv->can.do_set_bittiming = grcan_set_bittiming; From 1e93ed26acf03fe6c97c6d573a10178596aadd43 Mon Sep 17 00:00:00 2001 From: Andreas Larsson Date: Fri, 29 Apr 2022 10:46:55 +0200 Subject: [PATCH 448/803] can: grcan: grcan_probe(): fix broken system id check for errata workaround needs The systemid property was checked for in the wrong place of the device tree and compared to the wrong value. Fixes: 6cec9b07fe6a ("can: grcan: Add device driver for GRCAN and GRHCAN cores") Link: https://lore.kernel.org/all/20220429084656.29788-3-andreas@gaisler.com Cc: stable@vger.kernel.org Signed-off-by: Andreas Larsson Signed-off-by: Marc Kleine-Budde --- drivers/net/can/grcan.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index f8860900575b..4ca3da56d3aa 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -241,7 +241,7 @@ struct grcan_device_config { .rxsize = GRCAN_DEFAULT_BUFFER_SIZE, \ } -#define GRCAN_TXBUG_SAFE_GRLIB_VERSION 0x4100 +#define GRCAN_TXBUG_SAFE_GRLIB_VERSION 4100 #define GRLIB_VERSION_MASK 0xffff /* GRCAN private data structure */ @@ -1643,6 +1643,7 @@ exit_free_candev: static int grcan_probe(struct platform_device *ofdev) { struct device_node *np = ofdev->dev.of_node; + struct device_node *sysid_parent; u32 sysid, ambafreq; int irq, err; void __iomem *base; @@ -1651,10 +1652,15 @@ static int grcan_probe(struct platform_device *ofdev) /* Compare GRLIB version number with the first that does not * have the tx bug (see start_xmit) */ - err = of_property_read_u32(np, "systemid", &sysid); - if (!err && ((sysid & GRLIB_VERSION_MASK) - >= GRCAN_TXBUG_SAFE_GRLIB_VERSION)) - txbug = false; + sysid_parent = of_find_node_by_path("/ambapp0"); + if (sysid_parent) { + of_node_get(sysid_parent); + err = of_property_read_u32(sysid_parent, "systemid", &sysid); + if (!err && ((sysid & GRLIB_VERSION_MASK) >= + GRCAN_TXBUG_SAFE_GRLIB_VERSION)) + txbug = false; + of_node_put(sysid_parent); + } err = of_property_read_u32(np, "freq", &ambafreq); if (err) { From 2873d4d52f7c52d60b316ba6c47bd7122b5a9861 Mon Sep 17 00:00:00 2001 From: Andreas Larsson Date: Fri, 29 Apr 2022 10:46:56 +0200 Subject: [PATCH 449/803] can: grcan: only use the NAPI poll budget for RX The previous split budget between TX and RX made it return not using the entire budget but at the same time not having calling called napi_complete. This sometimes led to the poll to not be called, and at the same time having TX and RX interrupts disabled resulting in the driver getting stuck. Fixes: 6cec9b07fe6a ("can: grcan: Add device driver for GRCAN and GRHCAN cores") Link: https://lore.kernel.org/all/20220429084656.29788-4-andreas@gaisler.com Cc: stable@vger.kernel.org Signed-off-by: Andreas Larsson Signed-off-by: Marc Kleine-Budde --- drivers/net/can/grcan.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index 4ca3da56d3aa..5215bd9b2c80 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -1125,7 +1125,7 @@ static int grcan_close(struct net_device *dev) return 0; } -static int grcan_transmit_catch_up(struct net_device *dev, int budget) +static void grcan_transmit_catch_up(struct net_device *dev) { struct grcan_priv *priv = netdev_priv(dev); unsigned long flags; @@ -1133,7 +1133,7 @@ static int grcan_transmit_catch_up(struct net_device *dev, int budget) spin_lock_irqsave(&priv->lock, flags); - work_done = catch_up_echo_skb(dev, budget, true); + work_done = catch_up_echo_skb(dev, -1, true); if (work_done) { if (!priv->resetting && !priv->closing && !(priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)) @@ -1147,8 +1147,6 @@ static int grcan_transmit_catch_up(struct net_device *dev, int budget) } spin_unlock_irqrestore(&priv->lock, flags); - - return work_done; } static int grcan_receive(struct net_device *dev, int budget) @@ -1230,19 +1228,13 @@ static int grcan_poll(struct napi_struct *napi, int budget) struct net_device *dev = priv->dev; struct grcan_registers __iomem *regs = priv->regs; unsigned long flags; - int tx_work_done, rx_work_done; - int rx_budget = budget / 2; - int tx_budget = budget - rx_budget; + int work_done; - /* Half of the budget for receiving messages */ - rx_work_done = grcan_receive(dev, rx_budget); + work_done = grcan_receive(dev, budget); - /* Half of the budget for transmitting messages as that can trigger echo - * frames being received - */ - tx_work_done = grcan_transmit_catch_up(dev, tx_budget); + grcan_transmit_catch_up(dev); - if (rx_work_done < rx_budget && tx_work_done < tx_budget) { + if (work_done < budget) { napi_complete(napi); /* Guarantee no interference with a running reset that otherwise @@ -1259,7 +1251,7 @@ static int grcan_poll(struct napi_struct *napi, int budget) spin_unlock_irqrestore(&priv->lock, flags); } - return rx_work_done + tx_work_done; + return work_done; } /* Work tx bug by waiting while for the risky situation to clear. If that fails, From 7e0815b3e09986d2fe651199363e135b9358132a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 28 Apr 2022 15:50:54 +0200 Subject: [PATCH 450/803] x86/pci/xen: Disable PCI/MSI[-X] masking for XEN_HVM guests When a XEN_HVM guest uses the XEN PIRQ/Eventchannel mechanism, then PCI/MSI[-X] masking is solely controlled by the hypervisor, but contrary to XEN_PV guests this does not disable PCI/MSI[-X] masking in the PCI/MSI layer. This can lead to a situation where the PCI/MSI layer masks an MSI[-X] interrupt and the hypervisor grants the write despite the fact that it already requested the interrupt. As a consequence interrupt delivery on the affected device is not happening ever. Set pci_msi_ignore_mask to prevent that like it's done for XEN_PV guests already. Fixes: 809f9267bbab ("xen: map MSIs into pirqs") Reported-by: Jeremi Piotrowski Reported-by: Dusty Mabe Reported-by: Salvatore Bonaccorso Signed-off-by: Thomas Gleixner Tested-by: Noah Meyerhans Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87tuaduxj5.ffs@tglx --- arch/x86/pci/xen.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 9bb1e2941179..b94f727251b6 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -467,7 +467,6 @@ static __init void xen_setup_pci_msi(void) else xen_msi_ops.setup_msi_irqs = xen_setup_msi_irqs; xen_msi_ops.teardown_msi_irqs = xen_pv_teardown_msi_irqs; - pci_msi_ignore_mask = 1; } else if (xen_hvm_domain()) { xen_msi_ops.setup_msi_irqs = xen_hvm_setup_msi_irqs; xen_msi_ops.teardown_msi_irqs = xen_teardown_msi_irqs; @@ -481,6 +480,11 @@ static __init void xen_setup_pci_msi(void) * in allocating the native domain and never use it. */ x86_init.irqs.create_pci_msi_domain = xen_create_pci_msi_domain; + /* + * With XEN PIRQ/Eventchannels in use PCI/MSI[-X] masking is solely + * controlled by the hypervisor. + */ + pci_msi_ignore_mask = 1; } #else /* CONFIG_PCI_MSI */ From efce2d0ba6bf70994394a5a139347ced4d172771 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 28 Apr 2022 11:15:08 -0400 Subject: [PATCH 451/803] SUNRPC: Ensure timely close of disconnected AF_LOCAL sockets When the rpcbind server closes the socket, we need to ensure that the socket is closed by the kernel as soon as feasible, so add a sk_state_change callback to trigger this close. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f9849b297ea3..25b8a8ead56b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1418,6 +1418,26 @@ static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ +/** + * xs_local_state_change - callback to handle AF_LOCAL socket state changes + * @sk: socket whose state has changed + * + */ +static void xs_local_state_change(struct sock *sk) +{ + struct rpc_xprt *xprt; + struct sock_xprt *transport; + + if (!(xprt = xprt_from_sock(sk))) + return; + transport = container_of(xprt, struct sock_xprt, xprt); + if (sk->sk_shutdown & SHUTDOWN_MASK) { + clear_bit(XPRT_CONNECTED, &xprt->state); + /* Trigger the socket release */ + xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); + } +} + /** * xs_tcp_state_change - callback to handle TCP socket state changes * @sk: socket whose state has changed @@ -1866,6 +1886,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, sk->sk_user_data = xprt; sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; + sk->sk_state_change = xs_local_state_change; sk->sk_error_report = xs_error_report; xprt_clear_connected(xprt); From 09df6a75fffa68169c5ef9bef990cd7ba94f3eef Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 7 Apr 2022 16:07:38 +0200 Subject: [PATCH 452/803] bfq: Fix warning in bfqq_request_over_limit() People are occasionally reporting a warning bfqq_request_over_limit() triggering reporting that BFQ's idea of cgroup hierarchy (and its depth) does not match what generic blkcg code thinks. This can actually happen when bfqq gets moved between BFQ groups while bfqq_request_over_limit() is running. Make sure the code is safe against BFQ queue being moved to a different BFQ group. Fixes: 76f1df88bbc2 ("bfq: Limit number of requests consumed by each cgroup") CC: stable@vger.kernel.org Link: https://lore.kernel.org/all/CAJCQCtTw_2C7ZSz7as5Gvq=OmnDiio=HRkQekqWpKot84sQhFA@mail.gmail.com/ Reported-by: Chris Murphy Reported-by: "yukuai (C)" Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20220407140738.9723-1-jack@suse.cz Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 2e0dd68a3cbe..1f62dbdc521f 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -569,7 +569,7 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) struct bfq_entity *entity = &bfqq->entity; struct bfq_entity *inline_entities[BFQ_LIMIT_INLINE_DEPTH]; struct bfq_entity **entities = inline_entities; - int depth, level; + int depth, level, alloc_depth = BFQ_LIMIT_INLINE_DEPTH; int class_idx = bfqq->ioprio_class - 1; struct bfq_sched_data *sched_data; unsigned long wsum; @@ -578,15 +578,21 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) if (!entity->on_st_or_in_serv) return false; +retry: + spin_lock_irq(&bfqd->lock); /* +1 for bfqq entity, root cgroup not included */ depth = bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css.cgroup->level + 1; - if (depth > BFQ_LIMIT_INLINE_DEPTH) { + if (depth > alloc_depth) { + spin_unlock_irq(&bfqd->lock); + if (entities != inline_entities) + kfree(entities); entities = kmalloc_array(depth, sizeof(*entities), GFP_NOIO); if (!entities) return false; + alloc_depth = depth; + goto retry; } - spin_lock_irq(&bfqd->lock); sched_data = entity->sched_data; /* Gather our ancestors as we need to traverse them in reverse order */ level = 0; From f0a6c68f69981214cb7858738dd2bc81475111f7 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 24 Apr 2022 12:46:23 +0100 Subject: [PATCH 453/803] MIPS: Fix CP0 counter erratum detection for R4k CPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the discrepancy between the two places we check for the CP0 counter erratum in along with the incorrect comparison of the R4400 revision number against 0x30 which matches none and consistently consider all R4000 and R4400 processors affected, as documented in processor errata publications[1][2][3], following the mapping between CP0 PRId register values and processor models: PRId | Processor Model ---------+-------------------- 00000422 | R4000 Revision 2.2 00000430 | R4000 Revision 3.0 00000440 | R4400 Revision 1.0 00000450 | R4400 Revision 2.0 00000460 | R4400 Revision 3.0 No other revision of either processor has ever been spotted. Contrary to what has been stated in commit ce202cbb9e0b ("[MIPS] Assume R4000/R4400 newer than 3.0 don't have the mfc0 count bug") marking the CP0 counter as buggy does not preclude it from being used as either a clock event or a clock source device. It just cannot be used as both at a time, because in that case clock event interrupts will be occasionally lost, and the use as a clock event device takes precedence. Compare against 0x4ff in `can_use_mips_counter' so that a single machine instruction is produced. References: [1] "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0", MIPS Technologies Inc., May 10, 1994, Erratum 53, p.13 [2] "MIPS R4400PC/SC Errata, Processor Revision 1.0", MIPS Technologies Inc., February 9, 1994, Erratum 21, p.4 [3] "MIPS R4400PC/SC Errata, Processor Revision 2.0 & 3.0", MIPS Technologies Inc., January 24, 1995, Erratum 14, p.3 Signed-off-by: Maciej W. Rozycki Fixes: ce202cbb9e0b ("[MIPS] Assume R4000/R4400 newer than 3.0 don't have the mfc0 count bug") Cc: stable@vger.kernel.org # v2.6.24+ Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/timex.h | 8 ++++---- arch/mips/kernel/time.c | 11 +++-------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h index b05bb70a2e46..8026baf46e72 100644 --- a/arch/mips/include/asm/timex.h +++ b/arch/mips/include/asm/timex.h @@ -40,9 +40,9 @@ typedef unsigned int cycles_t; /* - * On R4000/R4400 before version 5.0 an erratum exists such that if the - * cycle counter is read in the exact moment that it is matching the - * compare register, no interrupt will be generated. + * On R4000/R4400 an erratum exists such that if the cycle counter is + * read in the exact moment that it is matching the compare register, + * no interrupt will be generated. * * There is a suggested workaround and also the erratum can't strike if * the compare interrupt isn't being used as the clock source device. @@ -63,7 +63,7 @@ static inline int can_use_mips_counter(unsigned int prid) if (!__builtin_constant_p(cpu_has_counter)) asm volatile("" : "=m" (cpu_data[0].options)); if (likely(cpu_has_counter && - prid >= (PRID_IMP_R4000 | PRID_REV_ENCODE_44(5, 0)))) + prid > (PRID_IMP_R4000 | PRID_REV_ENCODE_44(15, 15)))) return 1; else return 0; diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index caa01457dce6..ed339d7979f3 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -141,15 +141,10 @@ static __init int cpu_has_mfc0_count_bug(void) case CPU_R4400MC: /* * The published errata for the R4400 up to 3.0 say the CPU - * has the mfc0 from count bug. + * has the mfc0 from count bug. This seems the last version + * produced. */ - if ((current_cpu_data.processor_id & 0xff) <= 0x30) - return 1; - - /* - * we assume newer revisions are ok - */ - return 0; + return 1; } return 0; From 303cc749c8659d5f1ccf97973591313ec0bdacd3 Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Fri, 29 Apr 2022 16:22:18 +0200 Subject: [PATCH 454/803] io_uring: check that data field is 0 in ringfd unregister Only allow data field to be 0 in struct io_uring_rsrc_update user arguments to allow for future possible usage. Fixes: e7a6c00dc77a ("io_uring: add support for registering ring file descriptors") Signed-off-by: Eugene Syromiatnikov Link: https://lore.kernel.org/r/20220429142218.GA28696@asgard.redhat.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e3ae26ff5d1a..e01f595f5b7d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -10593,7 +10593,7 @@ static int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg, ret = -EFAULT; break; } - if (reg.resv || reg.offset >= IO_RINGFD_REG_MAX) { + if (reg.resv || reg.data || reg.offset >= IO_RINGFD_REG_MAX) { ret = -EINVAL; break; } From c6fe81191bd74f7e6ae9ce96a4837df9485f3ab8 Mon Sep 17 00:00:00 2001 From: Nick Kossifidis Date: Tue, 22 Mar 2022 15:28:39 +0200 Subject: [PATCH 455/803] RISC-V: relocate DTB if it's outside memory region In case the DTB provided by the bootloader/BootROM is before the kernel image or outside /memory, we won't be able to access it through the linear mapping, and get a segfault on setup_arch(). Currently OpenSBI relocates DTB but that's not always the case (e.g. if FW_JUMP_FDT_ADDR is not specified), and it's also not the most portable approach since the default FW_JUMP_FDT_ADDR of the generic platform relocates the DTB at a specific offset that may not be available. To avoid this situation copy DTB so that it's visible through the linear mapping. Signed-off-by: Nick Kossifidis Link: https://lore.kernel.org/r/20220322132839.3653682-1-mick@ics.forth.gr Tested-by: Conor Dooley Fixes: f105aa940e78 ("riscv: add BUILTIN_DTB support for MMU-enabled targets") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index b0793dc0c291..05ed641a1134 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -208,8 +208,25 @@ static void __init setup_bootmem(void) * early_init_fdt_reserve_self() since __pa() does * not work for DTB pointers that are fixmap addresses */ - if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) - memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); + if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) { + /* + * In case the DTB is not located in a memory region we won't + * be able to locate it later on via the linear mapping and + * get a segfault when accessing it via __va(dtb_early_pa). + * To avoid this situation copy DTB to a memory region. + * Note that memblock_phys_alloc will also reserve DTB region. + */ + if (!memblock_is_memory(dtb_early_pa)) { + size_t fdt_size = fdt_totalsize(dtb_early_va); + phys_addr_t new_dtb_early_pa = memblock_phys_alloc(fdt_size, PAGE_SIZE); + void *new_dtb_early_va = early_memremap(new_dtb_early_pa, fdt_size); + + memcpy(new_dtb_early_va, dtb_early_va, fdt_size); + early_memunmap(new_dtb_early_va, fdt_size); + _dtb_early_pa = new_dtb_early_pa; + } else + memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); + } early_init_fdt_scan_reserved_mem(); dma_contiguous_reserve(dma32_phys_limit); From 3d092ef09303e615707dc5755cf0e29b4df7555f Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 19 Apr 2022 12:08:09 -0500 Subject: [PATCH 456/803] ipmi: When handling send message responses, don't process the message A chunk was dropped when the code handling send messages was rewritten. Those messages shouldn't be processed normally, they are just an indication that the message was successfully sent and the timers should be started for the real response that should be coming later. Add back in the missing chunk to just discard the message and go on. Fixes: 059747c245f0 ("ipmi: Add support for IPMB direct messages") Reported-by: Joe Wiese Cc: stable@vger.kernel.org # v5.16+ Signed-off-by: Corey Minyard Tested-by: Joe Wiese --- drivers/char/ipmi/ipmi_msghandler.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index c59265146e9c..5f7abf456a17 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -4518,6 +4518,8 @@ return_unspecified: } else /* The message was sent, start the timer. */ intf_start_seq_timer(intf, msg->msgid); + requeue = 0; + goto out; } else if (((msg->rsp[0] >> 2) != ((msg->data[0] >> 2) | 1)) || (msg->rsp[1] != msg->data[1])) { /* From 9cc3aac42566a0021e0ab7c4e9b31667ad75b1e3 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 21 Apr 2022 06:49:43 -0500 Subject: [PATCH 457/803] ipmi:ipmi_ipmb: Fix null-ptr-deref in ipmi_unregister_smi() KASAN report null-ptr-deref as follows: KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: 0010:ipmi_unregister_smi+0x7d/0xd50 drivers/char/ipmi/ipmi_msghandler.c:3680 Call Trace: ipmi_ipmb_remove+0x138/0x1a0 drivers/char/ipmi/ipmi_ipmb.c:443 ipmi_ipmb_probe+0x409/0xda1 drivers/char/ipmi/ipmi_ipmb.c:548 i2c_device_probe+0x959/0xac0 drivers/i2c/i2c-core-base.c:563 really_probe+0x3f3/0xa70 drivers/base/dd.c:541 In ipmi_ipmb_probe(), 'iidev->intf' is not set before ipmi_register_smi() success. And in the error handling case, ipmi_ipmb_remove() is called to release resources, ipmi_unregister_smi() is called without check 'iidev->intf', this will cause KASAN null-ptr-deref issue. General kernel style is to allow NULL to be passed into unregister calls, so fix it that way. This allows a NULL check to be removed in other code. Fixes: 57c9e3c9a374 ("ipmi:ipmi_ipmb: Unregister the SMI on remove") Reported-by: Hulk Robot Cc: stable@vger.kernel.org # v5.17+ Cc: Wei Yongjun Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 5 ++++- drivers/char/ipmi/ipmi_si_intf.c | 5 +---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 5f7abf456a17..f1827257ef0e 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -3677,8 +3677,11 @@ static void cleanup_smi_msgs(struct ipmi_smi *intf) void ipmi_unregister_smi(struct ipmi_smi *intf) { struct ipmi_smi_watcher *w; - int intf_num = intf->intf_num, index; + int intf_num, index; + if (!intf) + return; + intf_num = intf->intf_num; mutex_lock(&ipmi_interfaces_mutex); intf->intf_num = -1; intf->in_shutdown = true; diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 64dedb3ef8ec..5604a810fb3d 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2220,10 +2220,7 @@ static void cleanup_one_si(struct smi_info *smi_info) return; list_del(&smi_info->link); - - if (smi_info->intf) - ipmi_unregister_smi(smi_info->intf); - + ipmi_unregister_smi(smi_info->intf); kfree(smi_info); } From 892de36fd4a98fab3298d417c051d9099af5448d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 29 Apr 2022 12:22:10 -0400 Subject: [PATCH 458/803] SUNRPC: Ensure gss-proxy connects on setup For reasons best known to the author, gss-proxy does not implement a NULL procedure, and returns RPC_PROC_UNAVAIL. However we still want to ensure that we connect to the service at setup time. So add a quirk-flag specially for this case. Fixes: 1d658336b05f ("SUNRPC: Add RPC based upcall mechanism for RPCGSS auth") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/auth_gss/gss_rpc_upcall.c | 2 +- net/sunrpc/clnt.c | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 267b7aeaf1a6..db5149567305 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -160,6 +160,7 @@ struct rpc_add_xprt_test { #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) +#define RPC_CLNT_CREATE_IGNORE_NULL_UNAVAIL (1UL << 12) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 61c276bddaf2..8ca1d809b78d 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -97,7 +97,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt) * timeout, which would result in reconnections being * done without the correct namespace: */ - .flags = RPC_CLNT_CREATE_NOPING | + .flags = RPC_CLNT_CREATE_IGNORE_NULL_UNAVAIL | RPC_CLNT_CREATE_NO_IDLE_TIMEOUT }; struct rpc_clnt *clnt; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 98133aa54f19..22c28cf43eba 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -479,6 +479,9 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { int err = rpc_ping(clnt); + if ((args->flags & RPC_CLNT_CREATE_IGNORE_NULL_UNAVAIL) && + err == -EOPNOTSUPP) + err = 0; if (err != 0) { rpc_shutdown_client(clnt); return ERR_PTR(err); From 86931ff7207bc045fa5439ef97b31859613dc303 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 28 Apr 2022 23:34:16 +0000 Subject: [PATCH 459/803] KVM: x86/mmu: Do not create SPTEs for GFNs that exceed host.MAXPHYADDR Disallow memslots and MMIO SPTEs whose gpa range would exceed the host's MAXPHYADDR, i.e. don't create SPTEs for gfns that exceed host.MAXPHYADDR. The TDP MMU bounds its zapping based on host.MAXPHYADDR, and so if the guest, possibly with help from userspace, manages to coerce KVM into creating a SPTE for an "impossible" gfn, KVM will leak the associated shadow pages (page tables): WARNING: CPU: 10 PID: 1122 at arch/x86/kvm/mmu/tdp_mmu.c:57 kvm_mmu_uninit_tdp_mmu+0x4b/0x60 [kvm] Modules linked in: kvm_intel kvm irqbypass CPU: 10 PID: 1122 Comm: set_memory_regi Tainted: G W 5.18.0-rc1+ #293 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:kvm_mmu_uninit_tdp_mmu+0x4b/0x60 [kvm] Call Trace: kvm_arch_destroy_vm+0x130/0x1b0 [kvm] kvm_destroy_vm+0x162/0x2d0 [kvm] kvm_vm_release+0x1d/0x30 [kvm] __fput+0x82/0x240 task_work_run+0x5b/0x90 exit_to_user_mode_prepare+0xd2/0xe0 syscall_exit_to_user_mode+0x1d/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xae On bare metal, encountering an impossible gpa in the page fault path is well and truly impossible, barring CPU bugs, as the CPU will signal #PF during the gva=>gpa translation (or a similar failure when stuffing a physical address into e.g. the VMCS/VMCB). But if KVM is running as a VM itself, the MAXPHYADDR enumerated to KVM may not be the actual MAXPHYADDR of the underlying hardware, in which case the hardware will not fault on the illegal-from-KVM's-perspective gpa. Alternatively, KVM could continue allowing the dodgy behavior and simply zap the max possible range. But, for hosts with MAXPHYADDR < 52, that's a (minor) waste of cycles, and more importantly, KVM can't reasonably support impossible memslots when running on bare metal (or with an accurate MAXPHYADDR as a VM). Note, limiting the overhead by checking if KVM is running as a guest is not a safe option as the host isn't required to announce itself to the guest in any way, e.g. doesn't need to set the HYPERVISOR CPUID bit. A second alternative to disallowing the memslot behavior would be to disallow creating a VM with guest.MAXPHYADDR > host.MAXPHYADDR. That restriction is undesirable as there are legitimate use cases for doing so, e.g. using the highest host.MAXPHYADDR out of a pool of heterogeneous systems so that VMs can be migrated between hosts with different MAXPHYADDRs without running afoul of the allow_smaller_maxphyaddr mess. Note that any guest.MAXPHYADDR is valid with shadow paging, and it is even useful in order to test KVM with MAXPHYADDR=52 (i.e. without any reserved physical address bits). The now common kvm_mmu_max_gfn() is inclusive instead of exclusive. The memslot and TDP MMU code want an exclusive value, but the name implies the returned value is inclusive, and the MMIO path needs an inclusive check. Fixes: faaf05b00aec ("kvm: x86/mmu: Support zapping SPTEs in the TDP MMU") Fixes: 524a1e4e381f ("KVM: x86/mmu: Don't leak non-leaf SPTEs when zapping all SPTEs") Cc: stable@vger.kernel.org Cc: Maxim Levitsky Cc: Ben Gardon Cc: David Matlack Signed-off-by: Sean Christopherson Message-Id: <20220428233416.2446833-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.h | 24 ++++++++++++++++++++++++ arch/x86/kvm/mmu/mmu.c | 10 ++++++++-- arch/x86/kvm/mmu/spte.h | 6 ------ arch/x86/kvm/mmu/tdp_mmu.c | 15 ++++++++------- arch/x86/kvm/x86.c | 6 +++++- 5 files changed, 45 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index e6cae6f22683..a335e7f1f69e 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -65,6 +65,30 @@ static __always_inline u64 rsvd_bits(int s, int e) return ((2ULL << (e - s)) - 1) << s; } +/* + * The number of non-reserved physical address bits irrespective of features + * that repurpose legal bits, e.g. MKTME. + */ +extern u8 __read_mostly shadow_phys_bits; + +static inline gfn_t kvm_mmu_max_gfn(void) +{ + /* + * Note that this uses the host MAXPHYADDR, not the guest's. + * EPT/NPT cannot support GPAs that would exceed host.MAXPHYADDR; + * assuming KVM is running on bare metal, guest accesses beyond + * host.MAXPHYADDR will hit a #PF(RSVD) and never cause a vmexit + * (either EPT Violation/Misconfig or #NPF), and so KVM will never + * install a SPTE for such addresses. If KVM is running as a VM + * itself, on the other hand, it might see a MAXPHYADDR that is less + * than hardware's real MAXPHYADDR. Using the host MAXPHYADDR + * disallows such SPTEs entirely and simplifies the TDP MMU. + */ + int max_gpa_bits = likely(tdp_enabled) ? shadow_phys_bits : 52; + + return (1ULL << (max_gpa_bits - PAGE_SHIFT)) - 1; +} + void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask); void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index f9080ee50ffa..5f6225c384e6 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2992,9 +2992,15 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa /* * If MMIO caching is disabled, emulate immediately without * touching the shadow page tables as attempting to install an - * MMIO SPTE will just be an expensive nop. + * MMIO SPTE will just be an expensive nop. Do not cache MMIO + * whose gfn is greater than host.MAXPHYADDR, any guest that + * generates such gfns is running nested and is being tricked + * by L0 userspace (you can observe gfn > L1.MAXPHYADDR if + * and only if L1's MAXPHYADDR is inaccurate with respect to + * the hardware's). */ - if (unlikely(!shadow_mmio_value)) { + if (unlikely(!shadow_mmio_value) || + unlikely(fault->gfn > kvm_mmu_max_gfn())) { *ret_val = RET_PF_EMULATE; return true; } diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 73f12615416f..e4abeb5df1b1 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -201,12 +201,6 @@ static inline bool is_removed_spte(u64 spte) */ extern u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; -/* - * The number of non-reserved physical address bits irrespective of features - * that repurpose legal bits, e.g. MKTME. - */ -extern u8 __read_mostly shadow_phys_bits; - static inline bool is_mmio_spte(u64 spte) { return (spte & shadow_mmio_mask) == shadow_mmio_value && diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index c472769e0300..edc68538819b 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -815,14 +815,15 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm, return iter->yielded; } -static inline gfn_t tdp_mmu_max_gfn_host(void) +static inline gfn_t tdp_mmu_max_gfn_exclusive(void) { /* - * Bound TDP MMU walks at host.MAXPHYADDR, guest accesses beyond that - * will hit a #PF(RSVD) and never hit an EPT Violation/Misconfig / #NPF, - * and so KVM will never install a SPTE for such addresses. + * Bound TDP MMU walks at host.MAXPHYADDR. KVM disallows memslots with + * a gpa range that would exceed the max gfn, and KVM does not create + * MMIO SPTEs for "impossible" gfns, instead sending such accesses down + * the slow emulation path every time. */ - return 1ULL << (shadow_phys_bits - PAGE_SHIFT); + return kvm_mmu_max_gfn() + 1; } static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root, @@ -830,7 +831,7 @@ static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root, { struct tdp_iter iter; - gfn_t end = tdp_mmu_max_gfn_host(); + gfn_t end = tdp_mmu_max_gfn_exclusive(); gfn_t start = 0; for_each_tdp_pte_min_level(iter, root, zap_level, start, end) { @@ -923,7 +924,7 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root, { struct tdp_iter iter; - end = min(end, tdp_mmu_max_gfn_host()); + end = min(end, tdp_mmu_max_gfn_exclusive()); lockdep_assert_held_write(&kvm->mmu_lock); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 547ba00ef64f..43174a8d9497 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11995,8 +11995,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *new, enum kvm_mr_change change) { - if (change == KVM_MR_CREATE || change == KVM_MR_MOVE) + if (change == KVM_MR_CREATE || change == KVM_MR_MOVE) { + if ((new->base_gfn + new->npages - 1) > kvm_mmu_max_gfn()) + return -EINVAL; + return kvm_alloc_memslot_metadata(kvm, new); + } if (change == KVM_MR_FLAGS_ONLY) memcpy(&new->arch, &old->arch, sizeof(old->arch)); From d495f942f40aa412f8d4d65951152648cfa09903 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Apr 2022 12:30:13 +0200 Subject: [PATCH 460/803] KVM: fix bad user ABI for KVM_EXIT_SYSTEM_EVENT When KVM_EXIT_SYSTEM_EVENT was introduced, it included a flags member that at the time was unused. Unfortunately this extensibility mechanism has several issues: - x86 is not writing the member, so it would not be possible to use it on x86 except for new events - the member is not aligned to 64 bits, so the definition of the uAPI struct is incorrect for 32- on 64-bit userspace. This is a problem for RISC-V, which supports CONFIG_KVM_COMPAT, but fortunately usage of flags was only introduced in 5.18. Since padding has to be introduced, place a new field in there that tells if the flags field is valid. To allow further extensibility, in fact, change flags to an array of 16 values, and store how many of the values are valid. The availability of the new ndata field is tied to a system capability; all architectures are changed to fill in the field. To avoid breaking compilation of userspace that was using the flags field, provide a userspace-only union to overlap flags with data[0]. The new field is placed at the same offset for both 32- and 64-bit userspace. Cc: Will Deacon Cc: Marc Zyngier Cc: Peter Gonda Cc: Sean Christopherson Signed-off-by: Paolo Bonzini Reported-by: kernel test robot Message-Id: <20220422103013.34832-1-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 24 +++++++++++++++++------- arch/arm64/kvm/psci.c | 3 ++- arch/riscv/kvm/vcpu_sbi.c | 5 +++-- arch/x86/kvm/x86.c | 2 ++ include/uapi/linux/kvm.h | 10 +++++++++- virt/kvm/kvm_main.c | 1 + 6 files changed, 34 insertions(+), 11 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 85c7abc51af5..4a900cdbc62e 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5986,16 +5986,16 @@ should put the acknowledged interrupt vector into the 'epr' field. #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 __u32 type; - __u64 flags; + __u32 ndata; + __u64 data[16]; } system_event; If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered a system-level event using some architecture specific mechanism (hypercall or some special instruction). In case of ARM64, this is triggered using -HVC instruction based PSCI call from the vcpu. The 'type' field describes -the system-level event type. The 'flags' field describes architecture -specific flags for the system-level event. +HVC instruction based PSCI call from the vcpu. +The 'type' field describes the system-level event type. Valid values for 'type' are: - KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the @@ -6010,10 +6010,20 @@ Valid values for 'type' are: to ignore the request, or to gather VM memory core dump and/or reset/shutdown of the VM. -Valid flags are: +If KVM_CAP_SYSTEM_EVENT_DATA is present, the 'data' field can contain +architecture specific information for the system-level event. Only +the first `ndata` items (possibly zero) of the data array are valid. - - KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (arm64 only) -- the guest issued - a SYSTEM_RESET2 call according to v1.1 of the PSCI specification. + - for arm64, data[0] is set to KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 if + the guest issued a SYSTEM_RESET2 call according to v1.1 of the PSCI + specification. + + - for RISC-V, data[0] is set to the value of the second argument of the + ``sbi_system_reset`` call. + +Previous versions of Linux defined a `flags` member in this struct. The +field is now aliased to `data[0]`. Userspace can assume that it is only +written if ndata is greater than 0. :: diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index baac2b405f23..708d80e8e60d 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -181,7 +181,8 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags) memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); vcpu->run->system_event.type = type; - vcpu->run->system_event.flags = flags; + vcpu->run->system_event.ndata = 1; + vcpu->run->system_event.data[0] = flags; vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; } diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index a09ecb97b890..d45e7da3f0d3 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -83,7 +83,7 @@ void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run) void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, struct kvm_run *run, - u32 type, u64 flags) + u32 type, u64 reason) { unsigned long i; struct kvm_vcpu *tmp; @@ -94,7 +94,8 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, memset(&run->system_event, 0, sizeof(run->system_event)); run->system_event.type = type; - run->system_event.flags = flags; + run->system_event.ndata = 1; + run->system_event.data[0] = reason; run->exit_reason = KVM_EXIT_SYSTEM_EVENT; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 43174a8d9497..07d789b1d366 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10015,12 +10015,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH; + vcpu->run->system_event.ndata = 0; r = 0; goto out; } if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET; + vcpu->run->system_event.ndata = 0; r = 0; goto out; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 91a6fe4e02c0..6a184d260c7f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -445,7 +445,13 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 __u32 type; - __u64 flags; + __u32 ndata; + union { +#ifndef __KERNEL__ + __u64 flags; +#endif + __u64 data[16]; + }; } system_event; /* KVM_EXIT_S390_STSI */ struct { @@ -1144,6 +1150,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_MEM_OP_EXTENSION 211 #define KVM_CAP_PMU_CAPABILITY 212 #define KVM_CAP_DISABLE_QUIRKS2 213 +/* #define KVM_CAP_VM_TSC_CONTROL 214 */ +#define KVM_CAP_SYSTEM_EVENT_DATA 215 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dfb7dabdbc63..ac57fc2c935f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4333,6 +4333,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) return 0; #endif case KVM_CAP_BINARY_STATS_FD: + case KVM_CAP_SYSTEM_EVENT_DATA: return 1; default: break; From 44187235cbcc7c1129ea7c004bc12f8757d29415 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Fri, 29 Apr 2022 03:17:57 +0000 Subject: [PATCH 461/803] KVM: x86/mmu: fix potential races when walking host page table KVM uses lookup_address_in_mm() to detect the hugepage size that the host uses to map a pfn. The function suffers from several issues: - no usage of READ_ONCE(*). This allows multiple dereference of the same page table entry. The TOCTOU problem because of that may cause KVM to incorrectly treat a newly generated leaf entry as a nonleaf one, and dereference the content by using its pfn value. - the information returned does not match what KVM needs; for non-present entries it returns the level at which the walk was terminated, as long as the entry is not 'none'. KVM needs level information of only 'present' entries, otherwise it may regard a non-present PXE entry as a present large page mapping. - the function is not safe for mappings that can be torn down, because it does not disable IRQs and because it returns a PTE pointer which is never safe to dereference after the function returns. So implement the logic for walking host page tables directly in KVM, and stop using lookup_address_in_mm(). Cc: Sean Christopherson Cc: Paolo Bonzini Signed-off-by: Mingwei Zhang Message-Id: <20220429031757.2042406-1-mizhang@google.com> [Inline in host_pfn_mapping_level, ensure no semantic change for its callers. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 47 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 5f6225c384e6..64a2a7e2be90 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2804,8 +2804,12 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, const struct kvm_memory_slot *slot) { unsigned long hva; - pte_t *pte; - int level; + unsigned long flags; + int level = PG_LEVEL_4K; + pgd_t pgd; + p4d_t p4d; + pud_t pud; + pmd_t pmd; if (!PageCompound(pfn_to_page(pfn)) && !kvm_is_zone_device_pfn(pfn)) return PG_LEVEL_4K; @@ -2820,10 +2824,43 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, */ hva = __gfn_to_hva_memslot(slot, gfn); - pte = lookup_address_in_mm(kvm->mm, hva, &level); - if (unlikely(!pte)) - return PG_LEVEL_4K; + /* + * Lookup the mapping level in the current mm. The information + * may become stale soon, but it is safe to use as long as + * 1) mmu_notifier_retry was checked after taking mmu_lock, and + * 2) mmu_lock is taken now. + * + * We still need to disable IRQs to prevent concurrent tear down + * of page tables. + */ + local_irq_save(flags); + pgd = READ_ONCE(*pgd_offset(kvm->mm, hva)); + if (pgd_none(pgd)) + goto out; + + p4d = READ_ONCE(*p4d_offset(&pgd, hva)); + if (p4d_none(p4d) || !p4d_present(p4d)) + goto out; + + pud = READ_ONCE(*pud_offset(&p4d, hva)); + if (pud_none(pud) || !pud_present(pud)) + goto out; + + if (pud_large(pud)) { + level = PG_LEVEL_1G; + goto out; + } + + pmd = READ_ONCE(*pmd_offset(&pud, hva)); + if (pmd_none(pmd) || !pmd_present(pmd)) + goto out; + + if (pmd_large(pmd)) + level = PG_LEVEL_2M; + +out: + local_irq_restore(flags); return level; } From 643d95aac59a060c2730975988aedc387f0f9f44 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 29 Apr 2022 07:57:53 -0700 Subject: [PATCH 462/803] Revert "x86/mm: Introduce lookup_address_in_mm()" Drop lookup_address_in_mm() now that KVM is providing it's own variant of lookup_address_in_pgd() that is safe for use with user addresses, e.g. guards against page tables being torn down. A variant that provides a non-init mm is inherently dangerous and flawed, as the only reason to use an mm other than init_mm is to walk a userspace mapping, and lookup_address_in_pgd() does not play nice with userspace mappings, e.g. doesn't disable IRQs to block TLB shootdowns and doesn't use READ_ONCE() to ensure an upper level entry isn't converted to a huge page between checking the PAGE_SIZE bit and grabbing the address of the next level down. This reverts commit 13c72c060f1ba6f4eddd7b1c4f52a8aded43d6d9. Signed-off-by: Sean Christopherson Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/pgtable_types.h | 4 ---- arch/x86/mm/pat/set_memory.c | 11 ----------- 2 files changed, 15 deletions(-) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 40497a9020c6..407084d9fd99 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -559,10 +559,6 @@ static inline void update_page_count(int level, unsigned long pages) { } extern pte_t *lookup_address(unsigned long address, unsigned int *level); extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, unsigned int *level); - -struct mm_struct; -extern pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address, - unsigned int *level); extern pmd_t *lookup_pmd_address(unsigned long address); extern phys_addr_t slow_virt_to_phys(void *__address); extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index abf5ed76e4b7..0656db33574d 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -638,17 +638,6 @@ pte_t *lookup_address(unsigned long address, unsigned int *level) } EXPORT_SYMBOL_GPL(lookup_address); -/* - * Lookup the page table entry for a virtual address in a given mm. Return a - * pointer to the entry and the level of the mapping. - */ -pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address, - unsigned int *level) -{ - return lookup_address_in_pgd(pgd_offset(mm, address), address, level); -} -EXPORT_SYMBOL_GPL(lookup_address_in_mm); - static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address, unsigned int *level) { From 2bfed7d2ffa5d86c462d3e2067f2832eaf8c04c7 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 19 Mar 2022 02:00:11 +0100 Subject: [PATCH 463/803] selftests/seccomp: Don't call read() on TTY from background pgrp Since commit 92d25637a3a4 ("kselftest: signal all child processes"), tests are executed in background process groups. This means that trying to read from stdin now throws SIGTTIN when stdin is a TTY, which breaks some seccomp selftests that try to use read(0, NULL, 0) as a dummy syscall. The simplest way to fix that is probably to just use -1 instead of 0 as the dummy read()'s FD. Fixes: 92d25637a3a4 ("kselftest: signal all child processes") Signed-off-by: Jann Horn Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220319010011.1374622-1-jannh@google.com --- tools/testing/selftests/seccomp/seccomp_bpf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 9d126d7fabdb..313bb0cbfb1e 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -955,7 +955,7 @@ TEST(ERRNO_valid) ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); - EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(-1, read(-1, NULL, 0)); EXPECT_EQ(E2BIG, errno); } @@ -974,7 +974,7 @@ TEST(ERRNO_zero) EXPECT_EQ(parent, syscall(__NR_getppid)); /* "errno" of 0 is ok. */ - EXPECT_EQ(0, read(0, NULL, 0)); + EXPECT_EQ(0, read(-1, NULL, 0)); } /* @@ -995,7 +995,7 @@ TEST(ERRNO_capped) ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); - EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(-1, read(-1, NULL, 0)); EXPECT_EQ(4095, errno); } @@ -1026,7 +1026,7 @@ TEST(ERRNO_order) ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); - EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(-1, read(-1, NULL, 0)); EXPECT_EQ(12, errno); } @@ -2623,7 +2623,7 @@ void *tsync_sibling(void *data) ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); if (!ret) return (void *)SIBLING_EXIT_NEWPRIVS; - read(0, NULL, 0); + read(-1, NULL, 0); return (void *)SIBLING_EXIT_UNKILLED; } From f751d8eac17692905cdd6935f72d523d8adf3b65 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 29 Apr 2022 14:43:04 -0400 Subject: [PATCH 464/803] KVM: x86: work around QEMU issue with synthetic CPUID leaves Synthesizing AMD leaves up to 0x80000021 caused problems with QEMU, which assumes the *host* CPUID[0x80000000].EAX is higher or equal to what KVM_GET_SUPPORTED_CPUID reports. This causes QEMU to issue bogus host CPUIDs when preparing the input to KVM_SET_CPUID2. It can even get into an infinite loop, which is only terminated by an abort(): cpuid_data is full, no space for cpuid(eax:0x8000001d,ecx:0x3e) To work around this, only synthesize those leaves if 0x8000001d exists on the host. The synthetic 0x80000021 leaf is mostly useful on Zen2, which satisfies the condition. Fixes: f144c49e8c39 ("KVM: x86: synthesize CPUID leaf 0x80000021h if useful") Reported-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b24ca7f4ed7c..598334ed5fbc 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1085,12 +1085,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) case 0x80000000: entry->eax = min(entry->eax, 0x80000021); /* - * Serializing LFENCE is reported in a multitude of ways, - * and NullSegClearsBase is not reported in CPUID on Zen2; - * help userspace by providing the CPUID leaf ourselves. + * Serializing LFENCE is reported in a multitude of ways, and + * NullSegClearsBase is not reported in CPUID on Zen2; help + * userspace by providing the CPUID leaf ourselves. + * + * However, only do it if the host has CPUID leaf 0x8000001d. + * QEMU thinks that it can query the host blindly for that + * CPUID leaf if KVM reports that it supports 0x8000001d or + * above. The processor merrily returns values from the + * highest Intel leaf which QEMU tries to use as the guest's + * 0x8000001d. Even worse, this can result in an infinite + * loop if said highest leaf has no subleaves indexed by ECX. */ - if (static_cpu_has(X86_FEATURE_LFENCE_RDTSC) - || !static_cpu_has_bug(X86_BUG_NULL_SEG)) + if (entry->eax >= 0x8000001d && + (static_cpu_has(X86_FEATURE_LFENCE_RDTSC) + || !static_cpu_has_bug(X86_BUG_NULL_SEG))) entry->eax = max(entry->eax, 0x80000021); break; case 0x80000001: From adee8aa22a9298148b3f86a83a7940e7a3329ad9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 29 Apr 2022 23:09:49 +0200 Subject: [PATCH 465/803] Revert "arm: dts: at91: Fix boolean properties with values" This reverts commit 0dc23d1a8e17, which caused another regression as the pinctrl code actually expects an integer value of 0 or 1 rather than a simple boolean property. Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/at91-kizbox3-hs.dts | 2 +- arch/arm/boot/dts/at91-kizbox3_common.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/at91-kizbox3-hs.dts b/arch/arm/boot/dts/at91-kizbox3-hs.dts index f7d90cf1bb77..2799b2a1f4d2 100644 --- a/arch/arm/boot/dts/at91-kizbox3-hs.dts +++ b/arch/arm/boot/dts/at91-kizbox3-hs.dts @@ -225,7 +225,7 @@ pinctrl_pio_io_reset: gpio_io_reset { pinmux = ; bias-disable; - drive-open-drain; + drive-open-drain = <1>; output-low; }; pinctrl_pio_input: gpio_input { diff --git a/arch/arm/boot/dts/at91-kizbox3_common.dtsi b/arch/arm/boot/dts/at91-kizbox3_common.dtsi index 465664628419..abe27adfa4d6 100644 --- a/arch/arm/boot/dts/at91-kizbox3_common.dtsi +++ b/arch/arm/boot/dts/at91-kizbox3_common.dtsi @@ -211,7 +211,7 @@ pinmux = , //DATA ; //CLK bias-disable; - drive-open-drain; + drive-open-drain = <1>; }; pinctrl_pwm0 { From a3d0562d4dc039bca39445e1cddde7951662e17d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 29 Apr 2022 12:27:30 -0400 Subject: [PATCH 466/803] Revert "SUNRPC: attempt AF_LOCAL connect on setup" This reverts commit 7073ea8799a8cf73db60270986f14e4aae20fa80. We must not try to connect the socket while the transport is under construction, because the mechanisms to safely tear it down are not in place. As the code stands, we end up leaking the sockets on a connection error. Reported-by: wanghai (M) Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 25b8a8ead56b..650102a9c86a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2875,9 +2875,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) } xprt_set_bound(xprt); xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); - ret = ERR_PTR(xs_local_setup_socket(transport)); - if (ret) - goto out_err; break; default: ret = ERR_PTR(-EAFNOSUPPORT); From 38dcd9570d6fced1dcfee226d5b29722b070ce90 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 28 Apr 2022 12:45:10 +0800 Subject: [PATCH 467/803] selftests/net: add missing tests to Makefile When generating the selftests to another folder, the fixed tests are missing as they are not in Makefile, e.g. make -C tools/testing/selftests/ install \ TARGETS="net" INSTALL_PATH=/tmp/kselftests Signed-off-by: Hangbin Liu Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 3fe2515aa616..0f2ebc38d893 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -30,7 +30,7 @@ TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh TEST_PROGS += cmsg_so_mark.sh -TEST_PROGS += cmsg_time.sh +TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh @@ -54,6 +54,7 @@ TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls TEST_GEN_FILES += toeplitz TEST_GEN_FILES += cmsg_sender +TEST_PROGS += test_vxlan_vnifiltering.sh TEST_FILES := settings From f62c5acc800eebfe25bf6738b2cc8aa5d6aa7598 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 28 Apr 2022 12:45:11 +0800 Subject: [PATCH 468/803] selftests/net/forwarding: add missing tests to Makefile When generating the selftests to another folder, the fixed tests are missing as they are not in Makefile, e.g. make -C tools/testing/selftests/ install \ TARGETS="net/forwarding" INSTALL_PATH=/tmp/kselftests Signed-off-by: Hangbin Liu Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/Makefile | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 8fa97ae9af9e..c87e674b61b1 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -2,15 +2,31 @@ TEST_PROGS = bridge_igmp.sh \ bridge_locked_port.sh \ + bridge_mld.sh \ bridge_port_isolation.sh \ bridge_sticky_fdb.sh \ bridge_vlan_aware.sh \ + bridge_vlan_mcast.sh \ bridge_vlan_unaware.sh \ + custom_multipath_hash.sh \ + dual_vxlan_bridge.sh \ + ethtool_extended_state.sh \ ethtool.sh \ + gre_custom_multipath_hash.sh \ gre_inner_v4_multipath.sh \ gre_inner_v6_multipath.sh \ + gre_multipath_nh_res.sh \ + gre_multipath_nh.sh \ gre_multipath.sh \ + hw_stats_l3.sh \ ip6_forward_instats_vrf.sh \ + ip6gre_custom_multipath_hash.sh \ + ip6gre_flat_key.sh \ + ip6gre_flat_keys.sh \ + ip6gre_flat.sh \ + ip6gre_hier_key.sh \ + ip6gre_hier_keys.sh \ + ip6gre_hier.sh \ ip6gre_inner_v4_multipath.sh \ ip6gre_inner_v6_multipath.sh \ ipip_flat_gre_key.sh \ @@ -34,36 +50,53 @@ TEST_PROGS = bridge_igmp.sh \ mirror_gre_vlan_bridge_1q.sh \ mirror_gre_vlan.sh \ mirror_vlan.sh \ + pedit_dsfield.sh \ + pedit_ip.sh \ + pedit_l4port.sh \ + q_in_vni_ipv6.sh \ + q_in_vni.sh \ router_bridge.sh \ router_bridge_vlan.sh \ router_broadcast.sh \ + router_mpath_nh_res.sh \ router_mpath_nh.sh \ router_multicast.sh \ router_multipath.sh \ + router_nh.sh \ router.sh \ router_vid_1.sh \ sch_ets.sh \ + sch_red.sh \ sch_tbf_ets.sh \ sch_tbf_prio.sh \ sch_tbf_root.sh \ + skbedit_priority.sh \ tc_actions.sh \ tc_chains.sh \ tc_flower_router.sh \ tc_flower.sh \ tc_mpls_l2vpn.sh \ + tc_police.sh \ tc_shblocks.sh \ tc_vlan_modify.sh \ + vxlan_asymmetric_ipv6.sh \ vxlan_asymmetric.sh \ + vxlan_bridge_1d_ipv6.sh \ + vxlan_bridge_1d_port_8472_ipv6.sh \ vxlan_bridge_1d_port_8472.sh \ vxlan_bridge_1d.sh \ + vxlan_bridge_1q_ipv6.sh \ + vxlan_bridge_1q_port_8472_ipv6.sh vxlan_bridge_1q_port_8472.sh \ vxlan_bridge_1q.sh \ + vxlan_symmetric_ipv6.sh \ vxlan_symmetric.sh TEST_PROGS_EXTENDED := devlink_lib.sh \ ethtool_lib.sh \ fib_offload_lib.sh \ forwarding.config.sample \ + ip6gre_lib.sh \ ipip_lib.sh \ lib.sh \ mirror_gre_lib.sh \ From ff5265d45345d01fefc98fcb9ae891b59633c919 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 28 Apr 2022 14:25:43 +0800 Subject: [PATCH 469/803] net: ethernet: mediatek: add missing of_node_put() in mtk_sgmii_init() The node pointer returned by of_parse_phandle() with refcount incremented, so add of_node_put() after using it in mtk_sgmii_init(). Fixes: 9ffee4a8276c ("net: ethernet: mediatek: Extend SGMII related functions") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220428062543.64883-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_sgmii.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mediatek/mtk_sgmii.c b/drivers/net/ethernet/mediatek/mtk_sgmii.c index 32d83421226a..5897940a418b 100644 --- a/drivers/net/ethernet/mediatek/mtk_sgmii.c +++ b/drivers/net/ethernet/mediatek/mtk_sgmii.c @@ -26,6 +26,7 @@ int mtk_sgmii_init(struct mtk_sgmii *ss, struct device_node *r, u32 ana_rgc3) break; ss->regmap[i] = syscon_node_to_regmap(np); + of_node_put(np); if (IS_ERR(ss->regmap[i])) return PTR_ERR(ss->regmap[i]); } From e87f66b38e66dffdec9daa9f8f0eb044e9a62e3b Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Thu, 28 Apr 2022 23:19:32 +0200 Subject: [PATCH 470/803] net: mdio: Fix ENOMEM return value in BCM6368 mux bus controller Error values inside the probe function must be < 0. The ENOMEM return value has the wrong sign: it is positive instead of negative. Add a minus sign. Fixes: e239756717b5 ("net: mdio: Add BCM6368 MDIO mux bus controller") Signed-off-by: Niels Dossche Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20220428211931.8130-1-dossche.niels@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-mux-bcm6368.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/mdio/mdio-mux-bcm6368.c b/drivers/net/mdio/mdio-mux-bcm6368.c index 6dcbf987d61b..8b444a8eb6b5 100644 --- a/drivers/net/mdio/mdio-mux-bcm6368.c +++ b/drivers/net/mdio/mdio-mux-bcm6368.c @@ -115,7 +115,7 @@ static int bcm6368_mdiomux_probe(struct platform_device *pdev) md->mii_bus = devm_mdiobus_alloc(&pdev->dev); if (!md->mii_bus) { dev_err(&pdev->dev, "mdiomux bus alloc failed\n"); - return ENOMEM; + return -ENOMEM; } bus = md->mii_bus; From 52b2abef450a78e25d485ac61e32f4ce86a87701 Mon Sep 17 00:00:00 2001 From: Qiao Ma Date: Thu, 28 Apr 2022 20:30:16 +0800 Subject: [PATCH 471/803] hinic: fix bug of wq out of bound access If wq has only one page, we need to check wqe rolling over page by compare end_idx and curr_idx, and then copy wqe to shadow wqe to avoid out of bound access. This work has been done in hinic_get_wqe, but missed for hinic_read_wqe. This patch fixes it, and removes unnecessary MASKED_WQE_IDX(). Fixes: 7dd29ee12865 ("hinic: add sriov feature support") Signed-off-by: Qiao Ma Reviewed-by: Xunlei Pang Link: https://lore.kernel.org/r/282817b0e1ae2e28fdf3ed8271a04e77f57bf42e.1651148587.git.mqaio@linux.alibaba.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c index 2d9b06d7caad..f7dc7d825f63 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c @@ -771,7 +771,7 @@ struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size, /* If we only have one page, still need to get shadown wqe when * wqe rolling-over page */ - if (curr_pg != end_pg || MASKED_WQE_IDX(wq, end_prod_idx) < *prod_idx) { + if (curr_pg != end_pg || end_prod_idx < *prod_idx) { void *shadow_addr = &wq->shadow_wqe[curr_pg * wq->max_wqe_size]; copy_wqe_to_shadow(wq, shadow_addr, num_wqebbs, *prod_idx); @@ -841,7 +841,10 @@ struct hinic_hw_wqe *hinic_read_wqe(struct hinic_wq *wq, unsigned int wqe_size, *cons_idx = curr_cons_idx; - if (curr_pg != end_pg) { + /* If we only have one page, still need to get shadown wqe when + * wqe rolling-over page + */ + if (curr_pg != end_pg || end_cons_idx < curr_cons_idx) { void *shadow_addr = &wq->shadow_wqe[curr_pg * wq->max_wqe_size]; copy_wqe_to_shadow(wq, shadow_addr, num_wqebbs, *cons_idx); From fee34dd199384a483f84806a5cbcf8d657a481cc Mon Sep 17 00:00:00 2001 From: Arun Ramadoss Date: Thu, 28 Apr 2022 12:37:09 +0530 Subject: [PATCH 472/803] net: dsa: ksz9477: port mirror sniffing limited to one port This patch limits the sniffing to only one port during the mirror add. And during the mirror_del it checks for all the ports using the sniff, if and only if no other ports are referring, sniffing is disabled. The code is updated based on the review comments of LAN937x port mirror patch. Link: https://patchwork.kernel.org/project/netdevbpf/patch/20210422094257.1641396-8-prasanna.vengateshan@microchip.com/ Fixes: b987e98e50ab ("dsa: add DSA switch driver for Microchip KSZ9477") Signed-off-by: Prasanna Vengateshan Signed-off-by: Arun Ramadoss Link: https://lore.kernel.org/r/20220428070709.7094-1-arun.ramadoss@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz9477.c | 38 ++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 8222c8a6c5ec..7310d19d1f06 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -1021,14 +1021,32 @@ static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port, bool ingress, struct netlink_ext_ack *extack) { struct ksz_device *dev = ds->priv; + u8 data; + int p; + + /* Limit to one sniffer port + * Check if any of the port is already set for sniffing + * If yes, instruct the user to remove the previous entry & exit + */ + for (p = 0; p < dev->port_cnt; p++) { + /* Skip the current sniffing port */ + if (p == mirror->to_local_port) + continue; + + ksz_pread8(dev, p, P_MIRROR_CTRL, &data); + + if (data & PORT_MIRROR_SNIFFER) { + NL_SET_ERR_MSG_MOD(extack, + "Sniffer port is already configured, delete existing rules & retry"); + return -EBUSY; + } + } if (ingress) ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, true); else ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_TX, true); - ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_SNIFFER, false); - /* configure mirror port */ ksz_port_cfg(dev, mirror->to_local_port, P_MIRROR_CTRL, PORT_MIRROR_SNIFFER, true); @@ -1042,16 +1060,28 @@ static void ksz9477_port_mirror_del(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror) { struct ksz_device *dev = ds->priv; + bool in_use = false; u8 data; + int p; if (mirror->ingress) ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, false); else ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_TX, false); - ksz_pread8(dev, port, P_MIRROR_CTRL, &data); - if (!(data & (PORT_MIRROR_RX | PORT_MIRROR_TX))) + /* Check if any of the port is still referring to sniffer port */ + for (p = 0; p < dev->port_cnt; p++) { + ksz_pread8(dev, p, P_MIRROR_CTRL, &data); + + if ((data & (PORT_MIRROR_RX | PORT_MIRROR_TX))) { + in_use = true; + break; + } + } + + /* delete sniffing if there are no other mirroring rules */ + if (!in_use) ksz_port_cfg(dev, mirror->to_local_port, P_MIRROR_CTRL, PORT_MIRROR_SNIFFER, false); } From a9e9b091a1c14ecd8bd9d3214a62142a1786fe30 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 28 Apr 2022 17:53:17 +0800 Subject: [PATCH 473/803] net: dsa: mt7530: add missing of_node_put() in mt7530_setup() Add of_node_put() if of_get_phy_mode() fails in mt7530_setup() Fixes: 0c65b2b90d13 ("net: of_get_phy_mode: Change API to solve int/unit warnings") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220428095317.538829-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 19f0035d4410..fe3cb26f4287 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2229,6 +2229,7 @@ mt7530_setup(struct dsa_switch *ds) ret = of_get_phy_mode(mac_np, &interface); if (ret && ret != -ENODEV) { of_node_put(mac_np); + of_node_put(phy_node); return ret; } id = of_mdio_parse_addr(ds->dev, phy_node); From 1a15267b7be77e0792cf0c7b36ca65c8eb2df0d8 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 28 Apr 2022 17:57:16 +0800 Subject: [PATCH 474/803] net: stmmac: dwmac-sun8i: add missing of_node_put() in sun8i_dwmac_register_mdio_mux() The node pointer returned by of_get_child_by_name() with refcount incremented, so add of_node_put() after using it. Fixes: 634db83b8265 ("net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220428095716.540452-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index f86cc83003f2..f834472599f7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -907,6 +907,7 @@ static int sun8i_dwmac_register_mdio_mux(struct stmmac_priv *priv) ret = mdio_mux_init(priv->device, mdio_mux, mdio_mux_syscon_switch_fn, &gmac->mux_handle, priv, priv->mii); + of_node_put(mdio_mux); return ret; } From 95098d5ac2551769807031444e55a0da5d4f0952 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 29 Apr 2022 09:53:37 +0800 Subject: [PATCH 475/803] net: cpsw: add missing of_node_put() in cpsw_probe_dt() 'tmp_node' need be put before returning from cpsw_probe_dt(), so add missing of_node_put() in error path. Fixes: ed3525eda4c4 ("net: ethernet: ti: introduce cpsw switchdev based driver part 1 - dual-emac") Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw_new.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index bd4b1528cf99..79e850fe4621 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -1246,8 +1246,10 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw) data->slave_data = devm_kcalloc(dev, CPSW_SLAVE_PORTS_NUM, sizeof(struct cpsw_slave_data), GFP_KERNEL); - if (!data->slave_data) + if (!data->slave_data) { + of_node_put(tmp_node); return -ENOMEM; + } /* Populate all the child nodes here... */ @@ -1341,6 +1343,7 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw) err_node_put: of_node_put(port_np); + of_node_put(tmp_node); return ret; } From 39cb9faa5d46d0d0694f4b594ef905f517600c8e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 29 Apr 2022 21:05:16 +0100 Subject: [PATCH 476/803] rxrpc: Enable IPv6 checksums on transport socket AF_RXRPC doesn't currently enable IPv6 UDP Tx checksums on the transport socket it opens and the checksums in the packets it generates end up 0. It probably should also enable IPv6 UDP Rx checksums and IPv4 UDP checksums. The latter only seem to be applied if the socket family is AF_INET and don't seem to apply if it's AF_INET6. IPv4 packets from an IPv6 socket seem to have checksums anyway. What seems to have happened is that the inet_inv_convert_csum() call didn't get converted to the appropriate udp_port_cfg parameters - and udp_sock_create() disables checksums unless explicitly told not too. Fix this by enabling the three udp_port_cfg checksum options. Fixes: 1a9b86c9fd95 ("rxrpc: use udp tunnel APIs instead of open code in rxrpc_open_socket") Reported-by: Marc Dionne Signed-off-by: David Howells Reviewed-by: Xin Long Reviewed-by: Marc Dionne cc: Vadim Fedorenko cc: David S. Miller cc: linux-afs@lists.infradead.org Signed-off-by: David S. Miller --- net/rxrpc/local_object.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index a4111408ffd0..6a1611b0e303 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -117,6 +117,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) local, srx->transport_type, srx->transport.family); udp_conf.family = srx->transport.family; + udp_conf.use_udp_checksums = true; if (udp_conf.family == AF_INET) { udp_conf.local_ip = srx->transport.sin.sin_addr; udp_conf.local_udp_port = srx->transport.sin.sin_port; @@ -124,6 +125,8 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } else { udp_conf.local_ip6 = srx->transport.sin6.sin6_addr; udp_conf.local_udp_port = srx->transport.sin6.sin6_port; + udp_conf.use_udp6_tx_checksums = true; + udp_conf.use_udp6_rx_checksums = true; #endif } ret = udp_sock_create(net, &udp_conf, &local->socket); From dba5bdd57bea587ea4f0b79b03c71135f84a7e8b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Apr 2022 08:42:57 -0700 Subject: [PATCH 477/803] net: igmp: respect RCU rules in ip_mc_source() and ip_mc_msfilter() syzbot reported an UAF in ip_mc_sf_allow() [1] Whenever RCU protected list replaces an object, the pointer to the new object needs to be updated _before_ the call to kfree_rcu() or call_rcu() Because kfree_rcu(ptr, rcu) got support for NULL ptr only recently in commit 12edff045bc6 ("rcu: Make kfree_rcu() ignore NULL pointers"), I chose to use the conditional to make sure stable backports won't miss this detail. if (psl) kfree_rcu(psl, rcu); net/ipv6/mcast.c has similar issues, addressed in a separate patch. [1] BUG: KASAN: use-after-free in ip_mc_sf_allow+0x6bb/0x6d0 net/ipv4/igmp.c:2655 Read of size 4 at addr ffff88807d37b904 by task syz-executor.5/908 CPU: 0 PID: 908 Comm: syz-executor.5 Not tainted 5.18.0-rc4-syzkaller-00064-g8f4dd16603ce #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0xeb/0x467 mm/kasan/report.c:313 print_report mm/kasan/report.c:429 [inline] kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491 ip_mc_sf_allow+0x6bb/0x6d0 net/ipv4/igmp.c:2655 raw_v4_input net/ipv4/raw.c:190 [inline] raw_local_deliver+0x4d1/0xbe0 net/ipv4/raw.c:218 ip_protocol_deliver_rcu+0xcf/0xb30 net/ipv4/ip_input.c:193 ip_local_deliver_finish+0x2ee/0x4c0 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ip_local_deliver+0x1b3/0x200 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:461 [inline] ip_rcv_finish+0x1cb/0x2f0 net/ipv4/ip_input.c:437 NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ip_rcv+0xaa/0xd0 net/ipv4/ip_input.c:556 __netif_receive_skb_one_core+0x114/0x180 net/core/dev.c:5405 __netif_receive_skb+0x24/0x1b0 net/core/dev.c:5519 netif_receive_skb_internal net/core/dev.c:5605 [inline] netif_receive_skb+0x13e/0x8e0 net/core/dev.c:5664 tun_rx_batched.isra.0+0x460/0x720 drivers/net/tun.c:1534 tun_get_user+0x28b7/0x3e30 drivers/net/tun.c:1985 tun_chr_write_iter+0xdb/0x200 drivers/net/tun.c:2015 call_write_iter include/linux/fs.h:2050 [inline] new_sync_write+0x38a/0x560 fs/read_write.c:504 vfs_write+0x7c0/0xac0 fs/read_write.c:591 ksys_write+0x127/0x250 fs/read_write.c:644 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f3f12c3bbff Code: 89 54 24 18 48 89 74 24 10 89 7c 24 08 e8 99 fd ff ff 48 8b 54 24 18 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 44 24 08 e8 cc fd ff ff 48 RSP: 002b:00007f3f13ea9130 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007f3f12d9bf60 RCX: 00007f3f12c3bbff RDX: 0000000000000036 RSI: 0000000020002ac0 RDI: 00000000000000c8 RBP: 00007f3f12ce308d R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000036 R11: 0000000000000293 R12: 0000000000000000 R13: 00007fffb68dd79f R14: 00007f3f13ea9300 R15: 0000000000022000 Allocated by task 908: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:45 [inline] set_alloc_info mm/kasan/common.c:436 [inline] ____kasan_kmalloc mm/kasan/common.c:515 [inline] ____kasan_kmalloc mm/kasan/common.c:474 [inline] __kasan_kmalloc+0xa6/0xd0 mm/kasan/common.c:524 kasan_kmalloc include/linux/kasan.h:234 [inline] __do_kmalloc mm/slab.c:3710 [inline] __kmalloc+0x209/0x4d0 mm/slab.c:3719 kmalloc include/linux/slab.h:586 [inline] sock_kmalloc net/core/sock.c:2501 [inline] sock_kmalloc+0xb5/0x100 net/core/sock.c:2492 ip_mc_source+0xba2/0x1100 net/ipv4/igmp.c:2392 do_ip_setsockopt net/ipv4/ip_sockglue.c:1296 [inline] ip_setsockopt+0x2312/0x3ab0 net/ipv4/ip_sockglue.c:1432 raw_setsockopt+0x274/0x2c0 net/ipv4/raw.c:861 __sys_setsockopt+0x2db/0x6a0 net/socket.c:2180 __do_sys_setsockopt net/socket.c:2191 [inline] __se_sys_setsockopt net/socket.c:2188 [inline] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2188 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Freed by task 753: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 kasan_set_track+0x21/0x30 mm/kasan/common.c:45 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370 ____kasan_slab_free mm/kasan/common.c:366 [inline] ____kasan_slab_free+0x13d/0x180 mm/kasan/common.c:328 kasan_slab_free include/linux/kasan.h:200 [inline] __cache_free mm/slab.c:3439 [inline] kmem_cache_free_bulk+0x69/0x460 mm/slab.c:3774 kfree_bulk include/linux/slab.h:437 [inline] kfree_rcu_work+0x51c/0xa10 kernel/rcu/tree.c:3318 process_one_work+0x996/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298 Last potentially related work creation: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 __kasan_record_aux_stack+0x7e/0x90 mm/kasan/generic.c:348 kvfree_call_rcu+0x74/0x990 kernel/rcu/tree.c:3595 ip_mc_msfilter+0x712/0xb60 net/ipv4/igmp.c:2510 do_ip_setsockopt net/ipv4/ip_sockglue.c:1257 [inline] ip_setsockopt+0x32e1/0x3ab0 net/ipv4/ip_sockglue.c:1432 raw_setsockopt+0x274/0x2c0 net/ipv4/raw.c:861 __sys_setsockopt+0x2db/0x6a0 net/socket.c:2180 __do_sys_setsockopt net/socket.c:2191 [inline] __se_sys_setsockopt net/socket.c:2188 [inline] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2188 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Second to last potentially related work creation: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 __kasan_record_aux_stack+0x7e/0x90 mm/kasan/generic.c:348 call_rcu+0x99/0x790 kernel/rcu/tree.c:3074 mpls_dev_notify+0x552/0x8a0 net/mpls/af_mpls.c:1656 notifier_call_chain+0xb5/0x200 kernel/notifier.c:84 call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1938 call_netdevice_notifiers_extack net/core/dev.c:1976 [inline] call_netdevice_notifiers net/core/dev.c:1990 [inline] unregister_netdevice_many+0x92e/0x1890 net/core/dev.c:10751 default_device_exit_batch+0x449/0x590 net/core/dev.c:11245 ops_exit_list+0x125/0x170 net/core/net_namespace.c:167 cleanup_net+0x4ea/0xb00 net/core/net_namespace.c:594 process_one_work+0x996/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298 The buggy address belongs to the object at ffff88807d37b900 which belongs to the cache kmalloc-64 of size 64 The buggy address is located 4 bytes inside of 64-byte region [ffff88807d37b900, ffff88807d37b940) The buggy address belongs to the physical page: page:ffffea0001f4dec0 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88807d37b180 pfn:0x7d37b flags: 0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000000200 ffff888010c41340 ffffea0001c795c8 ffff888010c40200 raw: ffff88807d37b180 ffff88807d37b000 000000010000001f 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x342040(__GFP_IO|__GFP_NOWARN|__GFP_COMP|__GFP_HARDWALL|__GFP_THISNODE), pid 2963, tgid 2963 (udevd), ts 139732238007, free_ts 139730893262 prep_new_page mm/page_alloc.c:2441 [inline] get_page_from_freelist+0xba2/0x3e00 mm/page_alloc.c:4182 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5408 __alloc_pages_node include/linux/gfp.h:587 [inline] kmem_getpages mm/slab.c:1378 [inline] cache_grow_begin+0x75/0x350 mm/slab.c:2584 cache_alloc_refill+0x27f/0x380 mm/slab.c:2957 ____cache_alloc mm/slab.c:3040 [inline] ____cache_alloc mm/slab.c:3023 [inline] __do_cache_alloc mm/slab.c:3267 [inline] slab_alloc mm/slab.c:3309 [inline] __do_kmalloc mm/slab.c:3708 [inline] __kmalloc+0x3b3/0x4d0 mm/slab.c:3719 kmalloc include/linux/slab.h:586 [inline] kzalloc include/linux/slab.h:714 [inline] tomoyo_encode2.part.0+0xe9/0x3a0 security/tomoyo/realpath.c:45 tomoyo_encode2 security/tomoyo/realpath.c:31 [inline] tomoyo_encode+0x28/0x50 security/tomoyo/realpath.c:80 tomoyo_realpath_from_path+0x186/0x620 security/tomoyo/realpath.c:288 tomoyo_get_realpath security/tomoyo/file.c:151 [inline] tomoyo_path_perm+0x21b/0x400 security/tomoyo/file.c:822 security_inode_getattr+0xcf/0x140 security/security.c:1350 vfs_getattr fs/stat.c:157 [inline] vfs_statx+0x16a/0x390 fs/stat.c:232 vfs_fstatat+0x8c/0xb0 fs/stat.c:255 __do_sys_newfstatat+0x91/0x110 fs/stat.c:425 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1356 [inline] free_pcp_prepare+0x549/0xd20 mm/page_alloc.c:1406 free_unref_page_prepare mm/page_alloc.c:3328 [inline] free_unref_page+0x19/0x6a0 mm/page_alloc.c:3423 __vunmap+0x85d/0xd30 mm/vmalloc.c:2667 __vfree+0x3c/0xd0 mm/vmalloc.c:2715 vfree+0x5a/0x90 mm/vmalloc.c:2746 __do_replace+0x16b/0x890 net/ipv6/netfilter/ip6_tables.c:1117 do_replace net/ipv6/netfilter/ip6_tables.c:1157 [inline] do_ip6t_set_ctl+0x90d/0xb90 net/ipv6/netfilter/ip6_tables.c:1639 nf_setsockopt+0x83/0xe0 net/netfilter/nf_sockopt.c:101 ipv6_setsockopt+0x122/0x180 net/ipv6/ipv6_sockglue.c:1026 tcp_setsockopt+0x136/0x2520 net/ipv4/tcp.c:3696 __sys_setsockopt+0x2db/0x6a0 net/socket.c:2180 __do_sys_setsockopt net/socket.c:2191 [inline] __se_sys_setsockopt net/socket.c:2188 [inline] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2188 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Memory state around the buggy address: ffff88807d37b800: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc ffff88807d37b880: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc >ffff88807d37b900: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ^ ffff88807d37b980: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff88807d37ba00: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc Fixes: c85bb41e9318 ("igmp: fix ip_mc_sf_allow race [v5]") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Flavio Leitner Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 2ad3c7b42d6d..1d9e6d5e9a76 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2403,9 +2403,10 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct /* decrease mem now to avoid the memleak warning */ atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); - kfree_rcu(psl, rcu); } rcu_assign_pointer(pmc->sflist, newpsl); + if (psl) + kfree_rcu(psl, rcu); psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ @@ -2507,11 +2508,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) /* decrease mem now to avoid the memleak warning */ atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); - kfree_rcu(psl, rcu); - } else + } else { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 0, NULL, 0); + } rcu_assign_pointer(pmc->sflist, newpsl); + if (psl) + kfree_rcu(psl, rcu); pmc->sfmode = msf->imsf_fmode; err = 0; done: From a9384a4c1d250cb40cebf50e41459426d160b08e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Apr 2022 09:20:36 -0700 Subject: [PATCH 478/803] mld: respect RCU rules in ip6_mc_source() and ip6_mc_msfilter() Whenever RCU protected list replaces an object, the pointer to the new object needs to be updated _before_ the call to kfree_rcu() or call_rcu() Also ip6_mc_msfilter() needs to update the pointer before releasing the mc_lock mutex. Note that linux-5.13 was supporting kfree_rcu(NULL, rcu), so this fix does not need the conditional test I was forced to use in the equivalent patch for IPv4. Fixes: 882ba1f73c06 ("mld: convert ipv6_mc_socklist->sflist to RCU") Signed-off-by: Eric Dumazet Cc: Taehee Yoo Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 909f937befd7..7f695c39d9a8 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -460,10 +460,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk, newpsl->sl_addr[i] = psl->sl_addr[i]; atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); - kfree_rcu(psl, rcu); } + rcu_assign_pointer(pmc->sflist, newpsl); + kfree_rcu(psl, rcu); psl = newpsl; - rcu_assign_pointer(pmc->sflist, psl); } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i = 0; i < psl->sl_count; i++) { @@ -565,12 +565,12 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, psl->sl_count, psl->sl_addr, 0); atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); - kfree_rcu(psl, rcu); } else { ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); } - mutex_unlock(&idev->mc_lock); rcu_assign_pointer(pmc->sflist, newpsl); + mutex_unlock(&idev->mc_lock); + kfree_rcu(psl, rcu); pmc->sfmode = gsf->gf_fmode; err = 0; done: From 2667ed10d9f01e250ba806276740782c89d77fda Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 28 Apr 2022 11:00:41 -0700 Subject: [PATCH 479/803] mm: Fix PASID use-after-free issue The PASID is being freed too early. It needs to stay around until after device drivers that might be using it have had a chance to clear it out of the hardware. The relevant refcounts are: mmget() /mmput() refcount the mm's address space mmgrab()/mmdrop() refcount the mm itself The PASID is currently tied to the life of the mm's address space and freed in __mmput(). This makes logical sense because the PASID can't be used once the address space is gone. But, this misses an important point: even after the address space is gone, the PASID will still be programmed into a device. Device drivers might, for instance, still need to flush operations that are outstanding and need to use that PASID. They do this at file->release() time. Device drivers call the IOMMU driver to hold a reference on the mm itself and drop it at file->release() time. But, the IOMMU driver holds a reference on the mm itself, not the address space. The address space (and the PASID) is long gone by the time the driver tries to clean up. This is effectively a use-after-free bug on the PASID. To fix this, move the PASID free operation from __mmput() to __mmdrop(). This ensures that the IOMMU driver's existing mmgrab() keeps the PASID allocated until it drops its mm reference. Fixes: 701fac40384f ("iommu/sva: Assign a PASID to mm on PASID allocation and free it on mm exit") Reported-by: Zhangfei Gao Suggested-by: Jean-Philippe Brucker Suggested-by: Jacob Pan Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Tested-by: Zhangfei Gao Reviewed-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20220428180041.806809-1-fenghua.yu@intel.com --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index 9796897560ab..35a3beff140b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -792,6 +792,7 @@ void __mmdrop(struct mm_struct *mm) mmu_notifier_subscriptions_destroy(mm); check_mm(mm); put_user_ns(mm->user_ns); + mm_pasid_drop(mm); free_mm(mm); } EXPORT_SYMBOL_GPL(__mmdrop); @@ -1190,7 +1191,6 @@ static inline void __mmput(struct mm_struct *mm) } if (mm->binfmt) module_put(mm->binfmt->module); - mm_pasid_drop(mm); mmdrop(mm); } From 47f753c1108e287edb3e27fad8a7511a9d55578e Mon Sep 17 00:00:00 2001 From: Tan Tee Min Date: Fri, 29 Apr 2022 19:58:07 +0800 Subject: [PATCH 480/803] net: stmmac: disable Split Header (SPH) for Intel platforms Based on DesignWare Ethernet QoS datasheet, we are seeing the limitation of Split Header (SPH) feature is not supported for Ipv4 fragmented packet. This SPH limitation will cause ping failure when the packets size exceed the MTU size. For example, the issue happens once the basic ping packet size is larger than the configured MTU size and the data is lost inside the fragmented packet, replaced by zeros/corrupted values, and leads to ping fail. So, disable the Split Header for Intel platforms. v2: Add fixes tag in commit message. Fixes: 67afd6d1cfdf("net: stmmac: Add Split Header support and enable it in XGMAC cores") Cc: # 5.10.x Suggested-by: Ong, Boon Leong Signed-off-by: Mohammad Athari Bin Ismail Signed-off-by: Wong Vee Khee Signed-off-by: Tan Tee Min Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 1 + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- include/linux/stmmac.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 63754a9c4ba7..0b0be0898ac5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -454,6 +454,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->has_gmac4 = 1; plat->force_sf_dma_mode = 0; plat->tso_en = 1; + plat->sph_disable = 1; /* Multiplying factor to the clk_eee_i clock time * period to make it closer to 100 ns. This value diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 4a4b3651ab3e..2525a80353b7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7021,7 +7021,7 @@ int stmmac_dvr_probe(struct device *device, dev_info(priv->device, "TSO feature enabled\n"); } - if (priv->dma_cap.sphen) { + if (priv->dma_cap.sphen && !priv->plat->sph_disable) { ndev->hw_features |= NETIF_F_GRO; priv->sph_cap = true; priv->sph = priv->sph_cap; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 24eea1b05ca2..29917850f079 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -270,5 +270,6 @@ struct plat_stmmacenet_data { int msi_rx_base_vec; int msi_tx_base_vec; bool use_phy_wol; + bool sph_disable; }; #endif From da5c0f119203ad9728920456a0f52a6d850c01cd Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 29 Apr 2022 20:45:50 +0800 Subject: [PATCH 481/803] nfc: replace improper check device_is_registered() in netlink related functions The device_is_registered() in nfc core is used to check whether nfc device is registered in netlink related functions such as nfc_fw_download(), nfc_dev_up() and so on. Although device_is_registered() is protected by device_lock, there is still a race condition between device_del() and device_is_registered(). The root cause is that kobject_del() in device_del() is not protected by device_lock. (cleanup task) | (netlink task) | nfc_unregister_device | nfc_fw_download device_del | device_lock ... | if (!device_is_registered)//(1) kobject_del//(2) | ... ... | device_unlock The device_is_registered() returns the value of state_in_sysfs and the state_in_sysfs is set to zero in kobject_del(). If we pass check in position (1), then set zero in position (2). As a result, the check in position (1) is useless. This patch uses bool variable instead of device_is_registered() to judge whether the nfc device is registered, which is well synchronized. Fixes: 3e256b8f8dfa ("NFC: add nfc subsystem core") Signed-off-by: Duoming Zhou Signed-off-by: David S. Miller --- net/nfc/core.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/net/nfc/core.c b/net/nfc/core.c index dc7a2404efdf..5b286e1e0a6f 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -38,7 +38,7 @@ int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -94,7 +94,7 @@ int nfc_dev_up(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -142,7 +142,7 @@ int nfc_dev_down(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -207,7 +207,7 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -246,7 +246,7 @@ int nfc_stop_poll(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -291,7 +291,7 @@ int nfc_dep_link_up(struct nfc_dev *dev, int target_index, u8 comm_mode) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -335,7 +335,7 @@ int nfc_dep_link_down(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -401,7 +401,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -448,7 +448,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -495,7 +495,7 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; kfree_skb(skb); goto error; @@ -552,7 +552,7 @@ int nfc_enable_se(struct nfc_dev *dev, u32 se_idx) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -601,7 +601,7 @@ int nfc_disable_se(struct nfc_dev *dev, u32 se_idx) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -1134,6 +1134,7 @@ int nfc_register_device(struct nfc_dev *dev) dev->rfkill = NULL; } } + dev->shutting_down = false; device_unlock(&dev->dev); rc = nfc_genl_device_added(dev); @@ -1166,12 +1167,10 @@ void nfc_unregister_device(struct nfc_dev *dev) rfkill_unregister(dev->rfkill); rfkill_destroy(dev->rfkill); } + dev->shutting_down = true; device_unlock(&dev->dev); if (dev->ops->check_presence) { - device_lock(&dev->dev); - dev->shutting_down = true; - device_unlock(&dev->dev); del_timer_sync(&dev->check_pres_timer); cancel_work_sync(&dev->check_pres_work); } From d270453a0d9ec10bb8a802a142fb1b3601a83098 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 29 Apr 2022 20:45:51 +0800 Subject: [PATCH 482/803] nfc: nfcmrvl: main: reorder destructive operations in nfcmrvl_nci_unregister_dev to avoid bugs There are destructive operations such as nfcmrvl_fw_dnld_abort and gpio_free in nfcmrvl_nci_unregister_dev. The resources such as firmware, gpio and so on could be destructed while the upper layer functions such as nfcmrvl_fw_dnld_start and nfcmrvl_nci_recv_frame is executing, which leads to double-free, use-after-free and null-ptr-deref bugs. There are three situations that could lead to double-free bugs. The first situation is shown below: (Thread 1) | (Thread 2) nfcmrvl_fw_dnld_start | ... | nfcmrvl_nci_unregister_dev release_firmware() | nfcmrvl_fw_dnld_abort kfree(fw) //(1) | fw_dnld_over | release_firmware ... | kfree(fw) //(2) | ... The second situation is shown below: (Thread 1) | (Thread 2) nfcmrvl_fw_dnld_start | ... | mod_timer | (wait a time) | fw_dnld_timeout | nfcmrvl_nci_unregister_dev fw_dnld_over | nfcmrvl_fw_dnld_abort release_firmware | fw_dnld_over kfree(fw) //(1) | release_firmware ... | kfree(fw) //(2) The third situation is shown below: (Thread 1) | (Thread 2) nfcmrvl_nci_recv_frame | if(..->fw_download_in_progress)| nfcmrvl_fw_dnld_recv_frame | queue_work | | fw_dnld_rx_work | nfcmrvl_nci_unregister_dev fw_dnld_over | nfcmrvl_fw_dnld_abort release_firmware | fw_dnld_over kfree(fw) //(1) | release_firmware | kfree(fw) //(2) The firmware struct is deallocated in position (1) and deallocated in position (2) again. The crash trace triggered by POC is like below: BUG: KASAN: double-free or invalid-free in fw_dnld_over Call Trace: kfree fw_dnld_over nfcmrvl_nci_unregister_dev nci_uart_tty_close tty_ldisc_kill tty_ldisc_hangup __tty_hangup.part.0 tty_release ... What's more, there are also use-after-free and null-ptr-deref bugs in nfcmrvl_fw_dnld_start. If we deallocate firmware struct, gpio or set null to the members of priv->fw_dnld in nfcmrvl_nci_unregister_dev, then, we dereference firmware, gpio or the members of priv->fw_dnld in nfcmrvl_fw_dnld_start, the UAF or NPD bugs will happen. This patch reorders destructive operations after nci_unregister_device in order to synchronize between cleanup routine and firmware download routine. The nci_unregister_device is well synchronized. If the device is detaching, the firmware download routine will goto error. If firmware download routine is executing, nci_unregister_device will wait until firmware download routine is finished. Fixes: 3194c6870158 ("NFC: nfcmrvl: add firmware download support") Signed-off-by: Duoming Zhou Signed-off-by: David S. Miller --- drivers/nfc/nfcmrvl/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c index 2fcf545012b1..1a5284de4341 100644 --- a/drivers/nfc/nfcmrvl/main.c +++ b/drivers/nfc/nfcmrvl/main.c @@ -183,6 +183,7 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv) { struct nci_dev *ndev = priv->ndev; + nci_unregister_device(ndev); if (priv->ndev->nfc_dev->fw_download_in_progress) nfcmrvl_fw_dnld_abort(priv); @@ -191,7 +192,6 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv) if (gpio_is_valid(priv->config.reset_n_io)) gpio_free(priv->config.reset_n_io); - nci_unregister_device(ndev); nci_free_device(ndev); kfree(priv); } From 6b292a04c694573a302686323fe15b1c7e673e5b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Apr 2022 15:54:24 +0200 Subject: [PATCH 483/803] pci_irq_vector() can't be used in atomic context any longer. This conflicts with the usage of this function in nic_mbx_intr_handler(). Cache the Linux interrupt numbers in struct nicpf and use that cache in the interrupt handler to select the mailbox. Fixes: 495c66aca3da ("genirq/msi: Convert to new functions") Reported-by: Ondrej Mosnacek Signed-off-by: Thomas Gleixner Cc: Sunil Goutham Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Paolo Abeni Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org Link: https://bugzilla.redhat.com/show_bug.cgi?id=2041772 Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic_main.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index f2f1ce81fd9c..0ec65ec634df 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -59,7 +59,7 @@ struct nicpf { /* MSI-X */ u8 num_vec; - bool irq_allocated[NIC_PF_MSIX_VECTORS]; + unsigned int irq_allocated[NIC_PF_MSIX_VECTORS]; char irq_name[NIC_PF_MSIX_VECTORS][20]; }; @@ -1150,7 +1150,7 @@ static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq) u64 intr; u8 vf; - if (irq == pci_irq_vector(nic->pdev, NIC_PF_INTR_ID_MBOX0)) + if (irq == nic->irq_allocated[NIC_PF_INTR_ID_MBOX0]) mbx = 0; else mbx = 1; @@ -1176,14 +1176,14 @@ static void nic_free_all_interrupts(struct nicpf *nic) for (irq = 0; irq < nic->num_vec; irq++) { if (nic->irq_allocated[irq]) - free_irq(pci_irq_vector(nic->pdev, irq), nic); - nic->irq_allocated[irq] = false; + free_irq(nic->irq_allocated[irq], nic); + nic->irq_allocated[irq] = 0; } } static int nic_register_interrupts(struct nicpf *nic) { - int i, ret; + int i, ret, irq; nic->num_vec = pci_msix_vec_count(nic->pdev); /* Enable MSI-X */ @@ -1201,13 +1201,13 @@ static int nic_register_interrupts(struct nicpf *nic) sprintf(nic->irq_name[i], "NICPF Mbox%d", (i - NIC_PF_INTR_ID_MBOX0)); - ret = request_irq(pci_irq_vector(nic->pdev, i), - nic_mbx_intr_handler, 0, + irq = pci_irq_vector(nic->pdev, i); + ret = request_irq(irq, nic_mbx_intr_handler, 0, nic->irq_name[i], nic); if (ret) goto fail; - nic->irq_allocated[i] = true; + nic->irq_allocated[i] = irq; } /* Enable mailbox interrupt */ From 79396934e289dbc501316c1d1f975bb4c88ae460 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 29 Apr 2022 09:43:03 -0700 Subject: [PATCH 484/803] net: dsa: b53: convert to phylink_pcs Convert B53 to use phylink_pcs for the serdes rather than hooking it into the MAC-layer callbacks. Fixes: 81c1681cbb9f ("net: dsa: b53: mark as non-legacy") Reviewed-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 36 +++------------- drivers/net/dsa/b53/b53_priv.h | 24 ++++++----- drivers/net/dsa/b53/b53_serdes.c | 74 ++++++++++++++++++++++---------- drivers/net/dsa/b53/b53_serdes.h | 9 ++-- drivers/net/dsa/b53/b53_srab.c | 4 +- 5 files changed, 75 insertions(+), 72 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 77501f9c5915..fbb32aa49b24 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1354,46 +1354,25 @@ static void b53_phylink_get_caps(struct dsa_switch *ds, int port, config->legacy_pre_march2020 = false; } -int b53_phylink_mac_link_state(struct dsa_switch *ds, int port, - struct phylink_link_state *state) +static struct phylink_pcs *b53_phylink_mac_select_pcs(struct dsa_switch *ds, + int port, + phy_interface_t interface) { struct b53_device *dev = ds->priv; - int ret = -EOPNOTSUPP; - if ((phy_interface_mode_is_8023z(state->interface) || - state->interface == PHY_INTERFACE_MODE_SGMII) && - dev->ops->serdes_link_state) - ret = dev->ops->serdes_link_state(dev, port, state); + if (!dev->ops->phylink_mac_select_pcs) + return NULL; - return ret; + return dev->ops->phylink_mac_select_pcs(dev, port, interface); } -EXPORT_SYMBOL(b53_phylink_mac_link_state); void b53_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state) { - struct b53_device *dev = ds->priv; - - if (mode == MLO_AN_PHY || mode == MLO_AN_FIXED) - return; - - if ((phy_interface_mode_is_8023z(state->interface) || - state->interface == PHY_INTERFACE_MODE_SGMII) && - dev->ops->serdes_config) - dev->ops->serdes_config(dev, port, mode, state); } EXPORT_SYMBOL(b53_phylink_mac_config); -void b53_phylink_mac_an_restart(struct dsa_switch *ds, int port) -{ - struct b53_device *dev = ds->priv; - - if (dev->ops->serdes_an_restart) - dev->ops->serdes_an_restart(dev, port); -} -EXPORT_SYMBOL(b53_phylink_mac_an_restart); - void b53_phylink_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface) @@ -2269,9 +2248,8 @@ static const struct dsa_switch_ops b53_switch_ops = { .phy_write = b53_phy_write16, .adjust_link = b53_adjust_link, .phylink_get_caps = b53_phylink_get_caps, - .phylink_mac_link_state = b53_phylink_mac_link_state, + .phylink_mac_select_pcs = b53_phylink_mac_select_pcs, .phylink_mac_config = b53_phylink_mac_config, - .phylink_mac_an_restart = b53_phylink_mac_an_restart, .phylink_mac_link_down = b53_phylink_mac_link_down, .phylink_mac_link_up = b53_phylink_mac_link_up, .port_enable = b53_enable_port, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 3085b6cc7d40..795cbffd5c2b 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include @@ -29,7 +29,6 @@ struct b53_device; struct net_device; -struct phylink_link_state; struct b53_io_ops { int (*read8)(struct b53_device *dev, u8 page, u8 reg, u8 *value); @@ -48,13 +47,10 @@ struct b53_io_ops { void (*irq_disable)(struct b53_device *dev, int port); void (*phylink_get_caps)(struct b53_device *dev, int port, struct phylink_config *config); + struct phylink_pcs *(*phylink_mac_select_pcs)(struct b53_device *dev, + int port, + phy_interface_t interface); u8 (*serdes_map_lane)(struct b53_device *dev, int port); - int (*serdes_link_state)(struct b53_device *dev, int port, - struct phylink_link_state *state); - void (*serdes_config)(struct b53_device *dev, int port, - unsigned int mode, - const struct phylink_link_state *state); - void (*serdes_an_restart)(struct b53_device *dev, int port); void (*serdes_link_set)(struct b53_device *dev, int port, unsigned int mode, phy_interface_t interface, bool link_up); @@ -85,8 +81,15 @@ enum { BCM7278_DEVICE_ID = 0x7278, }; +struct b53_pcs { + struct phylink_pcs pcs; + struct b53_device *dev; + u8 lane; +}; + #define B53_N_PORTS 9 #define B53_N_PORTS_25 6 +#define B53_N_PCS 2 struct b53_port { u16 vlan_ctl_mask; @@ -143,6 +146,8 @@ struct b53_device { bool vlan_enabled; unsigned int num_ports; struct b53_port *ports; + + struct b53_pcs pcs[B53_N_PCS]; }; #define b53_for_each_port(dev, i) \ @@ -336,12 +341,9 @@ int b53_br_flags(struct dsa_switch *ds, int port, struct netlink_ext_ack *extack); int b53_setup_devlink_resources(struct dsa_switch *ds); void b53_port_event(struct dsa_switch *ds, int port); -int b53_phylink_mac_link_state(struct dsa_switch *ds, int port, - struct phylink_link_state *state); void b53_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state); -void b53_phylink_mac_an_restart(struct dsa_switch *ds, int port); void b53_phylink_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface); diff --git a/drivers/net/dsa/b53/b53_serdes.c b/drivers/net/dsa/b53/b53_serdes.c index 555e5b372321..0690210770ff 100644 --- a/drivers/net/dsa/b53/b53_serdes.c +++ b/drivers/net/dsa/b53/b53_serdes.c @@ -17,6 +17,11 @@ #include "b53_serdes.h" #include "b53_regs.h" +static inline struct b53_pcs *pcs_to_b53_pcs(struct phylink_pcs *pcs) +{ + return container_of(pcs, struct b53_pcs, pcs); +} + static void b53_serdes_write_blk(struct b53_device *dev, u8 offset, u16 block, u16 value) { @@ -60,51 +65,47 @@ static u16 b53_serdes_read(struct b53_device *dev, u8 lane, return b53_serdes_read_blk(dev, offset, block); } -void b53_serdes_config(struct b53_device *dev, int port, unsigned int mode, - const struct phylink_link_state *state) +static int b53_serdes_config(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertising, + bool permit_pause_to_mac) { - u8 lane = b53_serdes_map_lane(dev, port); + struct b53_device *dev = pcs_to_b53_pcs(pcs)->dev; + u8 lane = pcs_to_b53_pcs(pcs)->lane; u16 reg; - if (lane == B53_INVALID_LANE) - return; - reg = b53_serdes_read(dev, lane, B53_SERDES_DIGITAL_CONTROL(1), SERDES_DIGITAL_BLK); - if (state->interface == PHY_INTERFACE_MODE_1000BASEX) + if (interface == PHY_INTERFACE_MODE_1000BASEX) reg |= FIBER_MODE_1000X; else reg &= ~FIBER_MODE_1000X; b53_serdes_write(dev, lane, B53_SERDES_DIGITAL_CONTROL(1), SERDES_DIGITAL_BLK, reg); + + return 0; } -EXPORT_SYMBOL(b53_serdes_config); -void b53_serdes_an_restart(struct b53_device *dev, int port) +static void b53_serdes_an_restart(struct phylink_pcs *pcs) { - u8 lane = b53_serdes_map_lane(dev, port); + struct b53_device *dev = pcs_to_b53_pcs(pcs)->dev; + u8 lane = pcs_to_b53_pcs(pcs)->lane; u16 reg; - if (lane == B53_INVALID_LANE) - return; - reg = b53_serdes_read(dev, lane, B53_SERDES_MII_REG(MII_BMCR), SERDES_MII_BLK); reg |= BMCR_ANRESTART; b53_serdes_write(dev, lane, B53_SERDES_MII_REG(MII_BMCR), SERDES_MII_BLK, reg); } -EXPORT_SYMBOL(b53_serdes_an_restart); -int b53_serdes_link_state(struct b53_device *dev, int port, - struct phylink_link_state *state) +static void b53_serdes_get_state(struct phylink_pcs *pcs, + struct phylink_link_state *state) { - u8 lane = b53_serdes_map_lane(dev, port); + struct b53_device *dev = pcs_to_b53_pcs(pcs)->dev; + u8 lane = pcs_to_b53_pcs(pcs)->lane; u16 dig, bmsr; - if (lane == B53_INVALID_LANE) - return 1; - dig = b53_serdes_read(dev, lane, B53_SERDES_DIGITAL_STATUS, SERDES_DIGITAL_BLK); bmsr = b53_serdes_read(dev, lane, B53_SERDES_MII_REG(MII_BMSR), @@ -133,10 +134,7 @@ int b53_serdes_link_state(struct b53_device *dev, int port, state->pause |= MLO_PAUSE_RX; if (dig & PAUSE_RESOLUTION_TX_SIDE) state->pause |= MLO_PAUSE_TX; - - return 0; } -EXPORT_SYMBOL(b53_serdes_link_state); void b53_serdes_link_set(struct b53_device *dev, int port, unsigned int mode, phy_interface_t interface, bool link_up) @@ -158,6 +156,12 @@ void b53_serdes_link_set(struct b53_device *dev, int port, unsigned int mode, } EXPORT_SYMBOL(b53_serdes_link_set); +static const struct phylink_pcs_ops b53_pcs_ops = { + .pcs_get_state = b53_serdes_get_state, + .pcs_config = b53_serdes_config, + .pcs_an_restart = b53_serdes_an_restart, +}; + void b53_serdes_phylink_get_caps(struct b53_device *dev, int port, struct phylink_config *config) { @@ -187,9 +191,28 @@ void b53_serdes_phylink_get_caps(struct b53_device *dev, int port, } EXPORT_SYMBOL(b53_serdes_phylink_get_caps); +struct phylink_pcs *b53_serdes_phylink_mac_select_pcs(struct b53_device *dev, + int port, + phy_interface_t interface) +{ + u8 lane = b53_serdes_map_lane(dev, port); + + if (lane == B53_INVALID_LANE || lane >= B53_N_PCS || + !dev->pcs[lane].dev) + return NULL; + + if (!phy_interface_mode_is_8023z(interface) && + interface != PHY_INTERFACE_MODE_SGMII) + return NULL; + + return &dev->pcs[lane].pcs; +} +EXPORT_SYMBOL(b53_serdes_phylink_mac_select_pcs); + int b53_serdes_init(struct b53_device *dev, int port) { u8 lane = b53_serdes_map_lane(dev, port); + struct b53_pcs *pcs; u16 id0, msb, lsb; if (lane == B53_INVALID_LANE) @@ -212,6 +235,11 @@ int b53_serdes_init(struct b53_device *dev, int port) (id0 >> SERDES_ID0_REV_NUM_SHIFT) & SERDES_ID0_REV_NUM_MASK, (u32)msb << 16 | lsb); + pcs = &dev->pcs[lane]; + pcs->dev = dev; + pcs->lane = lane; + pcs->pcs.ops = &b53_pcs_ops; + return 0; } EXPORT_SYMBOL(b53_serdes_init); diff --git a/drivers/net/dsa/b53/b53_serdes.h b/drivers/net/dsa/b53/b53_serdes.h index f47d5caa7557..ef81f5da5f81 100644 --- a/drivers/net/dsa/b53/b53_serdes.h +++ b/drivers/net/dsa/b53/b53_serdes.h @@ -107,14 +107,11 @@ static inline u8 b53_serdes_map_lane(struct b53_device *dev, int port) return dev->ops->serdes_map_lane(dev, port); } -int b53_serdes_get_link(struct b53_device *dev, int port); -int b53_serdes_link_state(struct b53_device *dev, int port, - struct phylink_link_state *state); -void b53_serdes_config(struct b53_device *dev, int port, unsigned int mode, - const struct phylink_link_state *state); -void b53_serdes_an_restart(struct b53_device *dev, int port); void b53_serdes_link_set(struct b53_device *dev, int port, unsigned int mode, phy_interface_t interface, bool link_up); +struct phylink_pcs *b53_serdes_phylink_mac_select_pcs(struct b53_device *dev, + int port, + phy_interface_t interface); void b53_serdes_phylink_get_caps(struct b53_device *dev, int port, struct phylink_config *config); #if IS_ENABLED(CONFIG_B53_SERDES) diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c index c51b716657db..da0b889880f6 100644 --- a/drivers/net/dsa/b53/b53_srab.c +++ b/drivers/net/dsa/b53/b53_srab.c @@ -491,10 +491,8 @@ static const struct b53_io_ops b53_srab_ops = { .irq_disable = b53_srab_irq_disable, .phylink_get_caps = b53_srab_phylink_get_caps, #if IS_ENABLED(CONFIG_B53_SERDES) + .phylink_mac_select_pcs = b53_serdes_phylink_mac_select_pcs, .serdes_map_lane = b53_srab_serdes_map_lane, - .serdes_link_state = b53_serdes_link_state, - .serdes_config = b53_serdes_config, - .serdes_an_restart = b53_serdes_an_restart, .serdes_link_set = b53_serdes_link_set, #endif }; From 672c0c5173427e6b3e2a9bbb7be51ceeec78093a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 May 2022 13:57:58 -0700 Subject: [PATCH 485/803] Linux 5.18-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c3ec1ea42379..9a820c525b86 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Superb Owl # *DOCUMENTATION* From e75f88efac05bf4e107e4171d8db6d8c3937252d Mon Sep 17 00:00:00 2001 From: Andrei Lalaev Date: Fri, 15 Apr 2022 10:07:11 +0300 Subject: [PATCH 486/803] gpiolib: of: fix bounds check for 'gpio-reserved-ranges' Gpiolib interprets the elements of "gpio-reserved-ranges" as "start,size" because it clears "size" bits starting from the "start" bit in the according bitmap. So it has to use "greater" instead of "greater or equal" when performs bounds check to make sure that GPIOs are in the available range. Previous implementation skipped ranges that include the last GPIO in the range. I wrote the mail to the maintainers (https://lore.kernel.org/linux-gpio/20220412115554.159435-1-andrei.lalaev@emlid.com/T/#u) of the questioned DTSes (because I couldn't understand how the maintainers interpreted this property), but I haven't received a response. Since the questioned DTSes use "gpio-reserved-ranges = <0 4>" (i.e., the beginning of the range), this patch doesn't affect these DTSes at all. TBH this patch doesn't break any existing DTSes because none of them reserve gpios at the end of range. Fixes: 726cb3ba4969 ("gpiolib: Support 'gpio-reserved-ranges' property") Signed-off-by: Andrei Lalaev Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index ae1ce319cd78..7e5e51d49d09 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -910,7 +910,7 @@ static void of_gpiochip_init_valid_mask(struct gpio_chip *chip) i, &start); of_property_read_u32_index(np, "gpio-reserved-ranges", i + 1, &count); - if (start >= chip->ngpio || start + count >= chip->ngpio) + if (start >= chip->ngpio || start + count > chip->ngpio) continue; bitmap_clear(chip->valid_mask, start, count); From e5f6e5d554ac274f9c8ba60078103d0425b93c19 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 11 Apr 2022 09:23:40 +0300 Subject: [PATCH 487/803] gpio: mvebu: drop pwm base assignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pwmchip_add() unconditionally assigns the base ID dynamically. Commit f9a8ee8c8bcd1 ("pwm: Always allocate PWM chip base ID dynamically") dropped all base assignment from drivers under drivers/pwm/. It missed this driver. Fix that. Fixes: f9a8ee8c8bcd1 ("pwm: Always allocate PWM chip base ID dynamically") Signed-off-by: Baruch Siach Reviewed-by: Uwe Kleine-König Acked-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mvebu.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 4c1f9e1091b7..a2c8dd329b31 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -871,13 +871,6 @@ static int mvebu_pwm_probe(struct platform_device *pdev, mvpwm->chip.dev = dev; mvpwm->chip.ops = &mvebu_pwm_ops; mvpwm->chip.npwm = mvchip->chip.ngpio; - /* - * There may already be some PWM allocated, so we can't force - * mvpwm->chip.base to a fixed point like mvchip->chip.base. - * So, we let pwmchip_add() do the numbering and take the next free - * region. - */ - mvpwm->chip.base = -1; spin_lock_init(&mvpwm->lock); From a196c78b5443fc61af2c0490213b9d125482cbd1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 1 May 2022 21:19:50 -0600 Subject: [PATCH 488/803] io_uring: assign non-fixed early for async work We defer file assignment to ensure that fixed files work with links between a direct accept/open and the links that follow it. But this has the side effect that normal file assignment is then not complete by the time that request submission has been done. For deferred execution, if the file is a regular file, assign it when we do the async prep anyway. Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e01f595f5b7d..91de361ea9ab 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6947,7 +6947,12 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) static int io_req_prep_async(struct io_kiocb *req) { - if (!io_op_defs[req->opcode].needs_async_setup) + const struct io_op_def *def = &io_op_defs[req->opcode]; + + /* assign early for deferred execution for non-fixed file */ + if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE)) + req->file = io_file_get_normal(req, req->fd); + if (!def->needs_async_setup) return 0; if (WARN_ON_ONCE(req_has_async_data(req))) return -EFAULT; From 7b8943b821bafab492f43aafbd006b57c6b65845 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Mon, 25 Apr 2022 13:17:01 -0500 Subject: [PATCH 489/803] RDMA/irdma: Flush iWARP QP if modified to ERR from RTR state When connection establishment fails in iWARP mode, an app can drain the QPs and hang because flush isn't issued when the QP is modified from RTR state to error. Issue a flush in this case using function irdma_cm_disconn(). Update irdma_cm_disconn() to do flush when cm_id is NULL, which is the case when the QP is in RTR state and there is an error in the connection establishment. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Link: https://lore.kernel.org/r/20220425181703.1634-2-shiraz.saleem@intel.com Signed-off-by: Tatyana Nikolova Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/cm.c | 16 +++++----------- drivers/infiniband/hw/irdma/verbs.c | 4 ++-- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index a98d962e5efb..90b4113e7071 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -3462,12 +3462,6 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp) } cm_id = iwqp->cm_id; - /* make sure we havent already closed this connection */ - if (!cm_id) { - spin_unlock_irqrestore(&iwqp->lock, flags); - return; - } - original_hw_tcp_state = iwqp->hw_tcp_state; original_ibqp_state = iwqp->ibqp_state; last_ae = iwqp->last_aeq; @@ -3489,11 +3483,11 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp) disconn_status = -ECONNRESET; } - if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED || - original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT || - last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE || - last_ae == IRDMA_AE_BAD_CLOSE || - last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset)) { + if (original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED || + original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT || + last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE || + last_ae == IRDMA_AE_BAD_CLOSE || + last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset || !cm_id) { issue_close = 1; iwqp->cm_id = NULL; qp->term_flags = 0; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 46f475394af5..52f3e88f8569 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1618,13 +1618,13 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) { if (dont_wait) { - if (iwqp->cm_id && iwqp->hw_tcp_state) { + if (iwqp->hw_tcp_state) { spin_lock_irqsave(&iwqp->lock, flags); iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED; iwqp->last_aeq = IRDMA_AE_RESET_SENT; spin_unlock_irqrestore(&iwqp->lock, flags); - irdma_cm_disconn(iwqp); } + irdma_cm_disconn(iwqp); } else { int close_timer_started; From 2df6d895907b2f5dfbc558cbff7801bba82cb3cc Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 25 Apr 2022 13:17:02 -0500 Subject: [PATCH 490/803] RDMA/irdma: Reduce iWARP QP destroy time QP destroy is synchronous and waits for its refcnt to be decremented in irdma_cm_node_free_cb (for iWARP) which fires after the RCU grace period elapses. Applications running a large number of connections are exposed to high wait times on destroy QP for events like SIGABORT. The long pole for this wait time is the firing of the call_rcu callback during a CM node destroy which can be slow. It holds the QP reference count and blocks the destroy QP from completing. call_rcu only needs to make sure that list walkers have a reference to the cm_node object before freeing it and thus need to wait for grace period elapse. The rest of the connection teardown in irdma_cm_node_free_cb is moved out of the grace period wait in irdma_destroy_connection. Also, replace call_rcu with a simple kfree_rcu as it just needs to do a kfree on the cm_node Fixes: 146b9756f14c ("RDMA/irdma: Add connection manager") Link: https://lore.kernel.org/r/20220425181703.1634-3-shiraz.saleem@intel.com Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/cm.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 90b4113e7071..638bf4a1ed94 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -2308,10 +2308,8 @@ err: return NULL; } -static void irdma_cm_node_free_cb(struct rcu_head *rcu_head) +static void irdma_destroy_connection(struct irdma_cm_node *cm_node) { - struct irdma_cm_node *cm_node = - container_of(rcu_head, struct irdma_cm_node, rcu_head); struct irdma_cm_core *cm_core = cm_node->cm_core; struct irdma_qp *iwqp; struct irdma_cm_info nfo; @@ -2359,7 +2357,6 @@ static void irdma_cm_node_free_cb(struct rcu_head *rcu_head) } cm_core->cm_free_ah(cm_node); - kfree(cm_node); } /** @@ -2387,8 +2384,9 @@ void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node) spin_unlock_irqrestore(&cm_core->ht_lock, flags); - /* wait for all list walkers to exit their grace period */ - call_rcu(&cm_node->rcu_head, irdma_cm_node_free_cb); + irdma_destroy_connection(cm_node); + + kfree_rcu(cm_node, rcu_head); } /** From 1c9043ae0667a43bd87beeebbdd4bed674713629 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 25 Apr 2022 13:17:03 -0500 Subject: [PATCH 491/803] RDMA/irdma: Fix possible crash due to NULL netdev in notifier For some net events in irdma_net_event notifier, the netdev can be NULL which will cause a crash in rdma_vlan_dev_real_dev. Fix this by moving all processing to the NETEVENT_NEIGH_UPDATE case where the netdev is guaranteed to not be NULL. Fixes: 6702bc147448 ("RDMA/irdma: Fix netdev notifications for vlan's") Link: https://lore.kernel.org/r/20220425181703.1634-4-shiraz.saleem@intel.com Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/utils.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 346c2c5dabdf..81760415d66c 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -258,18 +258,16 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event, u32 local_ipaddr[4] = {}; bool ipv4 = true; - real_dev = rdma_vlan_dev_real_dev(netdev); - if (!real_dev) - real_dev = netdev; - - ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA); - if (!ibdev) - return NOTIFY_DONE; - - iwdev = to_iwdev(ibdev); - switch (event) { case NETEVENT_NEIGH_UPDATE: + real_dev = rdma_vlan_dev_real_dev(netdev); + if (!real_dev) + real_dev = netdev; + ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA); + if (!ibdev) + return NOTIFY_DONE; + + iwdev = to_iwdev(ibdev); p = (__be32 *)neigh->primary_key; if (neigh->tbl->family == AF_INET6) { ipv4 = false; @@ -290,13 +288,12 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event, irdma_manage_arp_cache(iwdev->rf, neigh->ha, local_ipaddr, ipv4, IRDMA_ARP_DELETE); + ib_device_put(ibdev); break; default: break; } - ib_device_put(ibdev); - return NOTIFY_DONE; } From 285d5731a0cb2dd3a12ddf34d67be4e4965e64da Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 26 Apr 2022 10:49:36 +0800 Subject: [PATCH 492/803] Revert "block: release rq qos structures for queue without disk" This reverts commit daaca3522a8e67c46e39ef09c1d542e866f85f3b. Commit daaca3522a8e ("block: release rq qos structures for queue without disk") is only needed for v5.15~v5.17, and isn't needed for v5.18, so revert it. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220426024936.3321341-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-core.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 937bb6b86331..bc0506772152 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -50,7 +50,6 @@ #include "blk-pm.h" #include "blk-cgroup.h" #include "blk-throttle.h" -#include "blk-rq-qos.h" struct dentry *blk_debugfs_root; @@ -315,9 +314,6 @@ void blk_cleanup_queue(struct request_queue *q) */ blk_freeze_queue(q); - /* cleanup rq qos structures for queue without disk */ - rq_qos_exit(q); - blk_queue_flag_set(QUEUE_FLAG_DEAD, q); blk_sync_queue(q); From b5d1274409d0eec6d826f65d6dafebf9d77a1b99 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Mar 2022 18:00:40 +0100 Subject: [PATCH 493/803] KVM: s390: Fix lockdep issue in vm memop Issuing a memop on a protected vm does not make sense, neither is the memory readable/writable, nor does it make sense to check storage keys. This is why the ioctl will return -EINVAL when it detects the vm to be protected. However, in order to ensure that the vm cannot become protected during the memop, the kvm->lock would need to be taken for the duration of the ioctl. This is also required because kvm_s390_pv_is_protected asserts that the lock must be held. Instead, don't try to prevent this. If user space enables secure execution concurrently with a memop it must accecpt the possibility of the memop failing. Still check if the vm is currently protected, but without locking and consider it a heuristic. Fixes: ef11c9463ae0 ("KVM: s390: Add vm IOCTL for key checked guest absolute memory access") Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20220322153204.2637400-1-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger Signed-off-by: Heiko Carstens --- arch/s390/kvm/kvm-s390.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index da3dabda1a12..76ad6408cb2c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2384,7 +2384,16 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) return -EINVAL; if (mop->size > MEM_OP_MAX_SIZE) return -E2BIG; - if (kvm_s390_pv_is_protected(kvm)) + /* + * This is technically a heuristic only, if the kvm->lock is not + * taken, it is not guaranteed that the vm is/remains non-protected. + * This is ok from a kernel perspective, wrongdoing is detected + * on the access, -EFAULT is returned and the vm may crash the + * next time it accesses the memory in question. + * There is no sane usecase to do switching and a memop on two + * different CPUs at the same time. + */ + if (kvm_s390_pv_get_handle(kvm)) return -EINVAL; if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { if (access_key_invalid(mop->key)) From 6056a92ceb2a7705d61df7ec5370548e96aee258 Mon Sep 17 00:00:00 2001 From: Brian Bunker Date: Mon, 2 May 2022 08:09:17 -0700 Subject: [PATCH 494/803] scsi: scsi_dh_alua: Properly handle the ALUA transitioning state The handling of the ALUA transitioning state is currently broken. When a target goes into this state, it is expected that the target is allowed to stay in this state for the implicit transition timeout without a path failure. The handler has this logic, but it gets skipped currently. When the target transitions, there is in-flight I/O from the initiator. The first of these responses from the target will be a unit attention letting the initiator know that the ALUA state has changed. The remaining in-flight I/Os, before the initiator finds out that the portal state has changed, will return not ready, ALUA state is transitioning. The portal state will change to SCSI_ACCESS_STATE_TRANSITIONING. This will lead to all new I/O immediately failing the path unexpectedly. The path failure happens in less than a second instead of the expected successes until the transition timer is exceeded. Allow I/Os to continue while the path is in the ALUA transitioning state. The handler already takes care of a target that stays in the transitioning state for too long by changing the state to ALUA state standby once the transition timeout is exceeded at which point the path will fail. Link: https://lore.kernel.org/r/CAHZQxy+4sTPz9+pY3=7VJH+CLUJsDct81KtnR2be8ycN5mhqTg@mail.gmail.com Reviewed-by: Hannes Reinecke Acked-by: Krishna Kant Acked-by: Seamus Connor Signed-off-by: Brian Bunker Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 37d06f993b76..1d9be771f3ee 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -1172,9 +1172,8 @@ static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req) case SCSI_ACCESS_STATE_OPTIMAL: case SCSI_ACCESS_STATE_ACTIVE: case SCSI_ACCESS_STATE_LBA: - return BLK_STS_OK; case SCSI_ACCESS_STATE_TRANSITIONING: - return BLK_STS_AGAIN; + return BLK_STS_OK; default: req->rq_flags |= RQF_QUIET; return BLK_STS_IOERR; From 26f9ce53817a8fd84b69a73473a7de852a24c897 Mon Sep 17 00:00:00 2001 From: Gleb Chesnokov Date: Fri, 15 Apr 2022 12:42:29 +0000 Subject: [PATCH 495/803] scsi: qla2xxx: Fix missed DMA unmap for aborted commands Aborting commands that have already been sent to the firmware can cause BUG in qlt_free_cmd(): BUG_ON(cmd->sg_mapped) For instance: - Command passes rdx_to_xfer state, maps sgl, sends to the firmware - Reset occurs, qla2xxx performs ISP error recovery, aborts the command - Target stack calls qlt_abort_cmd() and then qlt_free_cmd() - BUG_ON(cmd->sg_mapped) in qlt_free_cmd() occurs because sgl was not unmapped Thus, unmap sgl in qlt_abort_cmd() for commands with the aborted flag set. Link: https://lore.kernel.org/r/AS8PR10MB4952D545F84B6B1DFD39EC1E9DEE9@AS8PR10MB4952.EURPRD10.PROD.OUTLOOK.COM Reviewed-by: Himanshu Madhani Signed-off-by: Gleb Chesnokov Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_target.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 85dbf81f3204..6dfcfd8e7337 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3826,6 +3826,9 @@ int qlt_abort_cmd(struct qla_tgt_cmd *cmd) spin_lock_irqsave(&cmd->cmd_lock, flags); if (cmd->aborted) { + if (cmd->sg_mapped) + qlt_unmap_sg(vha, cmd); + spin_unlock_irqrestore(&cmd->cmd_lock, flags); /* * It's normal to see 2 calls in this path: From b800528b97d0adc3a5ba42d78a8b0d3f07a31f44 Mon Sep 17 00:00:00 2001 From: Shravya Kumbham Date: Mon, 2 May 2022 12:57:49 +0530 Subject: [PATCH 496/803] net: emaclite: Don't advertise 1000BASE-T and do auto negotiation In xemaclite_open() function we are setting the max speed of emaclite to 100Mb using phy_set_max_speed() function so, there is no need to write the advertising registers to stop giga-bit speed and the phy_start() function starts the auto-negotiation so, there is no need to handle it separately using advertising registers. Remove the phy_read and phy_write of advertising registers in xemaclite_open() function. Signed-off-by: Shravya Kumbham Signed-off-by: Radhey Shyam Pandey Reviewed-by: Andrew Lunn Signed-off-by: Paolo Abeni --- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 57a24f62e353..f7394a5160cf 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -926,8 +926,6 @@ static int xemaclite_open(struct net_device *dev) xemaclite_disable_interrupts(lp); if (lp->phy_node) { - u32 bmcr; - lp->phy_dev = of_phy_connect(lp->ndev, lp->phy_node, xemaclite_adjust_link, 0, PHY_INTERFACE_MODE_MII); @@ -938,19 +936,6 @@ static int xemaclite_open(struct net_device *dev) /* EmacLite doesn't support giga-bit speeds */ phy_set_max_speed(lp->phy_dev, SPEED_100); - - /* Don't advertise 1000BASE-T Full/Half duplex speeds */ - phy_write(lp->phy_dev, MII_CTRL1000, 0); - - /* Advertise only 10 and 100mbps full/half duplex speeds */ - phy_write(lp->phy_dev, MII_ADVERTISE, ADVERTISE_ALL | - ADVERTISE_CSMA); - - /* Restart auto negotiation */ - bmcr = phy_read(lp->phy_dev, MII_BMCR); - bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART); - phy_write(lp->phy_dev, MII_BMCR, bmcr); - phy_start(lp->phy_dev); } From 7a6bc33ab54923d325d9a1747ec9652c4361ebd1 Mon Sep 17 00:00:00 2001 From: Shravya Kumbham Date: Mon, 2 May 2022 12:57:50 +0530 Subject: [PATCH 497/803] net: emaclite: Add error handling for of_address_to_resource() check the return value of of_address_to_resource() and also add missing of_node_put() for np and npp nodes. Fixes: e0a3bc65448c ("net: emaclite: Support multiple phys connected to one MDIO bus") Addresses-Coverity: Event check_return value. Signed-off-by: Shravya Kumbham Signed-off-by: Radhey Shyam Pandey Signed-off-by: Paolo Abeni --- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index f7394a5160cf..d770b3ac3f74 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -823,10 +823,10 @@ static int xemaclite_mdio_write(struct mii_bus *bus, int phy_id, int reg, static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) { struct mii_bus *bus; - int rc; struct resource res; struct device_node *np = of_get_parent(lp->phy_node); struct device_node *npp; + int rc, ret; /* Don't register the MDIO bus if the phy_node or its parent node * can't be found. @@ -836,8 +836,14 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) return -ENODEV; } npp = of_get_parent(np); - - of_address_to_resource(npp, 0, &res); + ret = of_address_to_resource(npp, 0, &res); + of_node_put(npp); + if (ret) { + dev_err(dev, "%s resource error!\n", + dev->of_node->full_name); + of_node_put(np); + return ret; + } if (lp->ndev->mem_start != res.start) { struct phy_device *phydev; phydev = of_phy_find_device(lp->phy_node); @@ -846,6 +852,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) "MDIO of the phy is not registered yet\n"); else put_device(&phydev->mdio.dev); + of_node_put(np); return 0; } @@ -858,6 +865,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) bus = mdiobus_alloc(); if (!bus) { dev_err(dev, "Failed to allocate mdiobus\n"); + of_node_put(np); return -ENOMEM; } @@ -870,6 +878,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) bus->parent = dev; rc = of_mdiobus_register(bus, np); + of_node_put(np); if (rc) { dev_err(dev, "Failed to register mdio bus.\n"); goto err_register; From 3122257c02afd9f199a8fc84ae981e1fc4958532 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 2 May 2022 11:45:07 +0300 Subject: [PATCH 498/803] selftests: mirror_gre_bridge_1q: Avoid changing PVID while interface is operational In emulated environments, the bridge ports enslaved to br1 get a carrier before changing br1's PVID. This means that by the time the PVID is changed, br1 is already operational and configured with an IPv6 link-local address. When the test is run with netdevs registered by mlxsw, changing the PVID is vetoed, as changing the VID associated with an existing L3 interface is forbidden. This restriction is similar to the 8021q driver's restriction of changing the VID of an existing interface. Fix this by taking br1 down and bringing it back up when it is fully configured. With this fix, the test reliably passes on top of both the SW and HW data paths (emulated or not). Fixes: 239e754af854 ("selftests: forwarding: Test mirror-to-gretap w/ UL 802.1q") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Link: https://lore.kernel.org/r/20220502084507.364774-1-idosch@nvidia.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh index a3402cd8d5b6..9ff22f28032d 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh @@ -61,9 +61,12 @@ setup_prepare() vrf_prepare mirror_gre_topo_create + # Avoid changing br1's PVID while it is operational as a L3 interface. + ip link set dev br1 down ip link set dev $swp3 master br1 bridge vlan add dev br1 vid 555 pvid untagged self + ip link set dev br1 up ip address add dev br1 192.0.2.129/28 ip address add dev br1 2001:db8:2::1/64 From 97926d5a847ca1758ad8702ce591e3b05a701e0d Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 2 May 2022 11:46:37 +0200 Subject: [PATCH 499/803] selftests/net: so_txtime: fix parsing of start time stamp on 32 bit systems This patch fixes the parsing of the cmd line supplied start time on 32 bit systems. A "long" on 32 bit systems is only 32 bit wide and cannot hold a timestamp in nano second resolution. Fixes: 040806343bb4 ("selftests/net: so_txtime multi-host support") Cc: Carlos Llamas Cc: Willem de Bruijn Signed-off-by: Marc Kleine-Budde Acked-by: Willem de Bruijn Reviewed-by: Carlos Llamas Link: https://lore.kernel.org/r/20220502094638.1921702-2-mkl@pengutronix.de Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/so_txtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 59067f64b775..103f6bf28e35 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -475,7 +475,7 @@ static void parse_opts(int argc, char **argv) cfg_rx = true; break; case 't': - cfg_start_time_ns = strtol(optarg, NULL, 0); + cfg_start_time_ns = strtoll(optarg, NULL, 0); break; case 'm': cfg_mark = strtol(optarg, NULL, 0); From f5c2174a3775491e890ce285df52f5715fbef875 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 2 May 2022 11:46:38 +0200 Subject: [PATCH 500/803] selftests/net: so_txtime: usage(): fix documentation of default clock The program uses CLOCK_TAI as default clock since it was added to the Linux repo. In commit: | 040806343bb4 ("selftests/net: so_txtime multi-host support") a help text stating the wrong default clock was added. This patch fixes the help text. Fixes: 040806343bb4 ("selftests/net: so_txtime multi-host support") Cc: Carlos Llamas Cc: Willem de Bruijn Signed-off-by: Marc Kleine-Budde Acked-by: Willem de Bruijn Reviewed-by: Carlos Llamas Link: https://lore.kernel.org/r/20220502094638.1921702-3-mkl@pengutronix.de Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/so_txtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 103f6bf28e35..2672ac0b6d1f 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -421,7 +421,7 @@ static void usage(const char *progname) "Options:\n" " -4 only IPv4\n" " -6 only IPv6\n" - " -c monotonic (default) or tai\n" + " -c monotonic or tai (default)\n" " -D destination IP address (server)\n" " -S source IP address (client)\n" " -r run rx mode\n" From 706c9c55e5a32800605eb6a864ef6e1ca0c6c179 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 23 Apr 2022 03:47:41 +0000 Subject: [PATCH 501/803] KVM: x86/mmu: Don't treat fully writable SPTEs as volatile (modulo A/D) Don't treat SPTEs that are truly writable, i.e. writable in hardware, as being volatile (unless they're volatile for other reasons, e.g. A/D bits). KVM _sets_ the WRITABLE bit out of mmu_lock, but never _clears_ the bit out of mmu_lock, so if the WRITABLE bit is set, it cannot magically get cleared just because the SPTE is MMU-writable. Rename the wrapper of MMU-writable to be more literal, the previous name of spte_can_locklessly_be_made_writable() is wrong and misleading. Fixes: c7ba5b48cc8d ("KVM: MMU: fast path of handling guest page fault") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20220423034752.1161007-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 17 +++++++++-------- arch/x86/kvm/mmu/spte.h | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 64a2a7e2be90..48dcb6a782f4 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -484,13 +484,15 @@ static bool spte_has_volatile_bits(u64 spte) * also, it can help us to get a stable is_writable_pte() * to ensure tlb flush is not missed. */ - if (spte_can_locklessly_be_made_writable(spte) || - is_access_track_spte(spte)) + if (!is_writable_pte(spte) && is_mmu_writable_spte(spte)) + return true; + + if (is_access_track_spte(spte)) return true; if (spte_ad_enabled(spte)) { - if ((spte & shadow_accessed_mask) == 0 || - (is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0)) + if (!(spte & shadow_accessed_mask) || + (is_writable_pte(spte) && !(spte & shadow_dirty_mask))) return true; } @@ -557,7 +559,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) * we always atomically update it, see the comments in * spte_has_volatile_bits(). */ - if (spte_can_locklessly_be_made_writable(old_spte) && + if (is_mmu_writable_spte(old_spte) && !is_writable_pte(new_spte)) flush = true; @@ -1187,7 +1189,7 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect) u64 spte = *sptep; if (!is_writable_pte(spte) && - !(pt_protect && spte_can_locklessly_be_made_writable(spte))) + !(pt_protect && is_mmu_writable_spte(spte))) return false; rmap_printk("spte %p %llx\n", sptep, *sptep); @@ -3196,8 +3198,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) * be removed in the fast path only if the SPTE was * write-protected for dirty-logging or access tracking. */ - if (fault->write && - spte_can_locklessly_be_made_writable(spte)) { + if (fault->write && is_mmu_writable_spte(spte)) { new_spte |= PT_WRITABLE_MASK; /* diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index e4abeb5df1b1..c571784cb567 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -390,7 +390,7 @@ static inline void check_spte_writable_invariants(u64 spte) "kvm: Writable SPTE is not MMU-writable: %llx", spte); } -static inline bool spte_can_locklessly_be_made_writable(u64 spte) +static inline bool is_mmu_writable_spte(u64 spte) { return spte & shadow_mmu_writable_mask; } From 54eb3ef56f36827aad90915df33387d4c2b5df5a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 23 Apr 2022 03:47:42 +0000 Subject: [PATCH 502/803] KVM: x86/mmu: Move shadow-present check out of spte_has_volatile_bits() Move the is_shadow_present_pte() check out of spte_has_volatile_bits() and into its callers. Well, caller, since only one of its two callers doesn't already do the shadow-present check. Opportunistically move the helper to spte.c/h so that it can be used by the TDP MMU, which is also the primary motivation for the shadow-present change. Unlike the legacy MMU, the TDP MMU uses a single path for clear leaf and non-leaf SPTEs, and to avoid unnecessary atomic updates, the TDP MMU will need to check is_last_spte() prior to calling spte_has_volatile_bits(), and calling is_last_spte() without first calling is_shadow_present_spte() is at best odd, and at worst a violation of KVM's loosely defines SPTE rules. Note, mmu_spte_clear_track_bits() could likely skip the write entirely for SPTEs that are not shadow-present. Leave that cleanup for a future patch to avoid introducing a functional change, and because the shadow-present check can likely be moved further up the stack, e.g. drop_large_spte() appears to be the only path that doesn't already explicitly check for a shadow-present SPTE. No functional change intended. Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20220423034752.1161007-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 29 ++--------------------------- arch/x86/kvm/mmu/spte.c | 28 ++++++++++++++++++++++++++++ arch/x86/kvm/mmu/spte.h | 2 ++ 3 files changed, 32 insertions(+), 27 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 48dcb6a782f4..311e4e1d7870 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -473,32 +473,6 @@ retry: } #endif -static bool spte_has_volatile_bits(u64 spte) -{ - if (!is_shadow_present_pte(spte)) - return false; - - /* - * Always atomically update spte if it can be updated - * out of mmu-lock, it can ensure dirty bit is not lost, - * also, it can help us to get a stable is_writable_pte() - * to ensure tlb flush is not missed. - */ - if (!is_writable_pte(spte) && is_mmu_writable_spte(spte)) - return true; - - if (is_access_track_spte(spte)) - return true; - - if (spte_ad_enabled(spte)) { - if (!(spte & shadow_accessed_mask) || - (is_writable_pte(spte) && !(spte & shadow_dirty_mask))) - return true; - } - - return false; -} - /* Rules for using mmu_spte_set: * Set the sptep from nonpresent to present. * Note: the sptep being assigned *must* be either not present @@ -593,7 +567,8 @@ static int mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep) u64 old_spte = *sptep; int level = sptep_to_sp(sptep)->role.level; - if (!spte_has_volatile_bits(old_spte)) + if (!is_shadow_present_pte(old_spte) || + !spte_has_volatile_bits(old_spte)) __update_clear_spte_fast(sptep, 0ull); else old_spte = __update_clear_spte_slow(sptep, 0ull); diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 4739b53c9734..e5c0b6db6f2c 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -90,6 +90,34 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) E820_TYPE_RAM); } +/* + * Returns true if the SPTE has bits that may be set without holding mmu_lock. + * The caller is responsible for checking if the SPTE is shadow-present, and + * for determining whether or not the caller cares about non-leaf SPTEs. + */ +bool spte_has_volatile_bits(u64 spte) +{ + /* + * Always atomically update spte if it can be updated + * out of mmu-lock, it can ensure dirty bit is not lost, + * also, it can help us to get a stable is_writable_pte() + * to ensure tlb flush is not missed. + */ + if (!is_writable_pte(spte) && is_mmu_writable_spte(spte)) + return true; + + if (is_access_track_spte(spte)) + return true; + + if (spte_ad_enabled(spte)) { + if (!(spte & shadow_accessed_mask) || + (is_writable_pte(spte) && !(spte & shadow_dirty_mask))) + return true; + } + + return false; +} + bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, const struct kvm_memory_slot *slot, unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn, diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index c571784cb567..80ab0f5cff01 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -404,6 +404,8 @@ static inline u64 get_mmio_spte_generation(u64 spte) return gen; } +bool spte_has_volatile_bits(u64 spte); + bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, const struct kvm_memory_slot *slot, unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn, From ba3a6120a4e7efc13d19fe43eb6c5caf1da05b72 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 23 Apr 2022 03:47:43 +0000 Subject: [PATCH 503/803] KVM: x86/mmu: Use atomic XCHG to write TDP MMU SPTEs with volatile bits Use an atomic XCHG to write TDP MMU SPTEs that have volatile bits, even if mmu_lock is held for write, as volatile SPTEs can be written by other tasks/vCPUs outside of mmu_lock. If a vCPU uses the to-be-modified SPTE to write a page, the CPU can cache the translation as WRITABLE in the TLB despite it being seen by KVM as !WRITABLE, and/or KVM can clobber the Accessed/Dirty bits and not properly tag the backing page. Exempt non-leaf SPTEs from atomic updates as KVM itself doesn't modify non-leaf SPTEs without holding mmu_lock, they do not have Dirty bits, and KVM doesn't consume the Accessed bit of non-leaf SPTEs. Dropping the Dirty and/or Writable bits is most problematic for dirty logging, as doing so can result in a missed TLB flush and eventually a missed dirty page. In the unlikely event that the only dirty page(s) is a clobbered SPTE, clear_dirty_gfn_range() will see the SPTE as not dirty (based on the Dirty or Writable bit depending on the method) and so not update the SPTE and ultimately not flush. If the SPTE is cached in the TLB as writable before it is clobbered, the guest can continue writing the associated page without ever taking a write-protect fault. For most (all?) file back memory, dropping the Dirty bit is a non-issue. The primary MMU write-protects its PTEs on writeback, i.e. KVM's dirty bit is effectively ignored because the primary MMU will mark that page dirty when the write-protection is lifted, e.g. when KVM faults the page back in for write. The Accessed bit is a complete non-issue. Aside from being unused for non-leaf SPTEs, KVM doesn't do a TLB flush when aging SPTEs, i.e. the Accessed bit may be dropped anyways. Lastly, the Writable bit is also problematic as an extension of the Dirty bit, as KVM (correctly) treats the Dirty bit as volatile iff the SPTE is !DIRTY && WRITABLE. If KVM fixes an MMU-writable, but !WRITABLE, SPTE out of mmu_lock, then it can allow the CPU to set the Dirty bit despite the SPTE being !WRITABLE when it is checked by KVM. But that all depends on the Dirty bit being problematic in the first place. Fixes: 2f2fad0897cb ("kvm: x86/mmu: Add functions to handle changed TDP SPTEs") Cc: stable@vger.kernel.org Cc: Ben Gardon Cc: David Matlack Cc: Venkatesh Srinivas Signed-off-by: Sean Christopherson Message-Id: <20220423034752.1161007-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_iter.h | 34 ++++++++++++++- arch/x86/kvm/mmu/tdp_mmu.c | 82 ++++++++++++++++++++++++------------- 2 files changed, 85 insertions(+), 31 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h index b1eaf6ec0e0b..f0af385c56e0 100644 --- a/arch/x86/kvm/mmu/tdp_iter.h +++ b/arch/x86/kvm/mmu/tdp_iter.h @@ -6,6 +6,7 @@ #include #include "mmu.h" +#include "spte.h" /* * TDP MMU SPTEs are RCU protected to allow paging structures (non-leaf SPTEs) @@ -17,9 +18,38 @@ static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep) { return READ_ONCE(*rcu_dereference(sptep)); } -static inline void kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 val) + +static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte) { - WRITE_ONCE(*rcu_dereference(sptep), val); + return xchg(rcu_dereference(sptep), new_spte); +} + +static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte) +{ + WRITE_ONCE(*rcu_dereference(sptep), new_spte); +} + +static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte, + u64 new_spte, int level) +{ + /* + * Atomically write the SPTE if it is a shadow-present, leaf SPTE with + * volatile bits, i.e. has bits that can be set outside of mmu_lock. + * The Writable bit can be set by KVM's fast page fault handler, and + * Accessed and Dirty bits can be set by the CPU. + * + * Note, non-leaf SPTEs do have Accessed bits and those bits are + * technically volatile, but KVM doesn't consume the Accessed bit of + * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit. This + * logic needs to be reassessed if KVM were to use non-leaf Accessed + * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs. + */ + if (is_shadow_present_pte(old_spte) && is_last_spte(old_spte, level) && + spte_has_volatile_bits(old_spte)) + return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte); + + __kvm_tdp_mmu_write_spte(sptep, new_spte); + return old_spte; } /* diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index edc68538819b..922b06bf4b94 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -426,9 +426,9 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared) tdp_mmu_unlink_sp(kvm, sp, shared); for (i = 0; i < PT64_ENT_PER_PAGE; i++) { - u64 *sptep = rcu_dereference(pt) + i; + tdp_ptep_t sptep = pt + i; gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level); - u64 old_child_spte; + u64 old_spte; if (shared) { /* @@ -440,8 +440,8 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared) * value to the removed SPTE value. */ for (;;) { - old_child_spte = xchg(sptep, REMOVED_SPTE); - if (!is_removed_spte(old_child_spte)) + old_spte = kvm_tdp_mmu_write_spte_atomic(sptep, REMOVED_SPTE); + if (!is_removed_spte(old_spte)) break; cpu_relax(); } @@ -455,23 +455,43 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared) * are guarded by the memslots generation, not by being * unreachable. */ - old_child_spte = READ_ONCE(*sptep); - if (!is_shadow_present_pte(old_child_spte)) + old_spte = kvm_tdp_mmu_read_spte(sptep); + if (!is_shadow_present_pte(old_spte)) continue; /* - * Marking the SPTE as a removed SPTE is not - * strictly necessary here as the MMU lock will - * stop other threads from concurrently modifying - * this SPTE. Using the removed SPTE value keeps - * the two branches consistent and simplifies - * the function. + * Use the common helper instead of a raw WRITE_ONCE as + * the SPTE needs to be updated atomically if it can be + * modified by a different vCPU outside of mmu_lock. + * Even though the parent SPTE is !PRESENT, the TLB + * hasn't yet been flushed, and both Intel and AMD + * document that A/D assists can use upper-level PxE + * entries that are cached in the TLB, i.e. the CPU can + * still access the page and mark it dirty. + * + * No retry is needed in the atomic update path as the + * sole concern is dropping a Dirty bit, i.e. no other + * task can zap/remove the SPTE as mmu_lock is held for + * write. Marking the SPTE as a removed SPTE is not + * strictly necessary for the same reason, but using + * the remove SPTE value keeps the shared/exclusive + * paths consistent and allows the handle_changed_spte() + * call below to hardcode the new value to REMOVED_SPTE. + * + * Note, even though dropping a Dirty bit is the only + * scenario where a non-atomic update could result in a + * functional bug, simply checking the Dirty bit isn't + * sufficient as a fast page fault could read the upper + * level SPTE before it is zapped, and then make this + * target SPTE writable, resume the guest, and set the + * Dirty bit between reading the SPTE above and writing + * it here. */ - WRITE_ONCE(*sptep, REMOVED_SPTE); + old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, + REMOVED_SPTE, level); } handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn, - old_child_spte, REMOVED_SPTE, level, - shared); + old_spte, REMOVED_SPTE, level, shared); } call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback); @@ -667,14 +687,13 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm, KVM_PAGES_PER_HPAGE(iter->level)); /* - * No other thread can overwrite the removed SPTE as they - * must either wait on the MMU lock or use - * tdp_mmu_set_spte_atomic which will not overwrite the - * special removed SPTE value. No bookkeeping is needed - * here since the SPTE is going from non-present - * to non-present. + * No other thread can overwrite the removed SPTE as they must either + * wait on the MMU lock or use tdp_mmu_set_spte_atomic() which will not + * overwrite the special removed SPTE value. No bookkeeping is needed + * here since the SPTE is going from non-present to non-present. Use + * the raw write helper to avoid an unnecessary check on volatile bits. */ - kvm_tdp_mmu_write_spte(iter->sptep, 0); + __kvm_tdp_mmu_write_spte(iter->sptep, 0); return 0; } @@ -699,10 +718,13 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm, * unless performing certain dirty logging operations. * Leaving record_dirty_log unset in that case prevents page * writes from being double counted. + * + * Returns the old SPTE value, which _may_ be different than @old_spte if the + * SPTE had voldatile bits. */ -static void __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep, - u64 old_spte, u64 new_spte, gfn_t gfn, int level, - bool record_acc_track, bool record_dirty_log) +static u64 __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep, + u64 old_spte, u64 new_spte, gfn_t gfn, int level, + bool record_acc_track, bool record_dirty_log) { lockdep_assert_held_write(&kvm->mmu_lock); @@ -715,7 +737,7 @@ static void __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep, */ WARN_ON(is_removed_spte(old_spte) || is_removed_spte(new_spte)); - kvm_tdp_mmu_write_spte(sptep, new_spte); + old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level); __handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false); @@ -724,6 +746,7 @@ static void __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep, if (record_dirty_log) handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte, new_spte, level); + return old_spte; } static inline void _tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, @@ -732,9 +755,10 @@ static inline void _tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, { WARN_ON_ONCE(iter->yielded); - __tdp_mmu_set_spte(kvm, iter->as_id, iter->sptep, iter->old_spte, - new_spte, iter->gfn, iter->level, - record_acc_track, record_dirty_log); + iter->old_spte = __tdp_mmu_set_spte(kvm, iter->as_id, iter->sptep, + iter->old_spte, new_spte, + iter->gfn, iter->level, + record_acc_track, record_dirty_log); } static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, From 3a58f13a881ed351198ffab4cf9953cf19d2ab3a Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 2 May 2022 10:40:18 +0900 Subject: [PATCH 504/803] net: rds: acquire refcount on TCP sockets syzbot is reporting use-after-free read in tcp_retransmit_timer() [1], for TCP socket used by RDS is accessing sock_net() without acquiring a refcount on net namespace. Since TCP's retransmission can happen after a process which created net namespace terminated, we need to explicitly acquire a refcount. Link: https://syzkaller.appspot.com/bug?extid=694120e1002c117747ed [1] Reported-by: syzbot Fixes: 26abe14379f8e2fa ("net: Modify sk_alloc to not reference count the netns of kernel sockets.") Fixes: 8a68173691f03661 ("net: sk_clone_lock() should only do get_net() if the parent is not a kernel socket") Signed-off-by: Tetsuo Handa Tested-by: syzbot Link: https://lore.kernel.org/r/a5fb1fc4-2284-3359-f6a0-e4e390239d7b@I-love.SAKURA.ne.jp Signed-off-by: Paolo Abeni --- net/rds/tcp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 5327d130c4b5..2f638f8b7b1e 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -495,6 +495,14 @@ void rds_tcp_tune(struct socket *sock) tcp_sock_set_nodelay(sock->sk); lock_sock(sk); + /* TCP timer functions might access net namespace even after + * a process which created this net namespace terminated. + */ + if (!sk->sk_net_refcnt) { + sk->sk_net_refcnt = 1; + get_net_track(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); + } if (rtn->sndbuf_size > 0) { sk->sk_sndbuf = rtn->sndbuf_size; sk->sk_userlocks |= SOCK_SNDBUF_LOCK; From 5eb849322d7f7ae9d5c587c7bc3b4f7c6872cd2f Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Mon, 2 May 2022 22:01:36 -0700 Subject: [PATCH 505/803] KVM: x86/svm: Account for family 17h event renumberings in amd_pmc_perf_hw_id Zen renumbered some of the performance counters that correspond to the well known events in perf_hw_id. This code in KVM was never updated for that, so guest that attempt to use counters on Zen that correspond to the pre-Zen perf_hw_id values will silently receive the wrong values. This has been observed in the wild with rr[0] when running in Zen 3 guests. rr uses the retired conditional branch counter 00d1 which is incorrectly recognized by KVM as PERF_COUNT_HW_STALLED_CYCLES_BACKEND. [0] https://rr-project.org/ Signed-off-by: Kyle Huey Message-Id: <20220503050136.86298-1-khuey@kylehuey.com> Cc: stable@vger.kernel.org [Check guest family, not host. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/pmu.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 24eb935b6f85..311cbaa0c3dd 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -45,6 +45,22 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = { [7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, }; +/* duplicated from amd_f17h_perfmon_event_map. */ +static struct kvm_event_hw_type_mapping amd_f17h_event_mapping[] = { + [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES }, + [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS }, + [2] = { 0x60, 0xff, PERF_COUNT_HW_CACHE_REFERENCES }, + [3] = { 0x64, 0x09, PERF_COUNT_HW_CACHE_MISSES }, + [4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, + [5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, + [6] = { 0x87, 0x02, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, + [7] = { 0x87, 0x01, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, +}; + +/* amd_pmc_perf_hw_id depends on these being the same size */ +static_assert(ARRAY_SIZE(amd_event_mapping) == + ARRAY_SIZE(amd_f17h_event_mapping)); + static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type) { struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); @@ -140,6 +156,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr, static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc) { + struct kvm_event_hw_type_mapping *event_mapping; u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT; u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; int i; @@ -148,15 +165,20 @@ static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc) if (WARN_ON(pmc_is_fixed(pmc))) return PERF_COUNT_HW_MAX; + if (guest_cpuid_family(pmc->vcpu) >= 0x17) + event_mapping = amd_f17h_event_mapping; + else + event_mapping = amd_event_mapping; + for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++) - if (amd_event_mapping[i].eventsel == event_select - && amd_event_mapping[i].unit_mask == unit_mask) + if (event_mapping[i].eventsel == event_select + && event_mapping[i].unit_mask == unit_mask) break; if (i == ARRAY_SIZE(amd_event_mapping)) return PERF_COUNT_HW_MAX; - return amd_event_mapping[i].event_type; + return event_mapping[i].event_type; } /* check if a PMC is enabled by comparing it against global_ctrl bits. Because From 5a1bde46f98b893cda6122b00e94c0c40a6ead3c Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Wed, 27 Apr 2022 17:01:49 +0530 Subject: [PATCH 506/803] kvm: x86/cpuid: Only provide CPUID leaf 0xA if host has architectural PMU On some x86 processors, CPUID leaf 0xA provides information on Architectural Performance Monitoring features. It advertises a PMU version which Qemu uses to determine the availability of additional MSRs to manage the PMCs. Upon receiving a KVM_GET_SUPPORTED_CPUID ioctl request for the same, the kernel constructs return values based on the x86_pmu_capability irrespective of the vendor. This leaf and the additional MSRs are not supported on AMD and Hygon processors. If AMD PerfMonV2 is detected, the PMU version is set to 2 and guest startup breaks because of an attempt to access a non-existent MSR. Return zeros to avoid this. Fixes: a6c06ed1a60a ("KVM: Expose the architectural performance monitoring CPUID leaf") Reported-by: Vasant Hegde Signed-off-by: Sandipan Das Message-Id: <3fef83d9c2b2f7516e8ff50d60851f29a4bcb716.1651058600.git.sandipan.das@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b24ca7f4ed7c..732724ea5b10 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -887,6 +887,11 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) union cpuid10_eax eax; union cpuid10_edx edx; + if (!static_cpu_has(X86_FEATURE_ARCH_PERFMON)) { + entry->eax = entry->ebx = entry->ecx = entry->edx = 0; + break; + } + perf_get_x86_pmu_capability(&cap); /* From a06afe8383080c630a7a528b8382fc6bb4925b61 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 29 Apr 2022 17:15:26 +0200 Subject: [PATCH 507/803] KVM: s390: vsie/gmap: reduce gmap_rmap overhead there are cases that trigger a 2nd shadow event for the same vmaddr/raddr combination. (prefix changes, reboots, some known races) This will increase memory usages and it will result in long latencies when cleaning up, e.g. on shutdown. To avoid cases with a list that has hundreds of identical raddrs we check existing entries at insert time. As this measurably reduces the list length this will be faster than traversing the list at shutdown time. In the long run several places will be optimized to create less entries and a shrinker might be necessary. Fixes: 4be130a08420 ("s390/mm: add shadow gmap support") Signed-off-by: Christian Borntraeger Acked-by: David Hildenbrand Link: https://lore.kernel.org/r/20220429151526.1560-1-borntraeger@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/mm/gmap.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index af03cacf34ec..1ac73917a8d3 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1183,6 +1183,7 @@ EXPORT_SYMBOL_GPL(gmap_read_table); static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr, struct gmap_rmap *rmap) { + struct gmap_rmap *temp; void __rcu **slot; BUG_ON(!gmap_is_shadow(sg)); @@ -1190,6 +1191,12 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr, if (slot) { rmap->next = radix_tree_deref_slot_protected(slot, &sg->guest_table_lock); + for (temp = rmap->next; temp; temp = temp->next) { + if (temp->raddr == rmap->raddr) { + kfree(rmap); + return; + } + } radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap); } else { rmap->next = NULL; From 3481551f035725fdc46885425eac3ef9b58ae7b7 Mon Sep 17 00:00:00 2001 From: Camel Guo Date: Tue, 3 May 2022 13:43:33 +0200 Subject: [PATCH 508/803] hwmon: (tmp401) Add OF device ID table This driver doesn't have of_match_table. This makes the kernel module tmp401.ko lack alias patterns (e.g: of:N*T*Cti,tmp411) to match DT node of the supported devices hence this kernel module will not be automatically loaded. After adding of_match_table to this driver, the folllowing alias will be added into tmp401.ko. $ modinfo drivers/hwmon/tmp401.ko filename: drivers/hwmon/tmp401.ko ...... author: Hans de Goede alias: of:N*T*Cti,tmp435C* alias: of:N*T*Cti,tmp435 alias: of:N*T*Cti,tmp432C* alias: of:N*T*Cti,tmp432 alias: of:N*T*Cti,tmp431C* alias: of:N*T*Cti,tmp431 alias: of:N*T*Cti,tmp411C* alias: of:N*T*Cti,tmp411 alias: of:N*T*Cti,tmp401C* alias: of:N*T*Cti,tmp401 ...... Fixes: af503716ac14 ("i2c: core: report OF style module alias for devices registered via OF") Signed-off-by: Camel Guo Link: https://lore.kernel.org/r/20220503114333.456476-1-camel.guo@axis.com Signed-off-by: Guenter Roeck --- drivers/hwmon/tmp401.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/hwmon/tmp401.c b/drivers/hwmon/tmp401.c index b86d9df7105d..52c9e7d3f2ae 100644 --- a/drivers/hwmon/tmp401.c +++ b/drivers/hwmon/tmp401.c @@ -708,10 +708,21 @@ static int tmp401_probe(struct i2c_client *client) return 0; } +static const struct of_device_id __maybe_unused tmp4xx_of_match[] = { + { .compatible = "ti,tmp401", }, + { .compatible = "ti,tmp411", }, + { .compatible = "ti,tmp431", }, + { .compatible = "ti,tmp432", }, + { .compatible = "ti,tmp435", }, + { }, +}; +MODULE_DEVICE_TABLE(of, tmp4xx_of_match); + static struct i2c_driver tmp401_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "tmp401", + .of_match_table = of_match_ptr(tmp4xx_of_match), }, .probe_new = tmp401_probe, .id_table = tmp401_id, From aafa025c76dcc7d1a8c8f0bdefcbe4eb480b2f6a Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 2 May 2022 15:50:14 +0200 Subject: [PATCH 509/803] fbdev: Make fb_release() return -ENODEV if fbdev was unregistered A reference to the framebuffer device struct fb_info is stored in the file private data, but this reference could no longer be valid and must not be accessed directly. Instead, the file_fb_info() accessor function must be used since it does sanity checking to make sure that the fb_info is valid. This can happen for example if the registered framebuffer device is for a driver that just uses a framebuffer provided by the system firmware. In that case, the fbdev core would unregister the framebuffer device when a real video driver is probed and ask to remove conflicting framebuffers. The bug has been present for a long time but commit 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal") unmasked it since the fbdev core started unregistering the framebuffers' devices associated. Fixes: 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal") Reported-by: Maxime Ripard Reported-by: Junxiao Chang Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220502135014.377945-1-javierm@redhat.com --- drivers/video/fbdev/core/fbmem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index a6bb0e438216..97eb0dee411c 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1434,7 +1434,10 @@ fb_release(struct inode *inode, struct file *file) __acquires(&info->lock) __releases(&info->lock) { - struct fb_info * const info = file->private_data; + struct fb_info * const info = file_fb_info(file); + + if (!info) + return -ENODEV; lock_fb_info(info); if (info->fbops->fb_release) From 20ce30fb4750f2ffc130cdcb26232b1dd87cd0a5 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 26 Apr 2022 18:32:26 -0700 Subject: [PATCH 510/803] interconnect: Restore sync state by ignoring ipa-virt in provider count Ignore compatible strings for the IPA virt drivers that were removed in commits 2fb251c26560 ("interconnect: qcom: sdx55: Drop IP0 interconnects") and 2f3724930eb4 ("interconnect: qcom: sc7180: Drop IP0 interconnects") so that the sync state logic can kick in again. Otherwise all the interconnects in the system will stay pegged at max speeds because 'providers_count' is always going to be one larger than the number of drivers that will ever probe on sc7180 or sdx55. This fixes suspend on sc7180 and sdx55 devices when you don't have a devicetree patch to remove the ipa-virt compatible node. Cc: Bjorn Andersson Cc: Doug Anderson Cc: Alex Elder Cc: Taniya Das Cc: Mike Tipton Fixes: 2fb251c26560 ("interconnect: qcom: sdx55: Drop IP0 interconnects") Fixes: 2f3724930eb4 ("interconnect: qcom: sc7180: Drop IP0 interconnects") Signed-off-by: Stephen Boyd Reviewed-by: Alex Elder Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20220427013226.341209-1-swboyd@chromium.org Signed-off-by: Georgi Djakov --- drivers/interconnect/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 9050ca1f4285..808f6e7a8048 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -1087,9 +1087,15 @@ static int of_count_icc_providers(struct device_node *np) { struct device_node *child; int count = 0; + const struct of_device_id __maybe_unused ignore_list[] = { + { .compatible = "qcom,sc7180-ipa-virt" }, + { .compatible = "qcom,sdx55-ipa-virt" }, + {} + }; for_each_available_child_of_node(np, child) { - if (of_property_read_bool(child, "#interconnect-cells")) + if (of_property_read_bool(child, "#interconnect-cells") && + likely(!of_match_node(ignore_list, child))) count++; count += of_count_icc_providers(child); } From 2069624dac19d62c558bb6468fe03678553ab01d Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Mon, 2 May 2022 23:33:15 +0100 Subject: [PATCH 511/803] net: sfp: Add tx-fault workaround for Huawei MA5671A SFP ONT As noted elsewhere, various GPON SFP modules exhibit non-standard TX-fault behaviour. In the tested case, the Huawei MA5671A, when used in combination with a Marvell mv88e6085 switch, was found to persistently assert TX-fault, resulting in the module being disabled. This patch adds a quirk to ignore the SFP_F_TX_FAULT state, allowing the module to function. Change from v1: removal of erroneous return statment (Andrew Lunn) Signed-off-by: Matthew Hagan Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220502223315.1973376-1-mnhagan88@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 4dfb79807823..9a5d5a10560f 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -250,6 +250,7 @@ struct sfp { struct sfp_eeprom_id id; unsigned int module_power_mW; unsigned int module_t_start_up; + bool tx_fault_ignore; #if IS_ENABLED(CONFIG_HWMON) struct sfp_diag diag; @@ -1956,6 +1957,12 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) else sfp->module_t_start_up = T_START_UP; + if (!memcmp(id.base.vendor_name, "HUAWEI ", 16) && + !memcmp(id.base.vendor_pn, "MA5671A ", 16)) + sfp->tx_fault_ignore = true; + else + sfp->tx_fault_ignore = false; + return 0; } @@ -2409,7 +2416,10 @@ static void sfp_check_state(struct sfp *sfp) mutex_lock(&sfp->st_mutex); state = sfp_get_state(sfp); changed = state ^ sfp->state; - changed &= SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT; + if (sfp->tx_fault_ignore) + changed &= SFP_F_PRESENT | SFP_F_LOS; + else + changed &= SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT; for (i = 0; i < GPIO_MAX; i++) if (changed & BIT(i)) From 5ef9b803a4af0f5e42012176889b40bb2a978b18 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Mon, 2 May 2022 23:14:09 +0300 Subject: [PATCH 512/803] smsc911x: allow using IRQ0 The AlphaProject AP-SH4A-3A/AP-SH4AD-0A SH boards use IRQ0 for their SMSC LAN911x Ethernet chip, so the networking on them must have been broken by commit 965b2aa78fbc ("net/smsc911x: fix irq resource allocation failure") which filtered out 0 as well as the negative error codes -- it was kinda correct at the time, as platform_get_irq() could return 0 on of_irq_get() failure and on the actual 0 in an IRQ resource. This issue was fixed by me (back in 2016!), so we should be able to fix this driver to allow IRQ0 usage again... When merging this to the stable kernels, make sure you also merge commit e330b9a6bb35 ("platform: don't return 0 from platform_get_irq[_byname]() on error") -- that's my fix to platform_get_irq() for the DT platforms... Fixes: 965b2aa78fbc ("net/smsc911x: fix irq resource allocation failure") Signed-off-by: Sergey Shtylyov Link: https://lore.kernel.org/r/656036e4-6387-38df-b8a7-6ba683b16e63@omp.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/smsc/smsc911x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 7a50ba00f8ae..c854efdf1f25 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2431,7 +2431,7 @@ static int smsc911x_drv_probe(struct platform_device *pdev) if (irq == -EPROBE_DEFER) { retval = -EPROBE_DEFER; goto out_0; - } else if (irq <= 0) { + } else if (irq < 0) { pr_warn("Could not allocate irq resource\n"); retval = -ENODEV; goto out_0; From 13ba794397e45e52893cfc21d7a69cb5f341b407 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Mon, 2 May 2022 21:13:10 -0400 Subject: [PATCH 513/803] bnxt_en: Fix possible bnxt_open() failure caused by wrong RFS flag bnxt_open() can fail in this code path, especially on a VF when it fails to reserve default rings: bnxt_open() __bnxt_open_nic() bnxt_clear_int_mode() bnxt_init_dflt_ring_mode() RX rings would be set to 0 when we hit this error path. It is possible for a subsequent bnxt_open() call to potentially succeed with a code path like this: bnxt_open() bnxt_hwrm_if_change() bnxt_fw_init_one() bnxt_fw_init_one_p3() bnxt_set_dflt_rfs() bnxt_rfs_capable() bnxt_hwrm_reserve_rings() On older chips, RFS is capable if we can reserve the number of vnics that is equal to RX rings + 1. But since RX rings is still set to 0 in this code path, we may mistakenly think that RFS is supported for 0 RX rings. Later, when the default RX rings are reserved and we try to enable RFS, it would fail and cause bnxt_open() to fail unnecessarily. We fix this in 2 places. bnxt_rfs_capable() will always return false if RX rings is not yet set. bnxt_init_dflt_ring_mode() will call bnxt_set_dflt_rfs() which will always clear the RFS flags if RFS is not supported. Fixes: 20d7d1c5c9b1 ("bnxt_en: reliably allocate IRQ table on reset to avoid crash") Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 874fad0a5cf8..2818cfef42f8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10983,7 +10983,7 @@ static bool bnxt_rfs_capable(struct bnxt *bp) if (bp->flags & BNXT_FLAG_CHIP_P5) return bnxt_rfs_supported(bp); - if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp)) + if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp) || !bp->rx_nr_rings) return false; vnics = 1 + bp->rx_nr_rings; @@ -13234,10 +13234,9 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp) goto init_dflt_ring_err; bp->tx_nr_rings_per_tc = bp->tx_nr_rings; - if (bnxt_rfs_supported(bp) && bnxt_rfs_capable(bp)) { - bp->flags |= BNXT_FLAG_RFS; - bp->dev->features |= NETIF_F_NTUPLE; - } + + bnxt_set_dflt_rfs(bp); + init_dflt_ring_err: bnxt_ulp_irq_restart(bp, rc); return rc; From 2b156fb57d8f0d28f2207edc646751f4717cf20d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 2 May 2022 21:13:11 -0400 Subject: [PATCH 514/803] bnxt_en: Initiallize bp->ptp_lock first before using it bnxt_ptp_init() calls bnxt_ptp_init_rtc() which will acquire the ptp_lock spinlock. The spinlock is not initialized until later. Move the bnxt_ptp_init_rtc() call after the spinlock is initialized. Fixes: 24ac1ecd5240 ("bnxt_en: Add driver support to use Real Time Counter for PTP") Reviewed-by: Pavan Chebbi Reviewed-by: Saravanan Vajravel Reviewed-by: Andy Gospodarek Reviewed-by: Somnath Kotur Reviewed-by: Damodharam Ammepalli Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 9c2ad5e67a5d..00f2f80c0073 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -846,13 +846,6 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) if (rc) return rc; - if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) { - bnxt_ptp_timecounter_init(bp, false); - rc = bnxt_ptp_init_rtc(bp, phc_cfg); - if (rc) - goto out; - } - if (ptp->ptp_clock && bnxt_pps_config_ok(bp)) return 0; @@ -861,8 +854,14 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS); spin_lock_init(&ptp->ptp_lock); - if (!(bp->fw_cap & BNXT_FW_CAP_PTP_RTC)) + if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) { + bnxt_ptp_timecounter_init(bp, false); + rc = bnxt_ptp_init_rtc(bp, phc_cfg); + if (rc) + goto out; + } else { bnxt_ptp_timecounter_init(bp, true); + } ptp->ptp_info = bnxt_ptp_caps; if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) { From 195af57914d15229186658ed26dab24b9ada4122 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 2 May 2022 21:13:12 -0400 Subject: [PATCH 515/803] bnxt_en: Fix unnecessary dropping of RX packets In bnxt_poll_p5(), we first check cpr->has_more_work. If it is true, we are in NAPI polling mode and we will call __bnxt_poll_cqs() to continue polling. It is possible to exhanust the budget again when __bnxt_poll_cqs() returns. We then enter the main while loop to check for new entries in the NQ. If we had previously exhausted the NAPI budget, we may call __bnxt_poll_work() to process an RX entry with zero budget. This will cause packets to be dropped unnecessarily, thinking that we are in the netpoll path. Fix it by breaking out of the while loop if we need to process an RX NQ entry with no budget left. We will then exit NAPI and stay in polling mode. Fixes: 389a877a3b20 ("bnxt_en: Process the NQ under NAPI continuous polling.") Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2818cfef42f8..1d69fe0737a1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2707,6 +2707,10 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget) u32 idx = le32_to_cpu(nqcmp->cq_handle_low); struct bnxt_cp_ring_info *cpr2; + /* No more budget for RX work */ + if (budget && work_done >= budget && idx == BNXT_RX_HDL) + break; + cpr2 = cpr->cp_ring_arr[idx]; work_done += __bnxt_poll_work(bp, cpr2, budget - work_done); From cb0d54cbf94866b48a73e10a73a55655f808cc7c Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Tue, 15 Mar 2022 18:20:48 +0200 Subject: [PATCH 516/803] net/mlx5e: Fix wrong source vport matching on tunnel rule When OVS internal port is the vtep device, the first decap rule is matching on the internal port's vport metadata value and then changes the metadata to be the uplink's value. Therefore, following rules on the tunnel, in chain > 0, should avoid matching on internal port metadata and use the uplink vport metadata instead. Select the uplink's metadata value for the source vport match in case the rule is in chain greater than zero, even if the tunnel route device is internal port. Fixes: 166f431ec6be ("net/mlx5e: Add indirect tc offload of ovs internal port") Signed-off-by: Ariel Levkovich Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 3f63df127091..3b151332e2f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -139,7 +139,7 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, if (mlx5_esw_indir_table_decap_vport(attr)) vport = mlx5_esw_indir_table_decap_vport(attr); - if (esw_attr->int_port) + if (attr && !attr->chain && esw_attr->int_port) metadata = mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port); else From 7ba2d9d8de96696c1451fee1b01da11f45bdc2b9 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 3 Mar 2022 19:02:03 +0200 Subject: [PATCH 517/803] net/mlx5: Fix slab-out-of-bounds while reading resource dump menu Resource dump menu may span over more than a single page, support it. Otherwise, menu read may result in a memory access violation: reading outside of the allocated page. Note that page format of the first menu page contains menu headers while the proceeding menu pages contain only records. The KASAN logs are as follows: BUG: KASAN: slab-out-of-bounds in strcmp+0x9b/0xb0 Read of size 1 at addr ffff88812b2e1fd0 by task systemd-udevd/496 CPU: 5 PID: 496 Comm: systemd-udevd Tainted: G B 5.16.0_for_upstream_debug_2022_01_10_23_12 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x57/0x7d print_address_description.constprop.0+0x1f/0x140 ? strcmp+0x9b/0xb0 ? strcmp+0x9b/0xb0 kasan_report.cold+0x83/0xdf ? strcmp+0x9b/0xb0 strcmp+0x9b/0xb0 mlx5_rsc_dump_init+0x4ab/0x780 [mlx5_core] ? mlx5_rsc_dump_destroy+0x80/0x80 [mlx5_core] ? lockdep_hardirqs_on_prepare+0x286/0x400 ? raw_spin_unlock_irqrestore+0x47/0x50 ? aomic_notifier_chain_register+0x32/0x40 mlx5_load+0x104/0x2e0 [mlx5_core] mlx5_init_one+0x41b/0x610 [mlx5_core] .... The buggy address belongs to the object at ffff88812b2e0000 which belongs to the cache kmalloc-4k of size 4096 The buggy address is located 4048 bytes to the right of 4096-byte region [ffff88812b2e0000, ffff88812b2e1000) The buggy address belongs to the page: page:000000009d69807a refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88812b2e6000 pfn:0x12b2e0 head:000000009d69807a order:3 compound_mapcount:0 compound_pincount:0 flags: 0x8000000000010200(slab|head|zone=2) raw: 8000000000010200 0000000000000000 dead000000000001 ffff888100043040 raw: ffff88812b2e6000 0000000080040000 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88812b2e1e80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88812b2e1f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88812b2e1f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff88812b2e2000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88812b2e2080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fixes: 12206b17235a ("net/mlx5: Add support for resource dump") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/diag/rsc_dump.c | 31 +++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c index 538adab6878b..c5b560a8b026 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c @@ -31,6 +31,7 @@ static const char *const mlx5_rsc_sgmt_name[] = { struct mlx5_rsc_dump { u32 pdn; u32 mkey; + u32 number_of_menu_items; u16 fw_segment_type[MLX5_SGMT_TYPE_NUM]; }; @@ -50,21 +51,37 @@ static int mlx5_rsc_dump_sgmt_get_by_name(char *name) return -EINVAL; } -static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page) +#define MLX5_RSC_DUMP_MENU_HEADER_SIZE (MLX5_ST_SZ_BYTES(resource_dump_info_segment) + \ + MLX5_ST_SZ_BYTES(resource_dump_command_segment) + \ + MLX5_ST_SZ_BYTES(resource_dump_menu_segment)) + +static int mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page, + int read_size, int start_idx) { void *data = page_address(page); enum mlx5_sgmt_type sgmt_idx; int num_of_items; char *sgmt_name; void *member; + int size = 0; void *menu; int i; - menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu); - num_of_items = MLX5_GET(resource_dump_menu_segment, menu, num_of_records); + if (!start_idx) { + menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu); + rsc_dump->number_of_menu_items = MLX5_GET(resource_dump_menu_segment, menu, + num_of_records); + size = MLX5_RSC_DUMP_MENU_HEADER_SIZE; + data += size; + } + num_of_items = rsc_dump->number_of_menu_items; - for (i = 0; i < num_of_items; i++) { - member = MLX5_ADDR_OF(resource_dump_menu_segment, menu, record[i]); + for (i = 0; start_idx + i < num_of_items; i++) { + size += MLX5_ST_SZ_BYTES(resource_dump_menu_record); + if (size >= read_size) + return start_idx + i; + + member = data + MLX5_ST_SZ_BYTES(resource_dump_menu_record) * i; sgmt_name = MLX5_ADDR_OF(resource_dump_menu_record, member, segment_name); sgmt_idx = mlx5_rsc_dump_sgmt_get_by_name(sgmt_name); if (sgmt_idx == -EINVAL) @@ -72,6 +89,7 @@ static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct rsc_dump->fw_segment_type[sgmt_idx] = MLX5_GET(resource_dump_menu_record, member, segment_type); } + return 0; } static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, @@ -168,6 +186,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev) struct mlx5_rsc_dump_cmd *cmd = NULL; struct mlx5_rsc_key key = {}; struct page *page; + int start_idx = 0; int size; int err; @@ -189,7 +208,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev) if (err < 0) goto destroy_cmd; - mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page); + start_idx = mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page, size, start_idx); } while (err > 0); From ada09af92e621ab500dd80a16d1d0299a18a1180 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 28 Mar 2022 15:54:52 +0300 Subject: [PATCH 518/803] net/mlx5e: Don't match double-vlan packets if cvlan is not set Currently, match VLAN rule also matches packets that have multiple VLAN headers. This behavior is similar to buggy flower classifier behavior that has recently been fixed. Fix the issue by matching on outer_second_cvlan_tag with value 0 which will cause the HW to verify the packet doesn't contain second vlan header. Fixes: 699e96ddf47f ("net/mlx5e: Support offloading tc double vlan headers match") Signed-off-by: Vlad Buslov Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e3fc15ae7bb1..ac0f73074f7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2459,6 +2459,17 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, match.key->vlan_priority); *match_level = MLX5_MATCH_L2; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) && + match.mask->vlan_eth_type && + MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, + ft_field_support.outer_second_vid, + fs_type)) { + MLX5_SET(fte_match_set_misc, misc_c, + outer_second_cvlan_tag, 1); + spec->match_criteria_enable |= + MLX5_MATCH_MISC_PARAMETERS; + } } } else if (*match_level != MLX5_MATCH_NONE) { /* cvlan_tag enabled in match criteria and From c4d963a588a6e7c4ef31160e80697ae8e5a47746 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 6 Apr 2022 10:30:21 +0300 Subject: [PATCH 519/803] net/mlx5e: Fix the calling of update_buffer_lossy() API The arguments of update_buffer_lossy() is in a wrong order. Fix it. Fixes: 88b3d5c90e96 ("net/mlx5e: Fix port buffers cell size value") Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 673f1c82d381..c9d5d8d93994 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -309,8 +309,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, port_buff_cell_sz, - xoff, &port_buffer, &update_buffer); + err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, xoff, + port_buff_cell_sz, &port_buffer, &update_buffer); if (err) return err; } From 27b0420fd959e38e3500e60b637d39dfab065645 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 18 Apr 2022 17:32:19 +0300 Subject: [PATCH 520/803] net/mlx5e: Lag, Fix use-after-free in fib event handler Recent commit that modified fib route event handler to handle events according to their priority introduced use-after-free[0] in mp->mfi pointer usage. The pointer now is not just cached in order to be compared to following fib_info instances, but is also dereferenced to obtain fib_priority. However, since mlx5 lag code doesn't hold the reference to fin_info during whole mp->mfi lifetime, it could be used after fib_info instance has already been freed be kernel infrastructure code. Don't ever dereference mp->mfi pointer. Refactor it to be 'const void*' type and cache fib_info priority in dedicated integer. Group fib_info-related data into dedicated 'fib' structure that will be further extended by following patches in the series. [0]: [ 203.588029] ================================================================== [ 203.590161] BUG: KASAN: use-after-free in mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core] [ 203.592386] Read of size 4 at addr ffff888144df2050 by task kworker/u20:4/138 [ 203.594766] CPU: 3 PID: 138 Comm: kworker/u20:4 Tainted: G B 5.17.0-rc7+ #6 [ 203.596751] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 203.598813] Workqueue: mlx5_lag_mp mlx5_lag_fib_update [mlx5_core] [ 203.600053] Call Trace: [ 203.600608] [ 203.601110] dump_stack_lvl+0x48/0x5e [ 203.601860] print_address_description.constprop.0+0x1f/0x160 [ 203.602950] ? mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core] [ 203.604073] ? mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core] [ 203.605177] kasan_report.cold+0x83/0xdf [ 203.605969] ? mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core] [ 203.607102] mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core] [ 203.608199] ? mlx5_lag_init_fib_work+0x1c0/0x1c0 [mlx5_core] [ 203.609382] ? read_word_at_a_time+0xe/0x20 [ 203.610463] ? strscpy+0xa0/0x2a0 [ 203.611463] process_one_work+0x722/0x1270 [ 203.612344] worker_thread+0x540/0x11e0 [ 203.613136] ? rescuer_thread+0xd50/0xd50 [ 203.613949] kthread+0x26e/0x300 [ 203.614627] ? kthread_complete_and_exit+0x20/0x20 [ 203.615542] ret_from_fork+0x1f/0x30 [ 203.616273] [ 203.617174] Allocated by task 3746: [ 203.617874] kasan_save_stack+0x1e/0x40 [ 203.618644] __kasan_kmalloc+0x81/0xa0 [ 203.619394] fib_create_info+0xb41/0x3c50 [ 203.620213] fib_table_insert+0x190/0x1ff0 [ 203.621020] fib_magic.isra.0+0x246/0x2e0 [ 203.621803] fib_add_ifaddr+0x19f/0x670 [ 203.622563] fib_inetaddr_event+0x13f/0x270 [ 203.623377] blocking_notifier_call_chain+0xd4/0x130 [ 203.624355] __inet_insert_ifa+0x641/0xb20 [ 203.625185] inet_rtm_newaddr+0xc3d/0x16a0 [ 203.626009] rtnetlink_rcv_msg+0x309/0x880 [ 203.626826] netlink_rcv_skb+0x11d/0x340 [ 203.627626] netlink_unicast+0x4cc/0x790 [ 203.628430] netlink_sendmsg+0x762/0xc00 [ 203.629230] sock_sendmsg+0xb2/0xe0 [ 203.629955] ____sys_sendmsg+0x58a/0x770 [ 203.630756] ___sys_sendmsg+0xd8/0x160 [ 203.631523] __sys_sendmsg+0xb7/0x140 [ 203.632294] do_syscall_64+0x35/0x80 [ 203.633045] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 203.634427] Freed by task 0: [ 203.635063] kasan_save_stack+0x1e/0x40 [ 203.635844] kasan_set_track+0x21/0x30 [ 203.636618] kasan_set_free_info+0x20/0x30 [ 203.637450] __kasan_slab_free+0xfc/0x140 [ 203.638271] kfree+0x94/0x3b0 [ 203.638903] rcu_core+0x5e4/0x1990 [ 203.639640] __do_softirq+0x1ba/0x5d3 [ 203.640828] Last potentially related work creation: [ 203.641785] kasan_save_stack+0x1e/0x40 [ 203.642571] __kasan_record_aux_stack+0x9f/0xb0 [ 203.643478] call_rcu+0x88/0x9c0 [ 203.644178] fib_release_info+0x539/0x750 [ 203.644997] fib_table_delete+0x659/0xb80 [ 203.645809] fib_magic.isra.0+0x1a3/0x2e0 [ 203.646617] fib_del_ifaddr+0x93f/0x1300 [ 203.647415] fib_inetaddr_event+0x9f/0x270 [ 203.648251] blocking_notifier_call_chain+0xd4/0x130 [ 203.649225] __inet_del_ifa+0x474/0xc10 [ 203.650016] devinet_ioctl+0x781/0x17f0 [ 203.650788] inet_ioctl+0x1ad/0x290 [ 203.651533] sock_do_ioctl+0xce/0x1c0 [ 203.652315] sock_ioctl+0x27b/0x4f0 [ 203.653058] __x64_sys_ioctl+0x124/0x190 [ 203.653850] do_syscall_64+0x35/0x80 [ 203.654608] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 203.666952] The buggy address belongs to the object at ffff888144df2000 which belongs to the cache kmalloc-256 of size 256 [ 203.669250] The buggy address is located 80 bytes inside of 256-byte region [ffff888144df2000, ffff888144df2100) [ 203.671332] The buggy address belongs to the page: [ 203.672273] page:00000000bf6c9314 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x144df0 [ 203.674009] head:00000000bf6c9314 order:2 compound_mapcount:0 compound_pincount:0 [ 203.675422] flags: 0x2ffff800010200(slab|head|node=0|zone=2|lastcpupid=0x1ffff) [ 203.676819] raw: 002ffff800010200 0000000000000000 dead000000000122 ffff888100042b40 [ 203.678384] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000 [ 203.679928] page dumped because: kasan: bad access detected [ 203.681455] Memory state around the buggy address: [ 203.682421] ffff888144df1f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 203.683863] ffff888144df1f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 203.685310] >ffff888144df2000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 203.686701] ^ [ 203.687820] ffff888144df2080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 203.689226] ffff888144df2100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 203.690620] ================================================================== Fixes: ad11c4f1d8fd ("net/mlx5e: Lag, Only handle events from highest priority multipath entry") Signed-off-by: Vlad Buslov Reviewed-by: Maor Dickman Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/lag/mp.c | 26 ++++++++++++------- .../net/ethernet/mellanox/mlx5/core/lag/mp.h | 5 +++- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index 4a6ec15ef046..bc77aba97ac1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -100,6 +100,12 @@ static void mlx5_lag_fib_event_flush(struct notifier_block *nb) flush_workqueue(mp->wq); } +static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi) +{ + mp->fib.mfi = fi; + mp->fib.priority = fi->fib_priority; +} + struct mlx5_fib_event_work { struct work_struct work; struct mlx5_lag *ldev; @@ -121,13 +127,13 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, /* Handle delete event */ if (event == FIB_EVENT_ENTRY_DEL) { /* stop track */ - if (mp->mfi == fi) - mp->mfi = NULL; + if (mp->fib.mfi == fi) + mp->fib.mfi = NULL; return; } /* Handle multipath entry with lower priority value */ - if (mp->mfi && mp->mfi != fi && fi->fib_priority >= mp->mfi->fib_priority) + if (mp->fib.mfi && mp->fib.mfi != fi && fi->fib_priority >= mp->fib.priority) return; /* Handle add/replace event */ @@ -145,7 +151,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, mlx5_lag_set_port_affinity(ldev, i); } - mp->mfi = fi; + mlx5_lag_fib_set(mp, fi); return; } @@ -165,7 +171,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } /* First time we see multipath route */ - if (!mp->mfi && !__mlx5_lag_is_active(ldev)) { + if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) { struct lag_tracker tracker; tracker = ldev->tracker; @@ -173,7 +179,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY); - mp->mfi = fi; + mlx5_lag_fib_set(mp, fi); } static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, @@ -184,7 +190,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, struct lag_mp *mp = &ldev->lag_mp; /* Check the nh event is related to the route */ - if (!mp->mfi || mp->mfi != fi) + if (!mp->fib.mfi || mp->fib.mfi != fi) return; /* nh added/removed */ @@ -313,7 +319,7 @@ void mlx5_lag_mp_reset(struct mlx5_lag *ldev) /* Clear mfi, as it might become stale when a route delete event * has been missed, see mlx5_lag_fib_route_event(). */ - ldev->lag_mp.mfi = NULL; + ldev->lag_mp.fib.mfi = NULL; } int mlx5_lag_mp_init(struct mlx5_lag *ldev) @@ -324,7 +330,7 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev) /* always clear mfi, as it might become stale when a route delete event * has been missed */ - mp->mfi = NULL; + mp->fib.mfi = NULL; if (mp->fib_nb.notifier_call) return 0; @@ -354,5 +360,5 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) unregister_fib_notifier(&init_net, &mp->fib_nb); destroy_workqueue(mp->wq); mp->fib_nb.notifier_call = NULL; - mp->mfi = NULL; + mp->fib.mfi = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h index 57af962cad29..143226753c3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h @@ -15,7 +15,10 @@ enum mlx5_lag_port_affinity { struct lag_mp { struct notifier_block fib_nb; - struct fib_info *mfi; /* used in tracking fib events */ + struct { + const void *mfi; /* used in tracking fib events */ + u32 priority; + } fib; struct workqueue_struct *wq; }; From a6589155ec9847918e00e7279b8aa6d4c272bea7 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 18 Apr 2022 17:32:54 +0300 Subject: [PATCH 521/803] net/mlx5e: Lag, Fix fib_info pointer assignment Referenced change incorrectly sets single path fib_info even when LAG is not active. Fix it by moving call to mlx5_lag_fib_set() into conditional that verifies LAG state. Fixes: ad11c4f1d8fd ("net/mlx5e: Lag, Only handle events from highest priority multipath entry") Signed-off-by: Vlad Buslov Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index bc77aba97ac1..9a5884e8a8bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -149,9 +149,9 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, i++; mlx5_lag_set_port_affinity(ldev, i); + mlx5_lag_fib_set(mp, fi); } - mlx5_lag_fib_set(mp, fi); return; } From 4a2a664ed87962c4ddb806a84b5c9634820bcf55 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 18 Apr 2022 17:40:37 +0300 Subject: [PATCH 522/803] net/mlx5e: Lag, Don't skip fib events on current dst Referenced change added check to skip updating fib when new fib instance has same or lower priority. However, new fib instance can be an update on same dst address as existing one even though the structure is another instance that has different address. Ignoring events on such instances causes multipath LAG state to not be correctly updated. Track 'dst' and 'dst_len' fields of fib event fib_entry_notifier_info structure and don't skip events that have the same value of that fields. Fixes: ad11c4f1d8fd ("net/mlx5e: Lag, Only handle events from highest priority multipath entry") Signed-off-by: Vlad Buslov Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/lag/mp.c | 20 +++++++++++-------- .../net/ethernet/mellanox/mlx5/core/lag/mp.h | 2 ++ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index 9a5884e8a8bf..d6c3e6dfd71f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -100,10 +100,12 @@ static void mlx5_lag_fib_event_flush(struct notifier_block *nb) flush_workqueue(mp->wq); } -static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi) +static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len) { mp->fib.mfi = fi; mp->fib.priority = fi->fib_priority; + mp->fib.dst = dst; + mp->fib.dst_len = dst_len; } struct mlx5_fib_event_work { @@ -116,10 +118,10 @@ struct mlx5_fib_event_work { }; }; -static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, - unsigned long event, - struct fib_info *fi) +static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, + struct fib_entry_notifier_info *fen_info) { + struct fib_info *fi = fen_info->fi; struct lag_mp *mp = &ldev->lag_mp; struct fib_nh *fib_nh0, *fib_nh1; unsigned int nhs; @@ -133,7 +135,9 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } /* Handle multipath entry with lower priority value */ - if (mp->fib.mfi && mp->fib.mfi != fi && fi->fib_priority >= mp->fib.priority) + if (mp->fib.mfi && mp->fib.mfi != fi && + (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) && + fi->fib_priority >= mp->fib.priority) return; /* Handle add/replace event */ @@ -149,7 +153,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, i++; mlx5_lag_set_port_affinity(ldev, i); - mlx5_lag_fib_set(mp, fi); + mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len); } return; @@ -179,7 +183,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY); - mlx5_lag_fib_set(mp, fi); + mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len); } static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, @@ -220,7 +224,7 @@ static void mlx5_lag_fib_update(struct work_struct *work) case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_DEL: mlx5_lag_fib_route_event(ldev, fib_work->event, - fib_work->fen_info.fi); + &fib_work->fen_info); fib_info_put(fib_work->fen_info.fi); break; case FIB_EVENT_NH_ADD: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h index 143226753c3a..056a066da604 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h @@ -18,6 +18,8 @@ struct lag_mp { struct { const void *mfi; /* used in tracking fib events */ u32 priority; + u32 dst; + int dst_len; } fib; struct workqueue_struct *wq; }; From 087032ee7021a22e4c7557c0ed16bfd792c3f6fe Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Wed, 23 Feb 2022 21:29:17 +0200 Subject: [PATCH 523/803] net/mlx5e: TC, Fix ct_clear overwriting ct action metadata ct_clear action is translated to clearing reg_c metadata which holds ct state and zone information using mod header actions. These actions are allocated during the actions parsing, as part of the flow attributes main mod header action list. If ct action exists in the rule, the flow's main mod header is used only in the post action table rule, after the ct tables which set the ct info in the reg_c as part of the ct actions. Therefore, if the original rule has a ct_clear action followed by a ct action, the ct action reg_c setting will be done first and will be followed by the ct_clear resetting reg_c and overwriting the ct info. Fix this by moving the ct_clear mod header actions allocation from the ct action parsing stage to the ct action post parsing stage where it is already known if ct_clear is followed by a ct action. In such case, we skip the mod header actions allocation for the ct clear since the ct action will write to reg_c anyway after clearing it. Fixes: 806401c20a0f ("net/mlx5e: CT, Fix multiple allocations and memleak of mod acts") Signed-off-by: Ariel Levkovich Reviewed-by: Paul Blakey Reviewed-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/tc/act/ct.c | 34 +++++++++++++++++-- .../ethernet/mellanox/mlx5/core/en/tc_ct.c | 20 ++++------- .../ethernet/mellanox/mlx5/core/en/tc_ct.h | 11 ++++++ 3 files changed, 49 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c index b9d38fe807df..a829c94289c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c @@ -45,12 +45,41 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, if (mlx5e_is_eswitch_flow(parse_state->flow)) attr->esw_attr->split_count = attr->esw_attr->out_count; - if (!clear_action) { + if (clear_action) { + parse_state->ct_clear = true; + } else { attr->flags |= MLX5_ATTR_FLAG_CT; flow_flag_set(parse_state->flow, CT); parse_state->ct = true; } - parse_state->ct_clear = clear_action; + + return 0; +} + +static int +tc_act_post_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_mod_hdr_acts *mod_acts = &attr->parse_attr->mod_hdr_acts; + int err; + + /* If ct action exist, we can ignore previous ct_clear actions */ + if (parse_state->ct) + return 0; + + if (parse_state->ct_clear) { + err = mlx5_tc_ct_set_ct_clear_regs(parse_state->ct_priv, mod_acts); + if (err) { + NL_SET_ERR_MSG_MOD(parse_state->extack, + "Failed to set registers for ct clear"); + return err; + } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + /* Prevent handling of additional, redundant clear actions */ + parse_state->ct_clear = false; + } return 0; } @@ -70,5 +99,6 @@ struct mlx5e_tc_act mlx5e_tc_act_ct = { .can_offload = tc_act_can_offload_ct, .parse_action = tc_act_parse_ct, .is_multi_table_act = tc_act_is_multi_table_act_ct, + .post_parse = tc_act_post_parse_ct, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index e49f51124c74..73a1e0a4818d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -582,6 +582,12 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, return 0; } +int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0); +} + static int mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, char *modact) @@ -1410,9 +1416,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { - bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; - int err; - if (!priv) { NL_SET_ERR_MSG_MOD(extack, "offload of ct action isn't available"); @@ -1423,17 +1426,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, attr->ct_attr.ct_action = act->ct.action; attr->ct_attr.nf_ft = act->ct.flow_table; - if (!clear_action) - goto out; - - err = mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Failed to set registers for ct clear"); - return err; - } - attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - -out: return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 36d3652bf829..00a3ba862afb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -129,6 +129,10 @@ bool mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, struct sk_buff *skb, u8 zone_restore_id); +int +mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts); + #else /* CONFIG_MLX5_TC_CT */ static inline struct mlx5_tc_ct_priv * @@ -170,6 +174,13 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) return 0; } +static inline int +mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + return -EOPNOTSUPP; +} + static inline int mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr, From e3fdc71bcb6ffe1d4870a89252ba296a9558e294 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Mon, 25 Apr 2022 17:12:12 +0300 Subject: [PATCH 524/803] net/mlx5e: TC, fix decap fallback to uplink when int port not supported When resolving the decap route device for a tunnel decap rule, the result may be an OVS internal port device. Prior to adding the support for internal port offload, such case would result in using the uplink as the default decap route device which allowed devices that can't support internal port offload to offload this decap rule. This behavior got broken by adding the internal port offload which will fail in case the device can't support internal port offload. To restore the old behavior, use the uplink device as the decap route as before when internal port offload is not supported. Fixes: b16eb3c81fe2 ("net/mlx5: Support internal port as decap route device") Signed-off-by: Ariel Levkovich Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 378fc8e3bd97..d87bbb0be7c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -713,6 +713,7 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, struct net_device *filter_dev) { struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_int_port *int_port; TC_TUN_ROUTE_ATTR_INIT(attr); u16 vport_num; @@ -747,7 +748,7 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni); esw_attr->rx_tun_attr->decap_vport = vport_num; - } else if (netif_is_ovs_master(attr.route_dev)) { + } else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) { int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), attr.route_dev->ifindex, MLX5E_TC_INT_PORT_INGRESS); From b069e14fff46c8da9fcc79957f8acaa3e2dfdb6b Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 29 Mar 2022 17:42:46 +0300 Subject: [PATCH 525/803] net/mlx5e: CT: Fix queued up restore put() executing after relevant ft release __mlx5_tc_ct_entry_put() queues release of tuple related to some ct FT, if that is the last reference to that tuple, the actual deletion of the tuple can happen after the FT is already destroyed and freed. Flush the used workqueue before destroying the ct FT. Fixes: a2173131526d ("net/mlx5e: CT: manage the lifetime of the ct entry object") Reviewed-by: Oz Shlomo Signed-off-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 73a1e0a4818d..ab4b0f3ee2a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -1741,6 +1741,8 @@ mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) static void mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) { + struct mlx5e_priv *priv; + if (!refcount_dec_and_test(&ft->refcount)) return; @@ -1750,6 +1752,8 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) rhashtable_free_and_destroy(&ft->ct_entries_ht, mlx5_tc_ct_flush_ft_entry, ct_priv); + priv = netdev_priv(ct_priv->netdev); + flush_workqueue(priv->wq); mlx5_tc_ct_free_pre_ct_tables(ft); mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); kfree(ft); From 0e322efd64d2ad11a773f7a314512086ebbe000c Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Mon, 28 Mar 2022 18:29:13 +0300 Subject: [PATCH 526/803] net/mlx5e: Avoid checking offload capability in post_parse action During TC action parsing, the can_offload callback is called before calling the action's main parsing callback. Later on, the can_offload callback is called again before handling the action's post_parse callback if exists. Since the main parsing callback might have changed and set parsing params for the rule, following can_offload checks might fail because some parsing params were already set. Specifically, the ct action main parsing sets the ct param in the parsing status structure and when the second can_offload for ct action is called, before handling the ct post parsing, it will return an error since it checks this ct param to indicate multiple ct actions which are not supported. Therefore, the can_offload call is removed from the post parsing handling to prevent such cases. This is allowed since the first can_offload call will ensure that the action can be offloaded and the fact the code reached the post parsing handling already means that the action can be offloaded. Fixes: 8300f225268b ("net/mlx5e: Create new flow attr for multi table actions") Signed-off-by: Ariel Levkovich Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c index af37a8d247a1..2755c25ba324 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c @@ -145,8 +145,7 @@ mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, flow_action_for_each(i, act, flow_action) { tc_act = mlx5e_tc_act_get(act->id, ns_type); - if (!tc_act || !tc_act->post_parse || - !tc_act->can_offload(parse_state, act, i, attr)) + if (!tc_act || !tc_act->post_parse) continue; err = tc_act->post_parse(parse_state, priv, attr); From b781bff882d16175277ca129c382886cb4c74a2c Mon Sep 17 00:00:00 2001 From: Moshe Tal Date: Wed, 9 Feb 2022 19:23:56 +0200 Subject: [PATCH 527/803] net/mlx5e: Fix trust state reset in reload Setting dscp2prio during the driver reload can cause dcb ieee app list to be not empty after the reload finish and as a result to a conflict between the priority trust state reported by the app and the state in the device register. Reset the dcb ieee app list on initialization in case this is conflicting with the register status. Fixes: 2a5e7a1344f4 ("net/mlx5e: Add dcbnl dscp to priority support") Signed-off-by: Moshe Tal Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index d659fe07d464..8ead2c82a52a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1200,6 +1200,16 @@ static int mlx5e_trust_initialize(struct mlx5e_priv *priv) return err; WRITE_ONCE(priv->dcbx_dp.trust_state, trust_state); + if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_PCP && priv->dcbx.dscp_app_cnt) { + /* + * Align the driver state with the register state. + * Temporary state change is required to enable the app list reset. + */ + priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_DSCP; + mlx5e_dcbnl_delete_app(priv); + priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP; + } + mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params, priv->dcbx_dp.trust_state); From cb7786a76ea39f394f0a059787fe24fa8e340fb6 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 11 Apr 2022 21:31:06 +0300 Subject: [PATCH 528/803] net/mlx5: Fix deadlock in sync reset flow The sync reset flow can lead to the following deadlock when poll_sync_reset() is called by timer softirq and waiting on del_timer_sync() for the same timer. Fix that by moving the part of the flow that waits for the timer to reset_reload_work. It fixes the following kernel Trace: RIP: 0010:del_timer_sync+0x32/0x40 ... Call Trace: mlx5_sync_reset_clear_reset_requested+0x26/0x50 [mlx5_core] poll_sync_reset.cold+0x36/0x52 [mlx5_core] call_timer_fn+0x32/0x130 __run_timers.part.0+0x180/0x280 ? tick_sched_handle+0x33/0x60 ? tick_sched_timer+0x3d/0x80 ? ktime_get+0x3e/0xa0 run_timer_softirq+0x2a/0x50 __do_softirq+0xe1/0x2d6 ? hrtimer_interrupt+0x136/0x220 irq_exit+0xae/0xb0 smp_apic_timer_interrupt+0x7b/0x140 apic_timer_interrupt+0xf/0x20 Fixes: 3c5193a87b0f ("net/mlx5: Use del_timer_sync in fw reset flow of halting poll") Signed-off-by: Moshe Shemesh Reviewed-by: Maher Sanalla Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/fw_reset.c | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 4aa22dce9b77..ec18d4ccbc11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -155,22 +155,6 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) } } -static void mlx5_sync_reset_reload_work(struct work_struct *work) -{ - struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, - reset_reload_work); - struct mlx5_core_dev *dev = fw_reset->dev; - int err; - - mlx5_enter_error_state(dev, true); - mlx5_unload_one(dev); - err = mlx5_health_wait_pci_up(dev); - if (err) - mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); - fw_reset->ret = err; - mlx5_fw_reset_complete_reload(dev); -} - static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; @@ -188,6 +172,23 @@ static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, boo mlx5_start_health_poll(dev); } +static void mlx5_sync_reset_reload_work(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_reload_work); + struct mlx5_core_dev *dev = fw_reset->dev; + int err; + + mlx5_sync_reset_clear_reset_requested(dev, false); + mlx5_enter_error_state(dev, true); + mlx5_unload_one(dev); + err = mlx5_health_wait_pci_up(dev); + if (err) + mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); + fw_reset->ret = err; + mlx5_fw_reset_complete_reload(dev); +} + #define MLX5_RESET_POLL_INTERVAL (HZ / 10) static void poll_sync_reset(struct timer_list *t) { @@ -202,7 +203,6 @@ static void poll_sync_reset(struct timer_list *t) if (fatal_error) { mlx5_core_warn(dev, "Got Device Reset\n"); - mlx5_sync_reset_clear_reset_requested(dev, false); queue_work(fw_reset->wq, &fw_reset->reset_reload_work); return; } From fc3d3db07b35885f238e1fa06b9f04a8fa7a62d0 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 11 Apr 2022 20:38:44 +0300 Subject: [PATCH 529/803] net/mlx5: Avoid double clear or set of sync reset requested Double clear of reset requested state can lead to NULL pointer as it will try to delete the timer twice. This can happen for example on a race between abort from FW and pci error or reset. Avoid such case using test_and_clear_bit() to verify only one time reset requested state clear flow. Similarly use test_and_set_bit() to verify only one time reset requested state set flow. Fixes: 7dd6df329d4c ("net/mlx5: Handle sync reset abort event") Signed-off-by: Moshe Shemesh Reviewed-by: Maher Sanalla Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/fw_reset.c | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index ec18d4ccbc11..ca1aba845dd6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -162,14 +162,19 @@ static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) del_timer_sync(&fw_reset->timer); } -static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) +static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) { + mlx5_core_warn(dev, "Reset request was already cleared\n"); + return -EALREADY; + } + mlx5_stop_sync_reset_poll(dev); - clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags); if (poll_health) mlx5_start_health_poll(dev); + return 0; } static void mlx5_sync_reset_reload_work(struct work_struct *work) @@ -229,13 +234,17 @@ static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev) return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false); } -static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev) +static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) { + mlx5_core_warn(dev, "Reset request was already set\n"); + return -EALREADY; + } mlx5_stop_health_poll(dev, true); - set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags); mlx5_start_sync_reset_poll(dev); + return 0; } static void mlx5_fw_live_patch_event(struct work_struct *work) @@ -264,7 +273,9 @@ static void mlx5_sync_reset_request_event(struct work_struct *work) err ? "Failed" : "Sent"); return; } - mlx5_sync_reset_set_reset_requested(dev); + if (mlx5_sync_reset_set_reset_requested(dev)) + return; + err = mlx5_fw_reset_set_reset_sync_ack(dev); if (err) mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err); @@ -362,7 +373,8 @@ static void mlx5_sync_reset_now_event(struct work_struct *work) struct mlx5_core_dev *dev = fw_reset->dev; int err; - mlx5_sync_reset_clear_reset_requested(dev, false); + if (mlx5_sync_reset_clear_reset_requested(dev, false)) + return; mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n"); @@ -391,10 +403,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work) reset_abort_work); struct mlx5_core_dev *dev = fw_reset->dev; - if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) + if (mlx5_sync_reset_clear_reset_requested(dev, true)) return; - - mlx5_sync_reset_clear_reset_requested(dev, true); mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n"); } From a042d7f5bb68c47f6e0e546ca367d14e1e4b25ba Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Sun, 10 Apr 2022 11:58:05 +0000 Subject: [PATCH 530/803] net/mlx5: Fix matching on inner TTC The cited commits didn't use proper matching on inner TTC as a result distribution of encapsulated packets wasn't symmetric between the physical ports. Fixes: 4c71ce50d2fe ("net/mlx5: Support partial TTC rules") Fixes: 8e25a2bc6687 ("net/mlx5: Lag, add support to create TTC tables for LAG port selection") Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index a6592f9c3c05..5be322528279 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -505,7 +505,7 @@ static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev) struct ttc_params ttc_params = {}; mlx5_lag_set_inner_ttc_params(ldev, &ttc_params); - port_sel->inner.ttc = mlx5_create_ttc_table(dev, &ttc_params); + port_sel->inner.ttc = mlx5_create_inner_ttc_table(dev, &ttc_params); if (IS_ERR(port_sel->inner.ttc)) return PTR_ERR(port_sel->inner.ttc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c index b63dec24747a..b78f2ba25c19 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -408,6 +408,8 @@ static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev, for (tt = 0; tt < MLX5_NUM_TT; tt++) { struct mlx5_ttc_rule *rule = &rules[tt]; + if (test_bit(tt, params->ignore_dests)) + continue; rule->rule = mlx5_generate_inner_ttc_rule(dev, ft, ¶ms->dests[tt], ttc_rules[tt].etype, From 841e512ffb64898db6322c0619f6bbc41266d86f Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Tue, 26 Apr 2022 16:15:36 +0200 Subject: [PATCH 531/803] drm/bridge: ite-it6505: add missing Kconfig option select The IT6505 is using functions provided by the DRM_DP_HELPER driver. In order to avoid having the bridge enabled but the helper disabled, let's add a select in order to be sure that the DP helper functions are always available. Fixes: b5c84a9edcd4 ("drm/bridge: add it6505 driver") Signed-off-by: Fabien Parent Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20220426141536.274727-1-fparent@baylibre.com --- drivers/gpu/drm/bridge/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 007e5a282f67..2145b08f9534 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -78,6 +78,7 @@ config DRM_ITE_IT6505 tristate "ITE IT6505 DisplayPort bridge" depends on OF select DRM_KMS_HELPER + select DRM_DP_HELPER select EXTCON help ITE IT6505 DisplayPort bridge chip driver. From 5edde870d3283edeaa27ab62ac4fac5ee8cae35a Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Sun, 13 Mar 2022 21:43:21 -0500 Subject: [PATCH 532/803] iommu/amd: Do not call sleep while holding spinlock Smatch static checker warns: drivers/iommu/amd/iommu_v2.c:133 free_device_state() warn: sleeping in atomic context Fixes by storing the list of struct device_state in a temporary list, and then free the memory after releasing the spinlock. Reported-by: Dan Carpenter Fixes: 9f968fc70d85 ("iommu/amd: Improve amd_iommu_v2_exit()") Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220314024321.37411-1-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu_v2.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index e56b137ceabd..afb3efd565b7 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -956,6 +956,7 @@ static void __exit amd_iommu_v2_exit(void) { struct device_state *dev_state, *next; unsigned long flags; + LIST_HEAD(freelist); if (!amd_iommu_v2_supported()) return; @@ -975,11 +976,20 @@ static void __exit amd_iommu_v2_exit(void) put_device_state(dev_state); list_del(&dev_state->list); - free_device_state(dev_state); + list_add_tail(&dev_state->list, &freelist); } spin_unlock_irqrestore(&state_lock, flags); + /* + * Since free_device_state waits on the count to be zero, + * we need to free dev_state outside the spinlock. + */ + list_for_each_entry_safe(dev_state, next, &freelist, list) { + list_del(&dev_state->list); + free_device_state(dev_state); + } + destroy_workqueue(iommu_wq); } From 9ed1d7f510336e283c0bb2d20fd771e9d8fa085b Mon Sep 17 00:00:00 2001 From: Vasant Hegde via iommu Date: Mon, 14 Mar 2022 12:32:26 +0530 Subject: [PATCH 533/803] iommu/amd: Remove redundant check smatch static checker warning: drivers/iommu/amd/init.c:1989 amd_iommu_init_pci() warn: duplicate check 'ret' (previous on line 1978) Reported-by: Dan Carpenter Fixes: 06687a03805e ("iommu/amd: Improve error handling for amd_iommu_init_pci") Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220314070226.40641-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index b4a798c7b347..648d6b94ba8c 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1986,8 +1986,7 @@ static int __init amd_iommu_init_pci(void) for_each_iommu(iommu) iommu_flush_all_caches(iommu); - if (!ret) - print_iommu_info(); + print_iommu_info(); out: return ret; From 2ac2fab52917ae82cbca97cf6e5d2993530257ed Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Mon, 2 May 2022 18:22:38 +0900 Subject: [PATCH 534/803] iommu/dart: Add missing module owner to ops structure This is required to make loading this as a module work. Signed-off-by: Hector Martin Fixes: 46d1fb072e76 ("iommu/dart: Add DART iommu driver") Reviewed-by: Sven Peter Link: https://lore.kernel.org/r/20220502092238.30486-1-marcan@marcan.st Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 15b77f16cfa3..8af0242a90d9 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -773,6 +773,7 @@ static const struct iommu_ops apple_dart_iommu_ops = { .get_resv_regions = apple_dart_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, .pgsize_bitmap = -1UL, /* Restricted during dart probe */ + .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = apple_dart_attach_dev, .detach_dev = apple_dart_detach_dev, From 6625ffb90f44efc28a3fe1432191a4eacb08c915 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:13:52 +0800 Subject: [PATCH 535/803] dt-bindings: mediatek: mt8195: Add binding for MM IOMMU This patch adds descriptions for mt8195 IOMMU which also use ARM Short-Descriptor translation table format. In mt8195, there are two smi-common HW and IOMMU, one is for vdo(video output), the other is for vpp(video processing pipe). They connects with different smi-larbs, then some setting(larbid_remap) is different. Differentiate them with the compatible string. Something like this: IOMMU(VDO) IOMMU(VPP) | | SMI_COMMON_VDO SMI_COMMON_VPP --------------- ---------------- | | ... | | ... larb0 larb2 ... larb1 larb3 ... Another change is that we have a new IOMMU that is for infra master like PCIe and USB. The infra master don't have the larb and ports, thus we rename the port header file to mt8195-memory-port.h rather than mt8195-larb-port.h. Also, the IOMMU is not only for MM, thus, we don't call it "m4u" which means "MultiMedia Memory Management UNIT". thus, use the "iommu" as the compatiable string. Signed-off-by: Yong Wu Acked-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-2-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- .../bindings/iommu/mediatek,iommu.yaml | 7 + .../dt-bindings/memory/mt8195-memory-port.h | 390 ++++++++++++++++++ 2 files changed, 397 insertions(+) create mode 100644 include/dt-bindings/memory/mt8195-memory-port.h diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml index 97e8c471a5e8..2223408e91a9 100644 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml @@ -77,6 +77,8 @@ properties: - mediatek,mt8173-m4u # generation two - mediatek,mt8183-m4u # generation two - mediatek,mt8192-m4u # generation two + - mediatek,mt8195-iommu-vdo # generation two + - mediatek,mt8195-iommu-vpp # generation two - description: mt7623 generation one items: @@ -120,6 +122,7 @@ properties: dt-binding/memory/mt8173-larb-port.h for mt8173, dt-binding/memory/mt8183-larb-port.h for mt8183, dt-binding/memory/mt8192-larb-port.h for mt8192. + dt-binding/memory/mt8195-memory-port.h for mt8195. power-domains: maxItems: 1 @@ -141,6 +144,8 @@ allOf: - mediatek,mt2712-m4u - mediatek,mt8173-m4u - mediatek,mt8192-m4u + - mediatek,mt8195-iommu-vdo + - mediatek,mt8195-iommu-vpp then: required: @@ -151,6 +156,8 @@ allOf: compatible: enum: - mediatek,mt8192-m4u + - mediatek,mt8195-iommu-vdo + - mediatek,mt8195-iommu-vpp then: required: diff --git a/include/dt-bindings/memory/mt8195-memory-port.h b/include/dt-bindings/memory/mt8195-memory-port.h new file mode 100644 index 000000000000..c10e8b61f1e8 --- /dev/null +++ b/include/dt-bindings/memory/mt8195-memory-port.h @@ -0,0 +1,390 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 MediaTek Inc. + * Author: Yong Wu + */ +#ifndef _DT_BINDINGS_MEMORY_MT8195_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT8195_LARB_PORT_H_ + +#include + +/* + * MM IOMMU supports 16GB dma address. We separate it to four ranges: + * 0 ~ 4G; 4G ~ 8G; 8G ~ 12G; 12G ~ 16G, we could adjust these masters + * locate in anyone region. BUT: + * a) Make sure all the ports inside a larb are in one range. + * b) The iova of any master can NOT cross the 4G/8G/12G boundary. + * + * This is the suggested mapping in this SoC: + * + * modules dma-address-region larbs-ports + * disp 0 ~ 4G larb0/1/2/3 + * vcodec 4G ~ 8G larb19/20/21/22/23/24 + * cam/mdp 8G ~ 12G the other larbs. + * N/A 12G ~ 16G + * CCU0 0x24000_0000 ~ 0x243ff_ffff larb18: port 0/1 + * CCU1 0x24400_0000 ~ 0x247ff_ffff larb18: port 2/3 + * + * This SoC have two IOMMU HWs, this is the detailed connected information: + * iommu-vdo: larb0/2/5/7/9/10/11/13/17/19/21/24/25/28 + * iommu-vpp: larb1/3/4/6/8/12/14/16/18/20/22/23/26/27 + */ + +/* MM IOMMU ports */ +/* larb0 */ +#define M4U_PORT_L0_DISP_RDMA0 MTK_M4U_ID(0, 0) +#define M4U_PORT_L0_DISP_WDMA0 MTK_M4U_ID(0, 1) +#define M4U_PORT_L0_DISP_OVL0_RDMA0 MTK_M4U_ID(0, 2) +#define M4U_PORT_L0_DISP_OVL0_RDMA1 MTK_M4U_ID(0, 3) +#define M4U_PORT_L0_DISP_OVL0_HDR MTK_M4U_ID(0, 4) +#define M4U_PORT_L0_DISP_FAKE0 MTK_M4U_ID(0, 5) + +/* larb1 */ +#define M4U_PORT_L1_DISP_RDMA0 MTK_M4U_ID(1, 0) +#define M4U_PORT_L1_DISP_WDMA0 MTK_M4U_ID(1, 1) +#define M4U_PORT_L1_DISP_OVL0_RDMA0 MTK_M4U_ID(1, 2) +#define M4U_PORT_L1_DISP_OVL0_RDMA1 MTK_M4U_ID(1, 3) +#define M4U_PORT_L1_DISP_OVL0_HDR MTK_M4U_ID(1, 4) +#define M4U_PORT_L1_DISP_FAKE0 MTK_M4U_ID(1, 5) + +/* larb2 */ +#define M4U_PORT_L2_MDP_RDMA0 MTK_M4U_ID(2, 0) +#define M4U_PORT_L2_MDP_RDMA2 MTK_M4U_ID(2, 1) +#define M4U_PORT_L2_MDP_RDMA4 MTK_M4U_ID(2, 2) +#define M4U_PORT_L2_MDP_RDMA6 MTK_M4U_ID(2, 3) +#define M4U_PORT_L2_DISP_FAKE1 MTK_M4U_ID(2, 4) + +/* larb3 */ +#define M4U_PORT_L3_MDP_RDMA1 MTK_M4U_ID(3, 0) +#define M4U_PORT_L3_MDP_RDMA3 MTK_M4U_ID(3, 1) +#define M4U_PORT_L3_MDP_RDMA5 MTK_M4U_ID(3, 2) +#define M4U_PORT_L3_MDP_RDMA7 MTK_M4U_ID(3, 3) +#define M4U_PORT_L3_HDR_DS MTK_M4U_ID(3, 4) +#define M4U_PORT_L3_HDR_ADL MTK_M4U_ID(3, 5) +#define M4U_PORT_L3_DISP_FAKE1 MTK_M4U_ID(3, 6) + +/* larb4 */ +#define M4U_PORT_L4_MDP_RDMA MTK_M4U_ID(4, 0) +#define M4U_PORT_L4_MDP_FG MTK_M4U_ID(4, 1) +#define M4U_PORT_L4_MDP_OVL MTK_M4U_ID(4, 2) +#define M4U_PORT_L4_MDP_WROT MTK_M4U_ID(4, 3) +#define M4U_PORT_L4_FAKE MTK_M4U_ID(4, 4) + +/* larb5 */ +#define M4U_PORT_L5_SVPP1_MDP_RDMA MTK_M4U_ID(5, 0) +#define M4U_PORT_L5_SVPP1_MDP_FG MTK_M4U_ID(5, 1) +#define M4U_PORT_L5_SVPP1_MDP_OVL MTK_M4U_ID(5, 2) +#define M4U_PORT_L5_SVPP1_MDP_WROT MTK_M4U_ID(5, 3) +#define M4U_PORT_L5_SVPP2_MDP_RDMA MTK_M4U_ID(5, 4) +#define M4U_PORT_L5_SVPP2_MDP_FG MTK_M4U_ID(5, 5) +#define M4U_PORT_L5_SVPP2_MDP_WROT MTK_M4U_ID(5, 6) +#define M4U_PORT_L5_FAKE MTK_M4U_ID(5, 7) + +/* larb6 */ +#define M4U_PORT_L6_SVPP3_MDP_RDMA MTK_M4U_ID(6, 0) +#define M4U_PORT_L6_SVPP3_MDP_FG MTK_M4U_ID(6, 1) +#define M4U_PORT_L6_SVPP3_MDP_WROT MTK_M4U_ID(6, 2) +#define M4U_PORT_L6_FAKE MTK_M4U_ID(6, 3) + +/* larb7 */ +#define M4U_PORT_L7_IMG_WPE_RDMA0 MTK_M4U_ID(7, 0) +#define M4U_PORT_L7_IMG_WPE_RDMA1 MTK_M4U_ID(7, 1) +#define M4U_PORT_L7_IMG_WPE_WDMA0 MTK_M4U_ID(7, 2) + +/* larb8 */ +#define M4U_PORT_L8_IMG_WPE_RDMA0 MTK_M4U_ID(8, 0) +#define M4U_PORT_L8_IMG_WPE_RDMA1 MTK_M4U_ID(8, 1) +#define M4U_PORT_L8_IMG_WPE_WDMA0 MTK_M4U_ID(8, 2) + +/* larb9 */ +#define M4U_PORT_L9_IMG_IMGI_T1_A MTK_M4U_ID(9, 0) +#define M4U_PORT_L9_IMG_IMGBI_T1_A MTK_M4U_ID(9, 1) +#define M4U_PORT_L9_IMG_IMGCI_T1_A MTK_M4U_ID(9, 2) +#define M4U_PORT_L9_IMG_SMTI_T1_A MTK_M4U_ID(9, 3) +#define M4U_PORT_L9_IMG_TNCSTI_T1_A MTK_M4U_ID(9, 4) +#define M4U_PORT_L9_IMG_TNCSTI_T4_A MTK_M4U_ID(9, 5) +#define M4U_PORT_L9_IMG_YUVO_T1_A MTK_M4U_ID(9, 6) +#define M4U_PORT_L9_IMG_TIMGO_T1_A MTK_M4U_ID(9, 7) +#define M4U_PORT_L9_IMG_YUVO_T2_A MTK_M4U_ID(9, 8) +#define M4U_PORT_L9_IMG_IMGI_T1_B MTK_M4U_ID(9, 9) +#define M4U_PORT_L9_IMG_IMGBI_T1_B MTK_M4U_ID(9, 10) +#define M4U_PORT_L9_IMG_IMGCI_T1_B MTK_M4U_ID(9, 11) +#define M4U_PORT_L9_IMG_YUVO_T5_A MTK_M4U_ID(9, 12) +#define M4U_PORT_L9_IMG_SMTI_T1_B MTK_M4U_ID(9, 13) +#define M4U_PORT_L9_IMG_TNCSO_T1_A MTK_M4U_ID(9, 14) +#define M4U_PORT_L9_IMG_SMTO_T1_A MTK_M4U_ID(9, 15) +#define M4U_PORT_L9_IMG_TNCSTO_T1_A MTK_M4U_ID(9, 16) +#define M4U_PORT_L9_IMG_YUVO_T2_B MTK_M4U_ID(9, 17) +#define M4U_PORT_L9_IMG_YUVO_T5_B MTK_M4U_ID(9, 18) +#define M4U_PORT_L9_IMG_SMTO_T1_B MTK_M4U_ID(9, 19) + +/* larb10 */ +#define M4U_PORT_L10_IMG_IMGI_D1_A MTK_M4U_ID(10, 0) +#define M4U_PORT_L10_IMG_IMGCI_D1_A MTK_M4U_ID(10, 1) +#define M4U_PORT_L10_IMG_DEPI_D1_A MTK_M4U_ID(10, 2) +#define M4U_PORT_L10_IMG_DMGI_D1_A MTK_M4U_ID(10, 3) +#define M4U_PORT_L10_IMG_VIPI_D1_A MTK_M4U_ID(10, 4) +#define M4U_PORT_L10_IMG_TNRWI_D1_A MTK_M4U_ID(10, 5) +#define M4U_PORT_L10_IMG_RECI_D1_A MTK_M4U_ID(10, 6) +#define M4U_PORT_L10_IMG_SMTI_D1_A MTK_M4U_ID(10, 7) +#define M4U_PORT_L10_IMG_SMTI_D6_A MTK_M4U_ID(10, 8) +#define M4U_PORT_L10_IMG_PIMGI_P1_A MTK_M4U_ID(10, 9) +#define M4U_PORT_L10_IMG_PIMGBI_P1_A MTK_M4U_ID(10, 10) +#define M4U_PORT_L10_IMG_PIMGCI_P1_A MTK_M4U_ID(10, 11) +#define M4U_PORT_L10_IMG_PIMGI_P1_B MTK_M4U_ID(10, 12) +#define M4U_PORT_L10_IMG_PIMGBI_P1_B MTK_M4U_ID(10, 13) +#define M4U_PORT_L10_IMG_PIMGCI_P1_B MTK_M4U_ID(10, 14) +#define M4U_PORT_L10_IMG_IMG3O_D1_A MTK_M4U_ID(10, 15) +#define M4U_PORT_L10_IMG_IMG4O_D1_A MTK_M4U_ID(10, 16) +#define M4U_PORT_L10_IMG_IMG3CO_D1_A MTK_M4U_ID(10, 17) +#define M4U_PORT_L10_IMG_FEO_D1_A MTK_M4U_ID(10, 18) +#define M4U_PORT_L10_IMG_IMG2O_D1_A MTK_M4U_ID(10, 19) +#define M4U_PORT_L10_IMG_TNRWO_D1_A MTK_M4U_ID(10, 20) +#define M4U_PORT_L10_IMG_SMTO_D1_A MTK_M4U_ID(10, 21) +#define M4U_PORT_L10_IMG_WROT_P1_A MTK_M4U_ID(10, 22) +#define M4U_PORT_L10_IMG_WROT_P1_B MTK_M4U_ID(10, 23) + +/* larb11 */ +#define M4U_PORT_L11_IMG_WPE_EIS_RDMA0_A MTK_M4U_ID(11, 0) +#define M4U_PORT_L11_IMG_WPE_EIS_RDMA1_A MTK_M4U_ID(11, 1) +#define M4U_PORT_L11_IMG_WPE_EIS_WDMA0_A MTK_M4U_ID(11, 2) +#define M4U_PORT_L11_IMG_WPE_TNR_RDMA0_A MTK_M4U_ID(11, 3) +#define M4U_PORT_L11_IMG_WPE_TNR_RDMA1_A MTK_M4U_ID(11, 4) +#define M4U_PORT_L11_IMG_WPE_TNR_WDMA0_A MTK_M4U_ID(11, 5) +#define M4U_PORT_L11_IMG_WPE_EIS_CQ0_A MTK_M4U_ID(11, 6) +#define M4U_PORT_L11_IMG_WPE_EIS_CQ1_A MTK_M4U_ID(11, 7) +#define M4U_PORT_L11_IMG_WPE_TNR_CQ0_A MTK_M4U_ID(11, 8) +#define M4U_PORT_L11_IMG_WPE_TNR_CQ1_A MTK_M4U_ID(11, 9) + +/* larb12 */ +#define M4U_PORT_L12_IMG_FDVT_RDA MTK_M4U_ID(12, 0) +#define M4U_PORT_L12_IMG_FDVT_RDB MTK_M4U_ID(12, 1) +#define M4U_PORT_L12_IMG_FDVT_WRA MTK_M4U_ID(12, 2) +#define M4U_PORT_L12_IMG_FDVT_WRB MTK_M4U_ID(12, 3) +#define M4U_PORT_L12_IMG_ME_RDMA MTK_M4U_ID(12, 4) +#define M4U_PORT_L12_IMG_ME_WDMA MTK_M4U_ID(12, 5) +#define M4U_PORT_L12_IMG_DVS_RDMA MTK_M4U_ID(12, 6) +#define M4U_PORT_L12_IMG_DVS_WDMA MTK_M4U_ID(12, 7) +#define M4U_PORT_L12_IMG_DVP_RDMA MTK_M4U_ID(12, 8) +#define M4U_PORT_L12_IMG_DVP_WDMA MTK_M4U_ID(12, 9) + +/* larb13 */ +#define M4U_PORT_L13_CAM_CAMSV_CQI_E1 MTK_M4U_ID(13, 0) +#define M4U_PORT_L13_CAM_CAMSV_CQI_E2 MTK_M4U_ID(13, 1) +#define M4U_PORT_L13_CAM_GCAMSV_A_IMGO_0 MTK_M4U_ID(13, 2) +#define M4U_PORT_L13_CAM_SCAMSV_A_IMGO_0 MTK_M4U_ID(13, 3) +#define M4U_PORT_L13_CAM_GCAMSV_B_IMGO_0 MTK_M4U_ID(13, 4) +#define M4U_PORT_L13_CAM_GCAMSV_B_IMGO_1 MTK_M4U_ID(13, 5) +#define M4U_PORT_L13_CAM_GCAMSV_A_UFEO_0 MTK_M4U_ID(13, 6) +#define M4U_PORT_L13_CAM_GCAMSV_B_UFEO_0 MTK_M4U_ID(13, 7) +#define M4U_PORT_L13_CAM_PDAI_0 MTK_M4U_ID(13, 8) +#define M4U_PORT_L13_CAM_FAKE MTK_M4U_ID(13, 9) + +/* larb14 */ +#define M4U_PORT_L14_CAM_GCAMSV_A_IMGO_1 MTK_M4U_ID(14, 0) +#define M4U_PORT_L14_CAM_SCAMSV_A_IMGO_1 MTK_M4U_ID(14, 1) +#define M4U_PORT_L14_CAM_GCAMSV_B_IMGO_0 MTK_M4U_ID(14, 2) +#define M4U_PORT_L14_CAM_GCAMSV_B_IMGO_1 MTK_M4U_ID(14, 3) +#define M4U_PORT_L14_CAM_SCAMSV_B_IMGO_0 MTK_M4U_ID(14, 4) +#define M4U_PORT_L14_CAM_SCAMSV_B_IMGO_1 MTK_M4U_ID(14, 5) +#define M4U_PORT_L14_CAM_IPUI MTK_M4U_ID(14, 6) +#define M4U_PORT_L14_CAM_IPU2I MTK_M4U_ID(14, 7) +#define M4U_PORT_L14_CAM_IPUO MTK_M4U_ID(14, 8) +#define M4U_PORT_L14_CAM_IPU2O MTK_M4U_ID(14, 9) +#define M4U_PORT_L14_CAM_IPU3O MTK_M4U_ID(14, 10) +#define M4U_PORT_L14_CAM_GCAMSV_A_UFEO_1 MTK_M4U_ID(14, 11) +#define M4U_PORT_L14_CAM_GCAMSV_B_UFEO_1 MTK_M4U_ID(14, 12) +#define M4U_PORT_L14_CAM_PDAI_1 MTK_M4U_ID(14, 13) +#define M4U_PORT_L14_CAM_PDAO MTK_M4U_ID(14, 14) + +/* larb15: null */ + +/* larb16 */ +#define M4U_PORT_L16_CAM_IMGO_R1 MTK_M4U_ID(16, 0) +#define M4U_PORT_L16_CAM_CQI_R1 MTK_M4U_ID(16, 1) +#define M4U_PORT_L16_CAM_CQI_R2 MTK_M4U_ID(16, 2) +#define M4U_PORT_L16_CAM_BPCI_R1 MTK_M4U_ID(16, 3) +#define M4U_PORT_L16_CAM_LSCI_R1 MTK_M4U_ID(16, 4) +#define M4U_PORT_L16_CAM_RAWI_R2 MTK_M4U_ID(16, 5) +#define M4U_PORT_L16_CAM_RAWI_R3 MTK_M4U_ID(16, 6) +#define M4U_PORT_L16_CAM_UFDI_R2 MTK_M4U_ID(16, 7) +#define M4U_PORT_L16_CAM_UFDI_R3 MTK_M4U_ID(16, 8) +#define M4U_PORT_L16_CAM_RAWI_R4 MTK_M4U_ID(16, 9) +#define M4U_PORT_L16_CAM_RAWI_R5 MTK_M4U_ID(16, 10) +#define M4U_PORT_L16_CAM_AAI_R1 MTK_M4U_ID(16, 11) +#define M4U_PORT_L16_CAM_FHO_R1 MTK_M4U_ID(16, 12) +#define M4U_PORT_L16_CAM_AAO_R1 MTK_M4U_ID(16, 13) +#define M4U_PORT_L16_CAM_TSFSO_R1 MTK_M4U_ID(16, 14) +#define M4U_PORT_L16_CAM_FLKO_R1 MTK_M4U_ID(16, 15) + +/* larb17 */ +#define M4U_PORT_L17_CAM_YUVO_R1 MTK_M4U_ID(17, 0) +#define M4U_PORT_L17_CAM_YUVO_R3 MTK_M4U_ID(17, 1) +#define M4U_PORT_L17_CAM_YUVCO_R1 MTK_M4U_ID(17, 2) +#define M4U_PORT_L17_CAM_YUVO_R2 MTK_M4U_ID(17, 3) +#define M4U_PORT_L17_CAM_RZH1N2TO_R1 MTK_M4U_ID(17, 4) +#define M4U_PORT_L17_CAM_DRZS4NO_R1 MTK_M4U_ID(17, 5) +#define M4U_PORT_L17_CAM_TNCSO_R1 MTK_M4U_ID(17, 6) + +/* larb18 */ +#define M4U_PORT_L18_CAM_CCUI MTK_M4U_ID(18, 0) +#define M4U_PORT_L18_CAM_CCUO MTK_M4U_ID(18, 1) +#define M4U_PORT_L18_CAM_CCUI2 MTK_M4U_ID(18, 2) +#define M4U_PORT_L18_CAM_CCUO2 MTK_M4U_ID(18, 3) + +/* larb19 */ +#define M4U_PORT_L19_VENC_RCPU MTK_M4U_ID(19, 0) +#define M4U_PORT_L19_VENC_REC MTK_M4U_ID(19, 1) +#define M4U_PORT_L19_VENC_BSDMA MTK_M4U_ID(19, 2) +#define M4U_PORT_L19_VENC_SV_COMV MTK_M4U_ID(19, 3) +#define M4U_PORT_L19_VENC_RD_COMV MTK_M4U_ID(19, 4) +#define M4U_PORT_L19_VENC_NBM_RDMA MTK_M4U_ID(19, 5) +#define M4U_PORT_L19_VENC_NBM_RDMA_LITE MTK_M4U_ID(19, 6) +#define M4U_PORT_L19_JPGENC_Y_RDMA MTK_M4U_ID(19, 7) +#define M4U_PORT_L19_JPGENC_C_RDMA MTK_M4U_ID(19, 8) +#define M4U_PORT_L19_JPGENC_Q_TABLE MTK_M4U_ID(19, 9) +#define M4U_PORT_L19_VENC_SUB_W_LUMA MTK_M4U_ID(19, 10) +#define M4U_PORT_L19_VENC_FCS_NBM_RDMA MTK_M4U_ID(19, 11) +#define M4U_PORT_L19_JPGENC_BSDMA MTK_M4U_ID(19, 12) +#define M4U_PORT_L19_JPGDEC_WDMA0 MTK_M4U_ID(19, 13) +#define M4U_PORT_L19_JPGDEC_BSDMA0 MTK_M4U_ID(19, 14) +#define M4U_PORT_L19_VENC_NBM_WDMA MTK_M4U_ID(19, 15) +#define M4U_PORT_L19_VENC_NBM_WDMA_LITE MTK_M4U_ID(19, 16) +#define M4U_PORT_L19_VENC_FCS_NBM_WDMA MTK_M4U_ID(19, 17) +#define M4U_PORT_L19_JPGDEC_WDMA1 MTK_M4U_ID(19, 18) +#define M4U_PORT_L19_JPGDEC_BSDMA1 MTK_M4U_ID(19, 19) +#define M4U_PORT_L19_JPGDEC_BUFF_OFFSET1 MTK_M4U_ID(19, 20) +#define M4U_PORT_L19_JPGDEC_BUFF_OFFSET0 MTK_M4U_ID(19, 21) +#define M4U_PORT_L19_VENC_CUR_LUMA MTK_M4U_ID(19, 22) +#define M4U_PORT_L19_VENC_CUR_CHROMA MTK_M4U_ID(19, 23) +#define M4U_PORT_L19_VENC_REF_LUMA MTK_M4U_ID(19, 24) +#define M4U_PORT_L19_VENC_REF_CHROMA MTK_M4U_ID(19, 25) +#define M4U_PORT_L19_VENC_SUB_R_CHROMA MTK_M4U_ID(19, 26) + +/* larb20 */ +#define M4U_PORT_L20_VENC_RCPU MTK_M4U_ID(20, 0) +#define M4U_PORT_L20_VENC_REC MTK_M4U_ID(20, 1) +#define M4U_PORT_L20_VENC_BSDMA MTK_M4U_ID(20, 2) +#define M4U_PORT_L20_VENC_SV_COMV MTK_M4U_ID(20, 3) +#define M4U_PORT_L20_VENC_RD_COMV MTK_M4U_ID(20, 4) +#define M4U_PORT_L20_VENC_NBM_RDMA MTK_M4U_ID(20, 5) +#define M4U_PORT_L20_VENC_NBM_RDMA_LITE MTK_M4U_ID(20, 6) +#define M4U_PORT_L20_JPGENC_Y_RDMA MTK_M4U_ID(20, 7) +#define M4U_PORT_L20_JPGENC_C_RDMA MTK_M4U_ID(20, 8) +#define M4U_PORT_L20_JPGENC_Q_TABLE MTK_M4U_ID(20, 9) +#define M4U_PORT_L20_VENC_SUB_W_LUMA MTK_M4U_ID(20, 10) +#define M4U_PORT_L20_VENC_FCS_NBM_RDMA MTK_M4U_ID(20, 11) +#define M4U_PORT_L20_JPGENC_BSDMA MTK_M4U_ID(20, 12) +#define M4U_PORT_L20_JPGDEC_WDMA0 MTK_M4U_ID(20, 13) +#define M4U_PORT_L20_JPGDEC_BSDMA0 MTK_M4U_ID(20, 14) +#define M4U_PORT_L20_VENC_NBM_WDMA MTK_M4U_ID(20, 15) +#define M4U_PORT_L20_VENC_NBM_WDMA_LITE MTK_M4U_ID(20, 16) +#define M4U_PORT_L20_VENC_FCS_NBM_WDMA MTK_M4U_ID(20, 17) +#define M4U_PORT_L20_JPGDEC_WDMA1 MTK_M4U_ID(20, 18) +#define M4U_PORT_L20_JPGDEC_BSDMA1 MTK_M4U_ID(20, 19) +#define M4U_PORT_L20_JPGDEC_BUFF_OFFSET1 MTK_M4U_ID(20, 20) +#define M4U_PORT_L20_JPGDEC_BUFF_OFFSET0 MTK_M4U_ID(20, 21) +#define M4U_PORT_L20_VENC_CUR_LUMA MTK_M4U_ID(20, 22) +#define M4U_PORT_L20_VENC_CUR_CHROMA MTK_M4U_ID(20, 23) +#define M4U_PORT_L20_VENC_REF_LUMA MTK_M4U_ID(20, 24) +#define M4U_PORT_L20_VENC_REF_CHROMA MTK_M4U_ID(20, 25) +#define M4U_PORT_L20_VENC_SUB_R_CHROMA MTK_M4U_ID(20, 26) + +/* larb21 */ +#define M4U_PORT_L21_VDEC_MC_EXT MTK_M4U_ID(21, 0) +#define M4U_PORT_L21_VDEC_UFO_EXT MTK_M4U_ID(21, 1) +#define M4U_PORT_L21_VDEC_PP_EXT MTK_M4U_ID(21, 2) +#define M4U_PORT_L21_VDEC_PRED_RD_EXT MTK_M4U_ID(21, 3) +#define M4U_PORT_L21_VDEC_PRED_WR_EXT MTK_M4U_ID(21, 4) +#define M4U_PORT_L21_VDEC_PPWRAP_EXT MTK_M4U_ID(21, 5) +#define M4U_PORT_L21_VDEC_TILE_EXT MTK_M4U_ID(21, 6) +#define M4U_PORT_L21_VDEC_VLD_EXT MTK_M4U_ID(21, 7) +#define M4U_PORT_L21_VDEC_VLD2_EXT MTK_M4U_ID(21, 8) +#define M4U_PORT_L21_VDEC_AVC_MV_EXT MTK_M4U_ID(21, 9) + +/* larb22 */ +#define M4U_PORT_L22_VDEC_MC_EXT MTK_M4U_ID(22, 0) +#define M4U_PORT_L22_VDEC_UFO_EXT MTK_M4U_ID(22, 1) +#define M4U_PORT_L22_VDEC_PP_EXT MTK_M4U_ID(22, 2) +#define M4U_PORT_L22_VDEC_PRED_RD_EXT MTK_M4U_ID(22, 3) +#define M4U_PORT_L22_VDEC_PRED_WR_EXT MTK_M4U_ID(22, 4) +#define M4U_PORT_L22_VDEC_PPWRAP_EXT MTK_M4U_ID(22, 5) +#define M4U_PORT_L22_VDEC_TILE_EXT MTK_M4U_ID(22, 6) +#define M4U_PORT_L22_VDEC_VLD_EXT MTK_M4U_ID(22, 7) +#define M4U_PORT_L22_VDEC_VLD2_EXT MTK_M4U_ID(22, 8) +#define M4U_PORT_L22_VDEC_AVC_MV_EXT MTK_M4U_ID(22, 9) + +/* larb23 */ +#define M4U_PORT_L23_VDEC_UFO_ENC_EXT MTK_M4U_ID(23, 0) +#define M4U_PORT_L23_VDEC_RDMA_EXT MTK_M4U_ID(23, 1) + +/* larb24 */ +#define M4U_PORT_L24_VDEC_LAT0_VLD_EXT MTK_M4U_ID(24, 0) +#define M4U_PORT_L24_VDEC_LAT0_VLD2_EXT MTK_M4U_ID(24, 1) +#define M4U_PORT_L24_VDEC_LAT0_AVC_MC_EXT MTK_M4U_ID(24, 2) +#define M4U_PORT_L24_VDEC_LAT0_PRED_RD_EXT MTK_M4U_ID(24, 3) +#define M4U_PORT_L24_VDEC_LAT0_TILE_EXT MTK_M4U_ID(24, 4) +#define M4U_PORT_L24_VDEC_LAT0_WDMA_EXT MTK_M4U_ID(24, 5) +#define M4U_PORT_L24_VDEC_LAT1_VLD_EXT MTK_M4U_ID(24, 6) +#define M4U_PORT_L24_VDEC_LAT1_VLD2_EXT MTK_M4U_ID(24, 7) +#define M4U_PORT_L24_VDEC_LAT1_AVC_MC_EXT MTK_M4U_ID(24, 8) +#define M4U_PORT_L24_VDEC_LAT1_PRED_RD_EXT MTK_M4U_ID(24, 9) +#define M4U_PORT_L24_VDEC_LAT1_TILE_EXT MTK_M4U_ID(24, 10) +#define M4U_PORT_L24_VDEC_LAT1_WDMA_EXT MTK_M4U_ID(24, 11) + +/* larb25 */ +#define M4U_PORT_L25_CAM_MRAW0_LSCI_M1 MTK_M4U_ID(25, 0) +#define M4U_PORT_L25_CAM_MRAW0_CQI_M1 MTK_M4U_ID(25, 1) +#define M4U_PORT_L25_CAM_MRAW0_CQI_M2 MTK_M4U_ID(25, 2) +#define M4U_PORT_L25_CAM_MRAW0_IMGO_M1 MTK_M4U_ID(25, 3) +#define M4U_PORT_L25_CAM_MRAW0_IMGBO_M1 MTK_M4U_ID(25, 4) +#define M4U_PORT_L25_CAM_MRAW2_LSCI_M1 MTK_M4U_ID(25, 5) +#define M4U_PORT_L25_CAM_MRAW2_CQI_M1 MTK_M4U_ID(25, 6) +#define M4U_PORT_L25_CAM_MRAW2_CQI_M2 MTK_M4U_ID(25, 7) +#define M4U_PORT_L25_CAM_MRAW2_IMGO_M1 MTK_M4U_ID(25, 8) +#define M4U_PORT_L25_CAM_MRAW2_IMGBO_M1 MTK_M4U_ID(25, 9) +#define M4U_PORT_L25_CAM_MRAW0_AFO_M1 MTK_M4U_ID(25, 10) +#define M4U_PORT_L25_CAM_MRAW2_AFO_M1 MTK_M4U_ID(25, 11) + +/* larb26 */ +#define M4U_PORT_L26_CAM_MRAW1_LSCI_M1 MTK_M4U_ID(26, 0) +#define M4U_PORT_L26_CAM_MRAW1_CQI_M1 MTK_M4U_ID(26, 1) +#define M4U_PORT_L26_CAM_MRAW1_CQI_M2 MTK_M4U_ID(26, 2) +#define M4U_PORT_L26_CAM_MRAW1_IMGO_M1 MTK_M4U_ID(26, 3) +#define M4U_PORT_L26_CAM_MRAW1_IMGBO_M1 MTK_M4U_ID(26, 4) +#define M4U_PORT_L26_CAM_MRAW3_LSCI_M1 MTK_M4U_ID(26, 5) +#define M4U_PORT_L26_CAM_MRAW3_CQI_M1 MTK_M4U_ID(26, 6) +#define M4U_PORT_L26_CAM_MRAW3_CQI_M2 MTK_M4U_ID(26, 7) +#define M4U_PORT_L26_CAM_MRAW3_IMGO_M1 MTK_M4U_ID(26, 8) +#define M4U_PORT_L26_CAM_MRAW3_IMGBO_M1 MTK_M4U_ID(26, 9) +#define M4U_PORT_L26_CAM_MRAW1_AFO_M1 MTK_M4U_ID(26, 10) +#define M4U_PORT_L26_CAM_MRAW3_AFO_M1 MTK_M4U_ID(26, 11) + +/* larb27 */ +#define M4U_PORT_L27_CAM_IMGO_R1 MTK_M4U_ID(27, 0) +#define M4U_PORT_L27_CAM_CQI_R1 MTK_M4U_ID(27, 1) +#define M4U_PORT_L27_CAM_CQI_R2 MTK_M4U_ID(27, 2) +#define M4U_PORT_L27_CAM_BPCI_R1 MTK_M4U_ID(27, 3) +#define M4U_PORT_L27_CAM_LSCI_R1 MTK_M4U_ID(27, 4) +#define M4U_PORT_L27_CAM_RAWI_R2 MTK_M4U_ID(27, 5) +#define M4U_PORT_L27_CAM_RAWI_R3 MTK_M4U_ID(27, 6) +#define M4U_PORT_L27_CAM_UFDI_R2 MTK_M4U_ID(27, 7) +#define M4U_PORT_L27_CAM_UFDI_R3 MTK_M4U_ID(27, 8) +#define M4U_PORT_L27_CAM_RAWI_R4 MTK_M4U_ID(27, 9) +#define M4U_PORT_L27_CAM_RAWI_R5 MTK_M4U_ID(27, 10) +#define M4U_PORT_L27_CAM_AAI_R1 MTK_M4U_ID(27, 11) +#define M4U_PORT_L27_CAM_FHO_R1 MTK_M4U_ID(27, 12) +#define M4U_PORT_L27_CAM_AAO_R1 MTK_M4U_ID(27, 13) +#define M4U_PORT_L27_CAM_TSFSO_R1 MTK_M4U_ID(27, 14) +#define M4U_PORT_L27_CAM_FLKO_R1 MTK_M4U_ID(27, 15) + +/* larb28 */ +#define M4U_PORT_L28_CAM_YUVO_R1 MTK_M4U_ID(28, 0) +#define M4U_PORT_L28_CAM_YUVO_R3 MTK_M4U_ID(28, 1) +#define M4U_PORT_L28_CAM_YUVCO_R1 MTK_M4U_ID(28, 2) +#define M4U_PORT_L28_CAM_YUVO_R2 MTK_M4U_ID(28, 3) +#define M4U_PORT_L28_CAM_RZH1N2TO_R1 MTK_M4U_ID(28, 4) +#define M4U_PORT_L28_CAM_DRZS4NO_R1 MTK_M4U_ID(28, 5) +#define M4U_PORT_L28_CAM_TNCSO_R1 MTK_M4U_ID(28, 6) + +#endif From dc1d99342db705efbeec517f9c6187f3b93ab423 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:13:53 +0800 Subject: [PATCH 536/803] dt-bindings: mediatek: mt8195: Add binding for infra IOMMU In mt8195, we have a new IOMMU that is for INFRA IOMMU. its masters mainly are PCIe and USB. Different with MM IOMMU, all these masters connect with IOMMU directly, there is no mediatek,larbs property for infra IOMMU. Another thing is about PCIe ports. currently the function "of_iommu_configure_dev_id" only support the id number is 1, But our PCIe have two ports, one is for reading and the other is for writing. see more about the PCIe patch in this patchset. Thus, I only list the reading id here and add the other id in our driver. Signed-off-by: Yong Wu Acked-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-3-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- .../bindings/iommu/mediatek,iommu.yaml | 13 ++++++++++++- .../dt-bindings/memory/mt8195-memory-port.h | 18 ++++++++++++++++++ include/dt-bindings/memory/mtk-memory-port.h | 2 ++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml index 2223408e91a9..eed59ec00e78 100644 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml @@ -79,6 +79,7 @@ properties: - mediatek,mt8192-m4u # generation two - mediatek,mt8195-iommu-vdo # generation two - mediatek,mt8195-iommu-vpp # generation two + - mediatek,mt8195-iommu-infra # generation two - description: mt7623 generation one items: @@ -131,7 +132,6 @@ required: - compatible - reg - interrupts - - mediatek,larbs - '#iommu-cells' allOf: @@ -163,6 +163,17 @@ allOf: required: - power-domains + - if: # The IOMMUs don't have larbs. + not: + properties: + compatible: + contains: + const: mediatek,mt8195-iommu-infra + + then: + required: + - mediatek,larbs + additionalProperties: false examples: diff --git a/include/dt-bindings/memory/mt8195-memory-port.h b/include/dt-bindings/memory/mt8195-memory-port.h index c10e8b61f1e8..70ba9f498eeb 100644 --- a/include/dt-bindings/memory/mt8195-memory-port.h +++ b/include/dt-bindings/memory/mt8195-memory-port.h @@ -387,4 +387,22 @@ #define M4U_PORT_L28_CAM_DRZS4NO_R1 MTK_M4U_ID(28, 5) #define M4U_PORT_L28_CAM_TNCSO_R1 MTK_M4U_ID(28, 6) +/* Infra iommu ports */ +/* PCIe1: read: BIT16; write BIT17. */ +#define IOMMU_PORT_INFRA_PCIE1 MTK_IFAIOMMU_PERI_ID(16) +/* PCIe0: read: BIT18; write BIT19. */ +#define IOMMU_PORT_INFRA_PCIE0 MTK_IFAIOMMU_PERI_ID(18) +#define IOMMU_PORT_INFRA_SSUSB_P3_R MTK_IFAIOMMU_PERI_ID(20) +#define IOMMU_PORT_INFRA_SSUSB_P3_W MTK_IFAIOMMU_PERI_ID(21) +#define IOMMU_PORT_INFRA_SSUSB_P2_R MTK_IFAIOMMU_PERI_ID(22) +#define IOMMU_PORT_INFRA_SSUSB_P2_W MTK_IFAIOMMU_PERI_ID(23) +#define IOMMU_PORT_INFRA_SSUSB_P1_1_R MTK_IFAIOMMU_PERI_ID(24) +#define IOMMU_PORT_INFRA_SSUSB_P1_1_W MTK_IFAIOMMU_PERI_ID(25) +#define IOMMU_PORT_INFRA_SSUSB_P1_0_R MTK_IFAIOMMU_PERI_ID(26) +#define IOMMU_PORT_INFRA_SSUSB_P1_0_W MTK_IFAIOMMU_PERI_ID(27) +#define IOMMU_PORT_INFRA_SSUSB2_R MTK_IFAIOMMU_PERI_ID(28) +#define IOMMU_PORT_INFRA_SSUSB2_W MTK_IFAIOMMU_PERI_ID(29) +#define IOMMU_PORT_INFRA_SSUSB_R MTK_IFAIOMMU_PERI_ID(30) +#define IOMMU_PORT_INFRA_SSUSB_W MTK_IFAIOMMU_PERI_ID(31) + #endif diff --git a/include/dt-bindings/memory/mtk-memory-port.h b/include/dt-bindings/memory/mtk-memory-port.h index 7d64103209af..2f68a0511a25 100644 --- a/include/dt-bindings/memory/mtk-memory-port.h +++ b/include/dt-bindings/memory/mtk-memory-port.h @@ -12,4 +12,6 @@ #define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0x1f) #define MTK_M4U_TO_PORT(id) ((id) & 0x1f) +#define MTK_IFAIOMMU_PERI_ID(port) MTK_M4U_ID(0, port) + #endif From 2d555a3844142b9e0f876925a8475cc830cd472a Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:13:54 +0800 Subject: [PATCH 537/803] dt-bindings: mediatek: mt8186: Add binding for MM iommu Add mt8186 iommu binding. "-mm" means the iommu is for Multimedia. Signed-off-by: Yong Wu Acked-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Reviewed-by: Matthias Brugger Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220503071427.2285-4-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- .../bindings/iommu/mediatek,iommu.yaml | 4 + .../dt-bindings/memory/mt8186-memory-port.h | 217 ++++++++++++++++++ 2 files changed, 221 insertions(+) create mode 100644 include/dt-bindings/memory/mt8186-memory-port.h diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml index eed59ec00e78..91a3629a8e6e 100644 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml @@ -76,6 +76,7 @@ properties: - mediatek,mt8167-m4u # generation two - mediatek,mt8173-m4u # generation two - mediatek,mt8183-m4u # generation two + - mediatek,mt8186-iommu-mm # generation two - mediatek,mt8192-m4u # generation two - mediatek,mt8195-iommu-vdo # generation two - mediatek,mt8195-iommu-vpp # generation two @@ -122,6 +123,7 @@ properties: dt-binding/memory/mt8167-larb-port.h for mt8167, dt-binding/memory/mt8173-larb-port.h for mt8173, dt-binding/memory/mt8183-larb-port.h for mt8183, + dt-binding/memory/mt8186-memory-port.h for mt8186, dt-binding/memory/mt8192-larb-port.h for mt8192. dt-binding/memory/mt8195-memory-port.h for mt8195. @@ -143,6 +145,7 @@ allOf: - mediatek,mt2701-m4u - mediatek,mt2712-m4u - mediatek,mt8173-m4u + - mediatek,mt8186-iommu-mm - mediatek,mt8192-m4u - mediatek,mt8195-iommu-vdo - mediatek,mt8195-iommu-vpp @@ -155,6 +158,7 @@ allOf: properties: compatible: enum: + - mediatek,mt8186-iommu-mm - mediatek,mt8192-m4u - mediatek,mt8195-iommu-vdo - mediatek,mt8195-iommu-vpp diff --git a/include/dt-bindings/memory/mt8186-memory-port.h b/include/dt-bindings/memory/mt8186-memory-port.h new file mode 100644 index 000000000000..2bc6e4433048 --- /dev/null +++ b/include/dt-bindings/memory/mt8186-memory-port.h @@ -0,0 +1,217 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 MediaTek Inc. + * + * Author: Anan Sun + * Author: Yong Wu + */ +#ifndef _DT_BINDINGS_MEMORY_MT8186_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT8186_LARB_PORT_H_ + +#include + +/* + * MM IOMMU supports 16GB dma address. We separate it to four ranges: + * 0 ~ 4G; 4G ~ 8G; 8G ~ 12G; 12G ~ 16G, we could adjust these masters + * locate in anyone region. BUT: + * a) Make sure all the ports inside a larb are in one range. + * b) The iova of any master can NOT cross the 4G/8G/12G boundary. + * + * This is the suggested mapping in this SoC: + * + * modules dma-address-region larbs-ports + * disp 0 ~ 4G larb0/1/2 + * vcodec 4G ~ 8G larb4/7 + * cam/mdp 8G ~ 12G the other larbs. + * N/A 12G ~ 16G + * CCU0 0x24000_0000 ~ 0x243ff_ffff larb13: port 9/10 + * CCU1 0x24400_0000 ~ 0x247ff_ffff larb14: port 4/5 + */ + +/* MM IOMMU ports */ +/* LARB 0 -- MMSYS */ +#define IOMMU_PORT_L0_DISP_POSTMASK0 MTK_M4U_ID(0, 0) +#define IOMMU_PORT_L0_REVERSED MTK_M4U_ID(0, 1) +#define IOMMU_PORT_L0_OVL_RDMA0 MTK_M4U_ID(0, 2) +#define IOMMU_PORT_L0_DISP_FAKE0 MTK_M4U_ID(0, 3) + +/* LARB 1 -- MMSYS */ +#define IOMMU_PORT_L1_DISP_RDMA1 MTK_M4U_ID(1, 0) +#define IOMMU_PORT_L1_OVL_2L_RDMA0 MTK_M4U_ID(1, 1) +#define IOMMU_PORT_L1_DISP_RDMA0 MTK_M4U_ID(1, 2) +#define IOMMU_PORT_L1_DISP_WDMA0 MTK_M4U_ID(1, 3) +#define IOMMU_PORT_L1_DISP_FAKE1 MTK_M4U_ID(1, 4) + +/* LARB 2 -- MMSYS */ +#define IOMMU_PORT_L2_MDP_RDMA0 MTK_M4U_ID(2, 0) +#define IOMMU_PORT_L2_MDP_RDMA1 MTK_M4U_ID(2, 1) +#define IOMMU_PORT_L2_MDP_WROT0 MTK_M4U_ID(2, 2) +#define IOMMU_PORT_L2_MDP_WROT1 MTK_M4U_ID(2, 3) +#define IOMMU_PORT_L2_DISP_FAKE0 MTK_M4U_ID(2, 4) + +/* LARB 4 -- VDEC */ +#define IOMMU_PORT_L4_HW_VDEC_MC_EXT MTK_M4U_ID(4, 0) +#define IOMMU_PORT_L4_HW_VDEC_UFO_EXT MTK_M4U_ID(4, 1) +#define IOMMU_PORT_L4_HW_VDEC_PP_EXT MTK_M4U_ID(4, 2) +#define IOMMU_PORT_L4_HW_VDEC_PRED_RD_EXT MTK_M4U_ID(4, 3) +#define IOMMU_PORT_L4_HW_VDEC_PRED_WR_EXT MTK_M4U_ID(4, 4) +#define IOMMU_PORT_L4_HW_VDEC_PPWRAP_EXT MTK_M4U_ID(4, 5) +#define IOMMU_PORT_L4_HW_VDEC_TILE_EXT MTK_M4U_ID(4, 6) +#define IOMMU_PORT_L4_HW_VDEC_VLD_EXT MTK_M4U_ID(4, 7) +#define IOMMU_PORT_L4_HW_VDEC_VLD2_EXT MTK_M4U_ID(4, 8) +#define IOMMU_PORT_L4_HW_VDEC_AVC_MV_EXT MTK_M4U_ID(4, 9) +#define IOMMU_PORT_L4_HW_VDEC_UFO_ENC_EXT MTK_M4U_ID(4, 10) +#define IOMMU_PORT_L4_HW_VDEC_RG_CTRL_DMA_EXT MTK_M4U_ID(4, 11) +#define IOMMU_PORT_L4_HW_MINI_MDP_R0_EXT MTK_M4U_ID(4, 12) +#define IOMMU_PORT_L4_HW_MINI_MDP_W0_EXT MTK_M4U_ID(4, 13) + +/* LARB 7 -- VENC */ +#define IOMMU_PORT_L7_VENC_RCPU MTK_M4U_ID(7, 0) +#define IOMMU_PORT_L7_VENC_REC MTK_M4U_ID(7, 1) +#define IOMMU_PORT_L7_VENC_BSDMA MTK_M4U_ID(7, 2) +#define IOMMU_PORT_L7_VENC_SV_COMV MTK_M4U_ID(7, 3) +#define IOMMU_PORT_L7_VENC_RD_COMV MTK_M4U_ID(7, 4) +#define IOMMU_PORT_L7_VENC_CUR_LUMA MTK_M4U_ID(7, 5) +#define IOMMU_PORT_L7_VENC_CUR_CHROMA MTK_M4U_ID(7, 6) +#define IOMMU_PORT_L7_VENC_REF_LUMA MTK_M4U_ID(7, 7) +#define IOMMU_PORT_L7_VENC_REF_CHROMA MTK_M4U_ID(7, 8) +#define IOMMU_PORT_L7_JPGENC_Y_RDMA MTK_M4U_ID(7, 9) +#define IOMMU_PORT_L7_JPGENC_C_RDMA MTK_M4U_ID(7, 10) +#define IOMMU_PORT_L7_JPGENC_Q_TABLE MTK_M4U_ID(7, 11) +#define IOMMU_PORT_L7_JPGENC_BSDMA MTK_M4U_ID(7, 12) + +/* LARB 8 -- WPE */ +#define IOMMU_PORT_L8_WPE_RDMA_0 MTK_M4U_ID(8, 0) +#define IOMMU_PORT_L8_WPE_RDMA_1 MTK_M4U_ID(8, 1) +#define IOMMU_PORT_L8_WPE_WDMA_0 MTK_M4U_ID(8, 2) + +/* LARB 9 -- IMG-1 */ +#define IOMMU_PORT_L9_IMG_IMGI_D1 MTK_M4U_ID(9, 0) +#define IOMMU_PORT_L9_IMG_IMGBI_D1 MTK_M4U_ID(9, 1) +#define IOMMU_PORT_L9_IMG_DMGI_D1 MTK_M4U_ID(9, 2) +#define IOMMU_PORT_L9_IMG_DEPI_D1 MTK_M4U_ID(9, 3) +#define IOMMU_PORT_L9_IMG_LCE_D1 MTK_M4U_ID(9, 4) +#define IOMMU_PORT_L9_IMG_SMTI_D1 MTK_M4U_ID(9, 5) +#define IOMMU_PORT_L9_IMG_SMTO_D2 MTK_M4U_ID(9, 6) +#define IOMMU_PORT_L9_IMG_SMTO_D1 MTK_M4U_ID(9, 7) +#define IOMMU_PORT_L9_IMG_CRZO_D1 MTK_M4U_ID(9, 8) +#define IOMMU_PORT_L9_IMG_IMG3O_D1 MTK_M4U_ID(9, 9) +#define IOMMU_PORT_L9_IMG_VIPI_D1 MTK_M4U_ID(9, 10) +#define IOMMU_PORT_L9_IMG_SMTI_D5 MTK_M4U_ID(9, 11) +#define IOMMU_PORT_L9_IMG_TIMGO_D1 MTK_M4U_ID(9, 12) +#define IOMMU_PORT_L9_IMG_UFBC_W0 MTK_M4U_ID(9, 13) +#define IOMMU_PORT_L9_IMG_UFBC_R0 MTK_M4U_ID(9, 14) +#define IOMMU_PORT_L9_IMG_WPE_RDMA1 MTK_M4U_ID(9, 15) +#define IOMMU_PORT_L9_IMG_WPE_RDMA0 MTK_M4U_ID(9, 16) +#define IOMMU_PORT_L9_IMG_WPE_WDMA MTK_M4U_ID(9, 17) +#define IOMMU_PORT_L9_IMG_MFB_RDMA0 MTK_M4U_ID(9, 18) +#define IOMMU_PORT_L9_IMG_MFB_RDMA1 MTK_M4U_ID(9, 19) +#define IOMMU_PORT_L9_IMG_MFB_RDMA2 MTK_M4U_ID(9, 20) +#define IOMMU_PORT_L9_IMG_MFB_RDMA3 MTK_M4U_ID(9, 21) +#define IOMMU_PORT_L9_IMG_MFB_RDMA4 MTK_M4U_ID(9, 22) +#define IOMMU_PORT_L9_IMG_MFB_RDMA5 MTK_M4U_ID(9, 23) +#define IOMMU_PORT_L9_IMG_MFB_WDMA0 MTK_M4U_ID(9, 24) +#define IOMMU_PORT_L9_IMG_MFB_WDMA1 MTK_M4U_ID(9, 25) +#define IOMMU_PORT_L9_IMG_RESERVE6 MTK_M4U_ID(9, 26) +#define IOMMU_PORT_L9_IMG_RESERVE7 MTK_M4U_ID(9, 27) +#define IOMMU_PORT_L9_IMG_RESERVE8 MTK_M4U_ID(9, 28) + +/* LARB 11 -- IMG-2 */ +#define IOMMU_PORT_L11_IMG_IMGI_D1 MTK_M4U_ID(11, 0) +#define IOMMU_PORT_L11_IMG_IMGBI_D1 MTK_M4U_ID(11, 1) +#define IOMMU_PORT_L11_IMG_DMGI_D1 MTK_M4U_ID(11, 2) +#define IOMMU_PORT_L11_IMG_DEPI_D1 MTK_M4U_ID(11, 3) +#define IOMMU_PORT_L11_IMG_LCE_D1 MTK_M4U_ID(11, 4) +#define IOMMU_PORT_L11_IMG_SMTI_D1 MTK_M4U_ID(11, 5) +#define IOMMU_PORT_L11_IMG_SMTO_D2 MTK_M4U_ID(11, 6) +#define IOMMU_PORT_L11_IMG_SMTO_D1 MTK_M4U_ID(11, 7) +#define IOMMU_PORT_L11_IMG_CRZO_D1 MTK_M4U_ID(11, 8) +#define IOMMU_PORT_L11_IMG_IMG3O_D1 MTK_M4U_ID(11, 9) +#define IOMMU_PORT_L11_IMG_VIPI_D1 MTK_M4U_ID(11, 10) +#define IOMMU_PORT_L11_IMG_SMTI_D5 MTK_M4U_ID(11, 11) +#define IOMMU_PORT_L11_IMG_TIMGO_D1 MTK_M4U_ID(11, 12) +#define IOMMU_PORT_L11_IMG_UFBC_W0 MTK_M4U_ID(11, 13) +#define IOMMU_PORT_L11_IMG_UFBC_R0 MTK_M4U_ID(11, 14) +#define IOMMU_PORT_L11_IMG_WPE_RDMA1 MTK_M4U_ID(11, 15) +#define IOMMU_PORT_L11_IMG_WPE_RDMA0 MTK_M4U_ID(11, 16) +#define IOMMU_PORT_L11_IMG_WPE_WDMA MTK_M4U_ID(11, 17) +#define IOMMU_PORT_L11_IMG_MFB_RDMA0 MTK_M4U_ID(11, 18) +#define IOMMU_PORT_L11_IMG_MFB_RDMA1 MTK_M4U_ID(11, 19) +#define IOMMU_PORT_L11_IMG_MFB_RDMA2 MTK_M4U_ID(11, 20) +#define IOMMU_PORT_L11_IMG_MFB_RDMA3 MTK_M4U_ID(11, 21) +#define IOMMU_PORT_L11_IMG_MFB_RDMA4 MTK_M4U_ID(11, 22) +#define IOMMU_PORT_L11_IMG_MFB_RDMA5 MTK_M4U_ID(11, 23) +#define IOMMU_PORT_L11_IMG_MFB_WDMA0 MTK_M4U_ID(11, 24) +#define IOMMU_PORT_L11_IMG_MFB_WDMA1 MTK_M4U_ID(11, 25) +#define IOMMU_PORT_L11_IMG_RESERVE6 MTK_M4U_ID(11, 26) +#define IOMMU_PORT_L11_IMG_RESERVE7 MTK_M4U_ID(11, 27) +#define IOMMU_PORT_L11_IMG_RESERVE8 MTK_M4U_ID(11, 28) + +/* LARB 13 -- CAM */ +#define IOMMU_PORT_L13_CAM_MRAWI MTK_M4U_ID(13, 0) +#define IOMMU_PORT_L13_CAM_MRAWO_0 MTK_M4U_ID(13, 1) +#define IOMMU_PORT_L13_CAM_MRAWO_1 MTK_M4U_ID(13, 2) +#define IOMMU_PORT_L13_CAM_CAMSV_4 MTK_M4U_ID(13, 6) +#define IOMMU_PORT_L13_CAM_CAMSV_5 MTK_M4U_ID(13, 7) +#define IOMMU_PORT_L13_CAM_CAMSV_6 MTK_M4U_ID(13, 8) +#define IOMMU_PORT_L13_CAM_CCUI MTK_M4U_ID(13, 9) +#define IOMMU_PORT_L13_CAM_CCUO MTK_M4U_ID(13, 10) +#define IOMMU_PORT_L13_CAM_FAKE MTK_M4U_ID(13, 11) + +/* LARB 14 -- CAM */ +#define IOMMU_PORT_L14_CAM_CCUI MTK_M4U_ID(14, 4) +#define IOMMU_PORT_L14_CAM_CCUO MTK_M4U_ID(14, 5) + +/* LARB 16 -- RAW-A */ +#define IOMMU_PORT_L16_CAM_IMGO_R1_A MTK_M4U_ID(16, 0) +#define IOMMU_PORT_L16_CAM_RRZO_R1_A MTK_M4U_ID(16, 1) +#define IOMMU_PORT_L16_CAM_CQI_R1_A MTK_M4U_ID(16, 2) +#define IOMMU_PORT_L16_CAM_BPCI_R1_A MTK_M4U_ID(16, 3) +#define IOMMU_PORT_L16_CAM_YUVO_R1_A MTK_M4U_ID(16, 4) +#define IOMMU_PORT_L16_CAM_UFDI_R2_A MTK_M4U_ID(16, 5) +#define IOMMU_PORT_L16_CAM_RAWI_R2_A MTK_M4U_ID(16, 6) +#define IOMMU_PORT_L16_CAM_RAWI_R3_A MTK_M4U_ID(16, 7) +#define IOMMU_PORT_L16_CAM_AAO_R1_A MTK_M4U_ID(16, 8) +#define IOMMU_PORT_L16_CAM_AFO_R1_A MTK_M4U_ID(16, 9) +#define IOMMU_PORT_L16_CAM_FLKO_R1_A MTK_M4U_ID(16, 10) +#define IOMMU_PORT_L16_CAM_LCESO_R1_A MTK_M4U_ID(16, 11) +#define IOMMU_PORT_L16_CAM_CRZO_R1_A MTK_M4U_ID(16, 12) +#define IOMMU_PORT_L16_CAM_LTMSO_R1_A MTK_M4U_ID(16, 13) +#define IOMMU_PORT_L16_CAM_RSSO_R1_A MTK_M4U_ID(16, 14) +#define IOMMU_PORT_L16_CAM_AAHO_R1_A MTK_M4U_ID(16, 15) +#define IOMMU_PORT_L16_CAM_LSCI_R1_A MTK_M4U_ID(16, 16) + +/* LARB 17 -- RAW-B */ +#define IOMMU_PORT_L17_CAM_IMGO_R1_B MTK_M4U_ID(17, 0) +#define IOMMU_PORT_L17_CAM_RRZO_R1_B MTK_M4U_ID(17, 1) +#define IOMMU_PORT_L17_CAM_CQI_R1_B MTK_M4U_ID(17, 2) +#define IOMMU_PORT_L17_CAM_BPCI_R1_B MTK_M4U_ID(17, 3) +#define IOMMU_PORT_L17_CAM_YUVO_R1_B MTK_M4U_ID(17, 4) +#define IOMMU_PORT_L17_CAM_UFDI_R2_B MTK_M4U_ID(17, 5) +#define IOMMU_PORT_L17_CAM_RAWI_R2_B MTK_M4U_ID(17, 6) +#define IOMMU_PORT_L17_CAM_RAWI_R3_B MTK_M4U_ID(17, 7) +#define IOMMU_PORT_L17_CAM_AAO_R1_B MTK_M4U_ID(17, 8) +#define IOMMU_PORT_L17_CAM_AFO_R1_B MTK_M4U_ID(17, 9) +#define IOMMU_PORT_L17_CAM_FLKO_R1_B MTK_M4U_ID(17, 10) +#define IOMMU_PORT_L17_CAM_LCESO_R1_B MTK_M4U_ID(17, 11) +#define IOMMU_PORT_L17_CAM_CRZO_R1_B MTK_M4U_ID(17, 12) +#define IOMMU_PORT_L17_CAM_LTMSO_R1_B MTK_M4U_ID(17, 13) +#define IOMMU_PORT_L17_CAM_RSSO_R1_B MTK_M4U_ID(17, 14) +#define IOMMU_PORT_L17_CAM_AAHO_R1_B MTK_M4U_ID(17, 15) +#define IOMMU_PORT_L17_CAM_LSCI_R1_B MTK_M4U_ID(17, 16) + +/* LARB 19 -- IPE */ +#define IOMMU_PORT_L19_IPE_DVS_RDMA MTK_M4U_ID(19, 0) +#define IOMMU_PORT_L19_IPE_DVS_WDMA MTK_M4U_ID(19, 1) +#define IOMMU_PORT_L19_IPE_DVP_RDMA MTK_M4U_ID(19, 2) +#define IOMMU_PORT_L19_IPE_DVP_WDMA MTK_M4U_ID(19, 3) + +/* LARB 20 -- IPE */ +#define IOMMU_PORT_L20_IPE_FDVT_RDA MTK_M4U_ID(20, 0) +#define IOMMU_PORT_L20_IPE_FDVT_RDB MTK_M4U_ID(20, 1) +#define IOMMU_PORT_L20_IPE_FDVT_WRA MTK_M4U_ID(20, 2) +#define IOMMU_PORT_L20_IPE_FDVT_WRB MTK_M4U_ID(20, 3) +#define IOMMU_PORT_L20_IPE_RSC_RDMA0 MTK_M4U_ID(20, 4) +#define IOMMU_PORT_L20_IPE_RSC_WDMA MTK_M4U_ID(20, 5) + +#endif From 645b87c190c959e9bb4f216b8c4add4ee880451a Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:13:55 +0800 Subject: [PATCH 538/803] iommu/mediatek: Fix 2 HW sharing pgtable issue In the commit 4f956c97d26b ("iommu/mediatek: Move domain_finalise into attach_device"), I overlooked the sharing pgtable case. After that commit, the "data" in the mtk_iommu_domain_finalise always is the data of the current IOMMU HW. Fix this for the sharing pgtable case. Only affect mt2712 which is the only SoC that share pgtable currently. Fixes: 4f956c97d26b ("iommu/mediatek: Move domain_finalise into attach_device") Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-5-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 6fd75a60abd6..95c82b8bcc35 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -446,7 +446,7 @@ static void mtk_iommu_domain_free(struct iommu_domain *domain) static int mtk_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { - struct mtk_iommu_data *data = dev_iommu_priv_get(dev); + struct mtk_iommu_data *data = dev_iommu_priv_get(dev), *frstdata; struct mtk_iommu_domain *dom = to_mtk_domain(domain); struct device *m4udev = data->dev; int ret, domid; @@ -456,7 +456,10 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, return domid; if (!dom->data) { - if (mtk_iommu_domain_finalise(dom, data, domid)) + /* Data is in the frstdata in sharing pgtable case. */ + frstdata = mtk_iommu_get_m4u_data(); + + if (mtk_iommu_domain_finalise(dom, frstdata, domid)) return -ENODEV; dom->data = data; } From ee55f75e4bcade81d253163641b63bef3e76cac4 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:13:56 +0800 Subject: [PATCH 539/803] iommu/mediatek: Add list_del in mtk_iommu_remove Lack the list_del in the mtk_iommu_remove, and remove bus_set_iommu(*, NULL) since there may be several iommu HWs. we can not bus_set_iommu null when one iommu driver unbind. This could be a fix for mt2712 which support 2 M4U HW and list them. Fixes: 7c3a2ec02806 ("iommu/mediatek: Merge 2 M4U HWs into one iommu domain") Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-6-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 95c82b8bcc35..e4b4ebbcb73f 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -955,8 +955,7 @@ static int mtk_iommu_remove(struct platform_device *pdev) iommu_device_sysfs_remove(&data->iommu); iommu_device_unregister(&data->iommu); - if (iommu_present(&platform_bus_type)) - bus_set_iommu(&platform_bus_type, NULL); + list_del(&data->list); clk_disable_unprepare(data->bclk); device_link_remove(data->smicomm_dev, &pdev->dev); From 98df772bdd1c4ce717a26289efea15cbbe4b64ed Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:13:57 +0800 Subject: [PATCH 540/803] iommu/mediatek: Remove clk_disable in mtk_iommu_remove After the commit b34ea31fe013 ("iommu/mediatek: Always enable the clk on resume"), the iommu clock is controlled by the runtime callback. thus remove the clk control in the mtk_iommu_remove. Otherwise, it will warning like: echo 14018000.iommu > /sys/bus/platform/drivers/mtk-iommu/unbind [ 51.413044] ------------[ cut here ]------------ [ 51.413648] vpp0_smi_iommu already disabled [ 51.414233] WARNING: CPU: 2 PID: 157 at */v5.15-rc1/kernel/mediatek/ drivers/clk/clk.c:952 clk_core_disable+0xb0/0xb8 [ 51.417174] Hardware name: MT8195V/C(ENG) (DT) [ 51.418635] pc : clk_core_disable+0xb0/0xb8 [ 51.419177] lr : clk_core_disable+0xb0/0xb8 ... [ 51.429375] Call trace: [ 51.429694] clk_core_disable+0xb0/0xb8 [ 51.430193] clk_core_disable_lock+0x24/0x40 [ 51.430745] clk_disable+0x20/0x30 [ 51.431189] mtk_iommu_remove+0x58/0x118 [ 51.431705] platform_remove+0x28/0x60 [ 51.432197] device_release_driver_internal+0x110/0x1f0 [ 51.432873] device_driver_detach+0x18/0x28 [ 51.433418] unbind_store+0xd4/0x108 [ 51.433886] drv_attr_store+0x24/0x38 [ 51.434363] sysfs_kf_write+0x40/0x58 [ 51.434843] kernfs_fop_write_iter+0x164/0x1e0 Fixes: b34ea31fe013 ("iommu/mediatek: Always enable the clk on resume") Reported-by: Hsin-Yi Wang Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-7-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index e4b4ebbcb73f..81b8db450eac 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -957,7 +957,6 @@ static int mtk_iommu_remove(struct platform_device *pdev) list_del(&data->list); - clk_disable_unprepare(data->bclk); device_link_remove(data->smicomm_dev, &pdev->dev); pm_runtime_disable(&pdev->dev); devm_free_irq(&pdev->dev, data->irq, data); From 0e5a3f2e630b28e88e018655548212ef8eb4dfcb Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:13:58 +0800 Subject: [PATCH 541/803] iommu/mediatek: Add mutex for m4u_group and m4u_dom in data Add a mutex to protect the data in the structure mtk_iommu_data, like ->"m4u_group" ->"m4u_dom". For the internal data, we should protect it in ourselves driver. Add a mutex for this. This could be a fix for the multi-groups support. Fixes: c3045f39244e ("iommu/mediatek: Support for multi domains") Signed-off-by: Yunfei Wang Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-8-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 13 +++++++++++-- drivers/iommu/mtk_iommu.h | 2 ++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 81b8db450eac..3413cc98e57e 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -464,15 +464,16 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, dom->data = data; } + mutex_lock(&data->mutex); if (!data->m4u_dom) { /* Initialize the M4U HW */ ret = pm_runtime_resume_and_get(m4udev); if (ret < 0) - return ret; + goto err_unlock; ret = mtk_iommu_hw_init(data); if (ret) { pm_runtime_put(m4udev); - return ret; + goto err_unlock; } data->m4u_dom = dom; writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, @@ -480,9 +481,14 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, pm_runtime_put(m4udev); } + mutex_unlock(&data->mutex); mtk_iommu_config(data, dev, true, domid); return 0; + +err_unlock: + mutex_unlock(&data->mutex); + return ret; } static void mtk_iommu_detach_device(struct iommu_domain *domain, @@ -622,6 +628,7 @@ static struct iommu_group *mtk_iommu_device_group(struct device *dev) if (domid < 0) return ERR_PTR(domid); + mutex_lock(&data->mutex); group = data->m4u_group[domid]; if (!group) { group = iommu_group_alloc(); @@ -630,6 +637,7 @@ static struct iommu_group *mtk_iommu_device_group(struct device *dev) } else { iommu_group_ref_get(group); } + mutex_unlock(&data->mutex); return group; } @@ -910,6 +918,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, data); + mutex_init(&data->mutex); ret = iommu_device_sysfs_add(&data->iommu, dev, NULL, "mtk-iommu.%pa", &ioaddr); diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index b742432220c5..5e8da947affc 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -80,6 +80,8 @@ struct mtk_iommu_data { struct dma_iommu_mapping *mapping; /* For mtk_iommu_v1.c */ + struct mutex mutex; /* Protect m4u_group/m4u_dom above */ + struct list_head list; struct mtk_smi_larb_iommu larb_imu[MTK_LARB_NR_MAX]; }; From ddf67a87bebf2a8750d86f67cb298313ee93db31 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:13:59 +0800 Subject: [PATCH 542/803] iommu/mediatek: Add mutex for data in the mtk_iommu_domain Same with the previous patch, add a mutex for the "data" in the mtk_iommu_domain. Just improve the safety for multi devices enter attach_device at the same time. We don't get the real issue for this. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-9-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 3413cc98e57e..ecdce5d3e8cf 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -128,6 +128,8 @@ struct mtk_iommu_domain { struct mtk_iommu_data *data; struct iommu_domain domain; + + struct mutex mutex; /* Protect "data" in this structure */ }; static const struct iommu_ops mtk_iommu_ops; @@ -434,6 +436,7 @@ static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) dom = kzalloc(sizeof(*dom), GFP_KERNEL); if (!dom) return NULL; + mutex_init(&dom->mutex); return &dom->domain; } @@ -455,14 +458,19 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, if (domid < 0) return domid; + mutex_lock(&dom->mutex); if (!dom->data) { /* Data is in the frstdata in sharing pgtable case. */ frstdata = mtk_iommu_get_m4u_data(); - if (mtk_iommu_domain_finalise(dom, frstdata, domid)) + ret = mtk_iommu_domain_finalise(dom, frstdata, domid); + if (ret) { + mutex_unlock(&dom->mutex); return -ENODEV; + } dom->data = data; } + mutex_unlock(&dom->mutex); mutex_lock(&data->mutex); if (!data->m4u_dom) { /* Initialize the M4U HW */ From 9e3a2a64365318a743e3c0b028952d2cdbaf2b0c Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:00 +0800 Subject: [PATCH 543/803] iommu/mediatek: Adapt sharing and non-sharing pgtable case In previous mt2712, Both IOMMUs are MM IOMMU, and they will share pgtable. However in the latest SoC, another is infra IOMMU, there is no reason to share pgtable between MM with INFRA IOMMU. This patch manage to implement the two case(sharing and non-sharing pgtable). Currently we use for_each_m4u to loop the 2 HWs. Add the list_head into this macro. In the sharing pgtable case, the list_head is the global "m4ulist". In the non-sharing pgtable case, the list_head is hw_list_head which is a variable in the "data". then for_each_m4u will only loop itself. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-10-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 43 +++++++++++++++++++++------------------ drivers/iommu/mtk_iommu.h | 7 +++++++ 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index ecdce5d3e8cf..1b8e4405dd0f 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -118,6 +118,7 @@ #define WR_THROT_EN BIT(6) #define HAS_LEGACY_IVRP_PADDR BIT(7) #define IOVA_34_EN BIT(8) +#define SHARE_PGTABLE BIT(9) /* 2 HW share pgtable */ #define MTK_IOMMU_HAS_FLAG(pdata, _x) \ ((((pdata)->flags) & (_x)) == (_x)) @@ -167,7 +168,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data); static LIST_HEAD(m4ulist); /* List all the M4U HWs */ -#define for_each_m4u(data) list_for_each_entry(data, &m4ulist, list) +#define for_each_m4u(data, head) list_for_each_entry(data, head, list) struct mtk_iommu_iova_region { dma_addr_t iova_base; @@ -188,21 +189,10 @@ static const struct mtk_iommu_iova_region mt8192_multi_dom[] = { #endif }; -/* - * There may be 1 or 2 M4U HWs, But we always expect they are in the same domain - * for the performance. - * - * Here always return the mtk_iommu_data of the first probed M4U where the - * iommu domain information is recorded. - */ -static struct mtk_iommu_data *mtk_iommu_get_m4u_data(void) +/* If 2 M4U share a domain(use the same hwlist), Put the corresponding info in first data.*/ +static struct mtk_iommu_data *mtk_iommu_get_frst_data(struct list_head *hwlist) { - struct mtk_iommu_data *data; - - for_each_m4u(data) - return data; - - return NULL; + return list_first_entry(hwlist, struct mtk_iommu_data, list); } static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) @@ -226,11 +216,12 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, size_t granule, struct mtk_iommu_data *data) { + struct list_head *head = data->hw_list; unsigned long flags; int ret; u32 tmp; - for_each_m4u(data) { + for_each_m4u(data, head) { if (pm_runtime_get_if_in_use(data->dev) <= 0) continue; @@ -451,6 +442,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, { struct mtk_iommu_data *data = dev_iommu_priv_get(dev), *frstdata; struct mtk_iommu_domain *dom = to_mtk_domain(domain); + struct list_head *hw_list = data->hw_list; struct device *m4udev = data->dev; int ret, domid; @@ -461,7 +453,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, mutex_lock(&dom->mutex); if (!dom->data) { /* Data is in the frstdata in sharing pgtable case. */ - frstdata = mtk_iommu_get_m4u_data(); + frstdata = mtk_iommu_get_frst_data(hw_list); ret = mtk_iommu_domain_finalise(dom, frstdata, domid); if (ret) { @@ -625,10 +617,12 @@ static void mtk_iommu_release_device(struct device *dev) static struct iommu_group *mtk_iommu_device_group(struct device *dev) { - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); + struct mtk_iommu_data *c_data = dev_iommu_priv_get(dev), *data; + struct list_head *hw_list = c_data->hw_list; struct iommu_group *group; int domid; + data = mtk_iommu_get_frst_data(hw_list); if (!data) return ERR_PTR(-ENODEV); @@ -938,7 +932,15 @@ static int mtk_iommu_probe(struct platform_device *pdev) goto out_sysfs_remove; spin_lock_init(&data->tlb_lock); - list_add_tail(&data->list, &m4ulist); + + if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) { + list_add_tail(&data->list, data->plat_data->hw_list); + data->hw_list = data->plat_data->hw_list; + } else { + INIT_LIST_HEAD(&data->hw_list_head); + list_add_tail(&data->list, &data->hw_list_head); + data->hw_list = &data->hw_list_head; + } if (!iommu_present(&platform_bus_type)) { ret = bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); @@ -1047,7 +1049,8 @@ static const struct dev_pm_ops mtk_iommu_pm_ops = { static const struct mtk_iommu_plat_data mt2712_data = { .m4u_plat = M4U_MT2712, - .flags = HAS_4GB_MODE | HAS_BCLK | HAS_VLD_PA_RNG, + .flags = HAS_4GB_MODE | HAS_BCLK | HAS_VLD_PA_RNG | SHARE_PGTABLE, + .hw_list = &m4ulist, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, .iova_region = single_domain, .iova_region_nr = ARRAY_SIZE(single_domain), diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 5e8da947affc..dc868fce0d2a 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -55,6 +55,7 @@ struct mtk_iommu_plat_data { u32 flags; u32 inv_sel_reg; + struct list_head *hw_list; unsigned int iova_region_nr; const struct mtk_iommu_iova_region *iova_region; unsigned char larbid_remap[MTK_LARB_COM_MAX][MTK_LARB_SUBCOM_MAX]; @@ -82,6 +83,12 @@ struct mtk_iommu_data { struct mutex mutex; /* Protect m4u_group/m4u_dom above */ + /* + * In the sharing pgtable case, list data->list to the global list like m4ulist. + * In the non-sharing pgtable case, list data->list to the itself hw_list_head. + */ + struct list_head *hw_list; + struct list_head hw_list_head; struct list_head list; struct mtk_smi_larb_iommu larb_imu[MTK_LARB_NR_MAX]; }; From 129a3b885879c42494044ded94d2dd77e90004b0 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:01 +0800 Subject: [PATCH 544/803] iommu/mediatek: Add 12G~16G support for multi domains In mt8192, we preassign 0-4G; 4G-8G; 8G-12G for different multimedia engines. This depends on the "dma-ranges=" in the iommu consumer's dtsi node. Adds 12G-16G region here. and reword the previous comment. we don't limit which master locate in which region. CCU still is 8G-12G. Don't change it here. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-11-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 1b8e4405dd0f..d91a0c138536 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -180,10 +180,12 @@ static const struct mtk_iommu_iova_region single_domain[] = { }; static const struct mtk_iommu_iova_region mt8192_multi_dom[] = { - { .iova_base = 0x0, .size = SZ_4G}, /* disp: 0 ~ 4G */ + { .iova_base = 0x0, .size = SZ_4G}, /* 0 ~ 4G */ #if IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) - { .iova_base = SZ_4G, .size = SZ_4G}, /* vdec: 4G ~ 8G */ - { .iova_base = SZ_4G * 2, .size = SZ_4G}, /* CAM/MDP: 8G ~ 12G */ + { .iova_base = SZ_4G, .size = SZ_4G}, /* 4G ~ 8G */ + { .iova_base = SZ_4G * 2, .size = SZ_4G}, /* 8G ~ 12G */ + { .iova_base = SZ_4G * 3, .size = SZ_4G}, /* 12G ~ 16G */ + { .iova_base = 0x240000000ULL, .size = 0x4000000}, /* CCU0 */ { .iova_base = 0x244000000ULL, .size = 0x4000000}, /* CCU1 */ #endif From 9a87005ed9c2a2135e15e03d97de849577d29437 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:02 +0800 Subject: [PATCH 545/803] iommu/mediatek: Add a flag DCM_DISABLE In the infra iommu, we should disable DCM. add a new flag for this. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-12-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index d91a0c138536..92f172a772d1 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -51,6 +51,8 @@ #define F_MMU_STANDARD_AXI_MODE_MASK (BIT(3) | BIT(19)) #define REG_MMU_DCM_DIS 0x050 +#define F_MMU_DCM BIT(8) + #define REG_MMU_WR_LEN_CTRL 0x054 #define F_MMU_WR_THROT_DIS_MASK (BIT(5) | BIT(21)) @@ -119,6 +121,7 @@ #define HAS_LEGACY_IVRP_PADDR BIT(7) #define IOVA_34_EN BIT(8) #define SHARE_PGTABLE BIT(9) /* 2 HW share pgtable */ +#define DCM_DISABLE BIT(10) #define MTK_IOMMU_HAS_FLAG(pdata, _x) \ ((((pdata)->flags) & (_x)) == (_x)) @@ -765,7 +768,11 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) regval = F_MMU_VLD_PA_RNG(7, 4); writel_relaxed(regval, data->base + REG_MMU_VLD_PA_RNG); } - writel_relaxed(0, data->base + REG_MMU_DCM_DIS); + if (MTK_IOMMU_HAS_FLAG(data->plat_data, DCM_DISABLE)) + writel_relaxed(F_MMU_DCM, data->base + REG_MMU_DCM_DIS); + else + writel_relaxed(0, data->base + REG_MMU_DCM_DIS); + if (MTK_IOMMU_HAS_FLAG(data->plat_data, WR_THROT_EN)) { /* write command throttling mode */ regval = readl_relaxed(data->base + REG_MMU_WR_LEN_CTRL); From d265a4addc9c118d2467177b77a78a9956ddb2d5 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:03 +0800 Subject: [PATCH 546/803] iommu/mediatek: Add a flag STD_AXI_MODE Add a new flag STD_AXI_MODE which is prepared for infra and apu iommu which use the standard axi mode. All the current SoC don't use this flag. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-13-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 92f172a772d1..22e2b104e3ee 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -122,6 +122,7 @@ #define IOVA_34_EN BIT(8) #define SHARE_PGTABLE BIT(9) /* 2 HW share pgtable */ #define DCM_DISABLE BIT(10) +#define STD_AXI_MODE BIT(11) /* For non MM iommu */ #define MTK_IOMMU_HAS_FLAG(pdata, _x) \ ((((pdata)->flags) & (_x)) == (_x)) @@ -785,7 +786,8 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) regval = 0; } else { regval = readl_relaxed(data->base + REG_MMU_MISC_CTRL); - regval &= ~F_MMU_STANDARD_AXI_MODE_MASK; + if (!MTK_IOMMU_HAS_FLAG(data->plat_data, STD_AXI_MODE)) + regval &= ~F_MMU_STANDARD_AXI_MODE_MASK; if (MTK_IOMMU_HAS_FLAG(data->plat_data, OUT_ORDER_WR_EN)) regval &= ~F_MMU_IN_ORDER_WR_EN_MASK; } From e6d25e7daaf28341b3be4b8d4ea0bdee6265b1c9 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:04 +0800 Subject: [PATCH 547/803] iommu/mediatek: Remove the granule in the tlb flush The MediaTek IOMMU doesn't care about granule when tlb flushing. Remove this variable. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-14-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 22e2b104e3ee..ca77e7f1ce5d 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -219,7 +219,6 @@ static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data) } static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, - size_t granule, struct mtk_iommu_data *data) { struct list_head *head = data->hw_list; @@ -541,8 +540,7 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, struct mtk_iommu_domain *dom = to_mtk_domain(domain); size_t length = gather->end - gather->start + 1; - mtk_iommu_tlb_flush_range_sync(gather->start, length, gather->pgsize, - dom->data); + mtk_iommu_tlb_flush_range_sync(gather->start, length, dom->data); } static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, @@ -550,7 +548,7 @@ static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - mtk_iommu_tlb_flush_range_sync(iova, size, size, dom->data); + mtk_iommu_tlb_flush_range_sync(iova, size, dom->data); } static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, From 82e51771bff9ec900891a51629278a54a866a6ba Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:05 +0800 Subject: [PATCH 548/803] iommu/mediatek: Always enable output PA over 32bits in isr Currently the output PA[32:33] is contained by the flag IOVA_34. This is not right. the iova_34 has no relation with pa[32:33], the 32bits iova still could map to pa[32:33]. Move it out from the flag. No need fix tag since currently only mt8192 use the calulation and it always has this IOVA_34 flag. Prepare for the IOMMU that still use IOVA 32bits but its dram size may be over 4GB. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-15-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index ca77e7f1ce5d..d9689e041336 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -283,11 +283,11 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT; if (MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN)) { va34_32 = FIELD_GET(F_MMU_INVAL_VA_34_32_MASK, fault_iova); - pa34_32 = FIELD_GET(F_MMU_INVAL_PA_34_32_MASK, fault_iova); fault_iova = fault_iova & F_MMU_INVAL_VA_31_12_MASK; fault_iova |= (u64)va34_32 << 32; - fault_pa |= (u64)pa34_32 << 32; } + pa34_32 = FIELD_GET(F_MMU_INVAL_PA_34_32_MASK, fault_iova); + fault_pa |= (u64)pa34_32 << 32; fault_port = F_MMU_INT_ID_PORT_ID(regval); if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM)) { From 9ec30c09547d8dcf5491bb1334a5099a3412538b Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:06 +0800 Subject: [PATCH 549/803] iommu/mediatek: Add SUB_COMMON_3BITS flag In prevous SoC, the sub common id occupy 2 bits. the mt8195's sub common id has 3bits. Add a new flag for this. and rename the previous flag to _2BITS. For readable, I put these two flags together, then move the other flags. no functional change. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-16-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 26 ++++++++++++++++---------- drivers/iommu/mtk_iommu.h | 2 +- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index d9689e041336..937478cd8966 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -105,6 +105,8 @@ #define REG_MMU1_INT_ID 0x154 #define F_MMU_INT_ID_COMM_ID(a) (((a) >> 9) & 0x7) #define F_MMU_INT_ID_SUB_COMM_ID(a) (((a) >> 7) & 0x3) +#define F_MMU_INT_ID_COMM_ID_EXT(a) (((a) >> 10) & 0x7) +#define F_MMU_INT_ID_SUB_COMM_ID_EXT(a) (((a) >> 7) & 0x7) #define F_MMU_INT_ID_LARB_ID(a) (((a) >> 7) & 0x7) #define F_MMU_INT_ID_PORT_ID(a) (((a) >> 2) & 0x1f) @@ -116,13 +118,14 @@ #define HAS_VLD_PA_RNG BIT(2) #define RESET_AXI BIT(3) #define OUT_ORDER_WR_EN BIT(4) -#define HAS_SUB_COMM BIT(5) -#define WR_THROT_EN BIT(6) -#define HAS_LEGACY_IVRP_PADDR BIT(7) -#define IOVA_34_EN BIT(8) -#define SHARE_PGTABLE BIT(9) /* 2 HW share pgtable */ -#define DCM_DISABLE BIT(10) -#define STD_AXI_MODE BIT(11) /* For non MM iommu */ +#define HAS_SUB_COMM_2BITS BIT(5) +#define HAS_SUB_COMM_3BITS BIT(6) +#define WR_THROT_EN BIT(7) +#define HAS_LEGACY_IVRP_PADDR BIT(8) +#define IOVA_34_EN BIT(9) +#define SHARE_PGTABLE BIT(10) /* 2 HW share pgtable */ +#define DCM_DISABLE BIT(11) +#define STD_AXI_MODE BIT(12) /* For non MM iommu */ #define MTK_IOMMU_HAS_FLAG(pdata, _x) \ ((((pdata)->flags) & (_x)) == (_x)) @@ -290,9 +293,12 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) fault_pa |= (u64)pa34_32 << 32; fault_port = F_MMU_INT_ID_PORT_ID(regval); - if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM)) { + if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM_2BITS)) { fault_larb = F_MMU_INT_ID_COMM_ID(regval); sub_comm = F_MMU_INT_ID_SUB_COMM_ID(regval); + } else if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM_3BITS)) { + fault_larb = F_MMU_INT_ID_COMM_ID_EXT(regval); + sub_comm = F_MMU_INT_ID_SUB_COMM_ID_EXT(regval); } else { fault_larb = F_MMU_INT_ID_LARB_ID(regval); } @@ -1068,7 +1074,7 @@ static const struct mtk_iommu_plat_data mt2712_data = { static const struct mtk_iommu_plat_data mt6779_data = { .m4u_plat = M4U_MT6779, - .flags = HAS_SUB_COMM | OUT_ORDER_WR_EN | WR_THROT_EN, + .flags = HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | WR_THROT_EN, .inv_sel_reg = REG_MMU_INV_SEL_GEN2, .iova_region = single_domain, .iova_region_nr = ARRAY_SIZE(single_domain), @@ -1105,7 +1111,7 @@ static const struct mtk_iommu_plat_data mt8183_data = { static const struct mtk_iommu_plat_data mt8192_data = { .m4u_plat = M4U_MT8192, - .flags = HAS_BCLK | HAS_SUB_COMM | OUT_ORDER_WR_EN | + .flags = HAS_BCLK | HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | WR_THROT_EN | IOVA_34_EN, .inv_sel_reg = REG_MMU_INV_SEL_GEN2, .iova_region = mt8192_multi_dom, diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index dc868fce0d2a..f41e32252056 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -20,7 +20,7 @@ #include #define MTK_LARB_COM_MAX 8 -#define MTK_LARB_SUBCOM_MAX 4 +#define MTK_LARB_SUBCOM_MAX 8 #define MTK_IOMMU_GROUP_MAX 8 From 8cd1e619e79b5bdcd6fc346324a89e25c3325fde Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:07 +0800 Subject: [PATCH 550/803] iommu/mediatek: Add IOMMU_TYPE flag Add IOMMU_TYPE definition. In the mt8195, we have another IOMMU_TYPE: infra iommu, also there will be another APU_IOMMU, thus, use 2bits for the IOMMU_TYPE. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-17-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 937478cd8966..b5b95d9a372e 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -126,9 +126,17 @@ #define SHARE_PGTABLE BIT(10) /* 2 HW share pgtable */ #define DCM_DISABLE BIT(11) #define STD_AXI_MODE BIT(12) /* For non MM iommu */ +/* 2 bits: iommu type */ +#define MTK_IOMMU_TYPE_MM (0x0 << 13) +#define MTK_IOMMU_TYPE_INFRA (0x1 << 13) +#define MTK_IOMMU_TYPE_MASK (0x3 << 13) -#define MTK_IOMMU_HAS_FLAG(pdata, _x) \ - ((((pdata)->flags) & (_x)) == (_x)) +#define MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, mask) \ + ((((pdata)->flags) & (mask)) == (_x)) + +#define MTK_IOMMU_HAS_FLAG(pdata, _x) MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, _x) +#define MTK_IOMMU_IS_TYPE(pdata, _x) MTK_IOMMU_HAS_FLAG_MASK(pdata, _x,\ + MTK_IOMMU_TYPE_MASK) struct mtk_iommu_domain { struct io_pgtable_cfg cfg; From d2e9a1102cfc22c08450875faa667a60f0b1b1f6 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:08 +0800 Subject: [PATCH 551/803] iommu/mediatek: Contain MM IOMMU flow with the MM TYPE Prepare for supporting INFRA_IOMMU, and APU_IOMMU later. For Infra IOMMU/APU IOMMU, it doesn't have the "larb""port". thus, Use the MM flag contain the MM_IOMMU special flow, Also, it moves a big chunk code about parsing the mediatek,larbs into a function, this is only needed for MM IOMMU. and all the current SoC are MM_IOMMU. The device link between iommu consumer device and smi-larb device only is needed in MM iommu case. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-18-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 213 ++++++++++++++++++++++---------------- 1 file changed, 122 insertions(+), 91 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index b5b95d9a372e..7d4bc562b2a3 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -138,6 +138,8 @@ #define MTK_IOMMU_IS_TYPE(pdata, _x) MTK_IOMMU_HAS_FLAG_MASK(pdata, _x,\ MTK_IOMMU_TYPE_MASK) +#define MTK_INVALID_LARBID MTK_LARB_NR_MAX + struct mtk_iommu_domain { struct io_pgtable_cfg cfg; struct io_pgtable_ops *iop; @@ -274,7 +276,7 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) { struct mtk_iommu_data *data = dev_id; struct mtk_iommu_domain *dom = data->m4u_dom; - unsigned int fault_larb, fault_port, sub_comm = 0; + unsigned int fault_larb = MTK_INVALID_LARBID, fault_port = 0, sub_comm = 0; u32 int_state, regval, va34_32, pa34_32; u64 fault_iova, fault_pa; bool layer, write; @@ -300,17 +302,19 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) pa34_32 = FIELD_GET(F_MMU_INVAL_PA_34_32_MASK, fault_iova); fault_pa |= (u64)pa34_32 << 32; - fault_port = F_MMU_INT_ID_PORT_ID(regval); - if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM_2BITS)) { - fault_larb = F_MMU_INT_ID_COMM_ID(regval); - sub_comm = F_MMU_INT_ID_SUB_COMM_ID(regval); - } else if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM_3BITS)) { - fault_larb = F_MMU_INT_ID_COMM_ID_EXT(regval); - sub_comm = F_MMU_INT_ID_SUB_COMM_ID_EXT(regval); - } else { - fault_larb = F_MMU_INT_ID_LARB_ID(regval); + if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { + fault_port = F_MMU_INT_ID_PORT_ID(regval); + if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM_2BITS)) { + fault_larb = F_MMU_INT_ID_COMM_ID(regval); + sub_comm = F_MMU_INT_ID_SUB_COMM_ID(regval); + } else if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM_3BITS)) { + fault_larb = F_MMU_INT_ID_COMM_ID_EXT(regval); + sub_comm = F_MMU_INT_ID_SUB_COMM_ID_EXT(regval); + } else { + fault_larb = F_MMU_INT_ID_LARB_ID(regval); + } + fault_larb = data->plat_data->larbid_remap[fault_larb][sub_comm]; } - fault_larb = data->plat_data->larbid_remap[fault_larb][sub_comm]; if (report_iommu_fault(&dom->domain, data->dev, fault_iova, write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) { @@ -374,19 +378,21 @@ static void mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, larbid = MTK_M4U_TO_LARB(fwspec->ids[i]); portid = MTK_M4U_TO_PORT(fwspec->ids[i]); - larb_mmu = &data->larb_imu[larbid]; + if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { + larb_mmu = &data->larb_imu[larbid]; - region = data->plat_data->iova_region + domid; - larb_mmu->bank[portid] = upper_32_bits(region->iova_base); + region = data->plat_data->iova_region + domid; + larb_mmu->bank[portid] = upper_32_bits(region->iova_base); - dev_dbg(dev, "%s iommu for larb(%s) port %d dom %d bank %d.\n", - enable ? "enable" : "disable", dev_name(larb_mmu->dev), - portid, domid, larb_mmu->bank[portid]); + dev_dbg(dev, "%s iommu for larb(%s) port %d dom %d bank %d.\n", + enable ? "enable" : "disable", dev_name(larb_mmu->dev), + portid, domid, larb_mmu->bank[portid]); - if (enable) - larb_mmu->mmu |= MTK_SMI_MMU_EN(portid); - else - larb_mmu->mmu &= ~MTK_SMI_MMU_EN(portid); + if (enable) + larb_mmu->mmu |= MTK_SMI_MMU_EN(portid); + else + larb_mmu->mmu &= ~MTK_SMI_MMU_EN(portid); + } } } @@ -593,6 +599,9 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev) data = dev_iommu_priv_get(dev); + if (!MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) + return &data->iommu; + /* * Link the consumer device with the smi-larb device(supplier). * The device that connects with each a larb is a independent HW. @@ -626,9 +635,11 @@ static void mtk_iommu_release_device(struct device *dev) return; data = dev_iommu_priv_get(dev); - larbid = MTK_M4U_TO_LARB(fwspec->ids[0]); - larbdev = data->larb_imu[larbid].dev; - device_link_remove(dev, larbdev); + if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { + larbid = MTK_M4U_TO_LARB(fwspec->ids[0]); + larbdev = data->larb_imu[larbid].dev; + device_link_remove(dev, larbdev); + } iommu_fwspec_free(dev); } @@ -820,19 +831,77 @@ static const struct component_master_ops mtk_iommu_com_ops = { .unbind = mtk_iommu_unbind, }; +static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **match, + struct mtk_iommu_data *data) +{ + struct device_node *larbnode, *smicomm_node; + struct platform_device *plarbdev; + struct device_link *link; + int i, larb_nr, ret; + + larb_nr = of_count_phandle_with_args(dev->of_node, "mediatek,larbs", NULL); + if (larb_nr < 0) + return larb_nr; + + for (i = 0; i < larb_nr; i++) { + u32 id; + + larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i); + if (!larbnode) + return -EINVAL; + + if (!of_device_is_available(larbnode)) { + of_node_put(larbnode); + continue; + } + + ret = of_property_read_u32(larbnode, "mediatek,larb-id", &id); + if (ret)/* The id is consecutive if there is no this property */ + id = i; + + plarbdev = of_find_device_by_node(larbnode); + if (!plarbdev) { + of_node_put(larbnode); + return -ENODEV; + } + if (!plarbdev->dev.driver) { + of_node_put(larbnode); + return -EPROBE_DEFER; + } + data->larb_imu[id].dev = &plarbdev->dev; + + component_match_add_release(dev, match, component_release_of, + component_compare_of, larbnode); + } + + /* Get smi-common dev from the last larb. */ + smicomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0); + if (!smicomm_node) + return -EINVAL; + + plarbdev = of_find_device_by_node(smicomm_node); + of_node_put(smicomm_node); + data->smicomm_dev = &plarbdev->dev; + + link = device_link_add(data->smicomm_dev, dev, + DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME); + if (!link) { + dev_err(dev, "Unable to link %s.\n", dev_name(data->smicomm_dev)); + return -EINVAL; + } + return 0; +} + static int mtk_iommu_probe(struct platform_device *pdev) { struct mtk_iommu_data *data; struct device *dev = &pdev->dev; - struct device_node *larbnode, *smicomm_node; - struct platform_device *plarbdev; - struct device_link *link; struct resource *res; resource_size_t ioaddr; struct component_match *match = NULL; struct regmap *infracfg; void *protect; - int i, larb_nr, ret; + int ret; u32 val; char *p; @@ -887,59 +956,14 @@ static int mtk_iommu_probe(struct platform_device *pdev) return PTR_ERR(data->bclk); } - larb_nr = of_count_phandle_with_args(dev->of_node, - "mediatek,larbs", NULL); - if (larb_nr < 0) - return larb_nr; - - for (i = 0; i < larb_nr; i++) { - u32 id; - - larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i); - if (!larbnode) - return -EINVAL; - - if (!of_device_is_available(larbnode)) { - of_node_put(larbnode); - continue; - } - - ret = of_property_read_u32(larbnode, "mediatek,larb-id", &id); - if (ret)/* The id is consecutive if there is no this property */ - id = i; - - plarbdev = of_find_device_by_node(larbnode); - if (!plarbdev) { - of_node_put(larbnode); - return -ENODEV; - } - if (!plarbdev->dev.driver) { - of_node_put(larbnode); - return -EPROBE_DEFER; - } - data->larb_imu[id].dev = &plarbdev->dev; - - component_match_add_release(dev, &match, component_release_of, - component_compare_of, larbnode); - } - - /* Get smi-common dev from the last larb. */ - smicomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0); - if (!smicomm_node) - return -EINVAL; - - plarbdev = of_find_device_by_node(smicomm_node); - of_node_put(smicomm_node); - data->smicomm_dev = &plarbdev->dev; - pm_runtime_enable(dev); - link = device_link_add(data->smicomm_dev, dev, - DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME); - if (!link) { - dev_err(dev, "Unable to link %s.\n", dev_name(data->smicomm_dev)); - ret = -EINVAL; - goto out_runtime_disable; + if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { + ret = mtk_iommu_mm_dts_parse(dev, &match, data); + if (ret) { + dev_err(dev, "mm dts parse fail(%d).", ret); + goto out_runtime_disable; + } } platform_set_drvdata(pdev, data); @@ -971,9 +995,11 @@ static int mtk_iommu_probe(struct platform_device *pdev) goto out_list_del; } - ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match); - if (ret) - goto out_bus_set_null; + if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { + ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match); + if (ret) + goto out_bus_set_null; + } return ret; out_bus_set_null: @@ -984,7 +1010,8 @@ out_list_del: out_sysfs_remove: iommu_device_sysfs_remove(&data->iommu); out_link_remove: - device_link_remove(data->smicomm_dev, dev); + if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) + device_link_remove(data->smicomm_dev, dev); out_runtime_disable: pm_runtime_disable(dev); return ret; @@ -999,10 +1026,12 @@ static int mtk_iommu_remove(struct platform_device *pdev) list_del(&data->list); - device_link_remove(data->smicomm_dev, &pdev->dev); + if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { + device_link_remove(data->smicomm_dev, &pdev->dev); + component_master_del(&pdev->dev, &mtk_iommu_com_ops); + } pm_runtime_disable(&pdev->dev); devm_free_irq(&pdev->dev, data->irq, data); - component_master_del(&pdev->dev, &mtk_iommu_com_ops); return 0; } @@ -1072,7 +1101,8 @@ static const struct dev_pm_ops mtk_iommu_pm_ops = { static const struct mtk_iommu_plat_data mt2712_data = { .m4u_plat = M4U_MT2712, - .flags = HAS_4GB_MODE | HAS_BCLK | HAS_VLD_PA_RNG | SHARE_PGTABLE, + .flags = HAS_4GB_MODE | HAS_BCLK | HAS_VLD_PA_RNG | SHARE_PGTABLE | + MTK_IOMMU_TYPE_MM, .hw_list = &m4ulist, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, .iova_region = single_domain, @@ -1082,7 +1112,8 @@ static const struct mtk_iommu_plat_data mt2712_data = { static const struct mtk_iommu_plat_data mt6779_data = { .m4u_plat = M4U_MT6779, - .flags = HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | WR_THROT_EN, + .flags = HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | WR_THROT_EN | + MTK_IOMMU_TYPE_MM, .inv_sel_reg = REG_MMU_INV_SEL_GEN2, .iova_region = single_domain, .iova_region_nr = ARRAY_SIZE(single_domain), @@ -1091,7 +1122,7 @@ static const struct mtk_iommu_plat_data mt6779_data = { static const struct mtk_iommu_plat_data mt8167_data = { .m4u_plat = M4U_MT8167, - .flags = RESET_AXI | HAS_LEGACY_IVRP_PADDR, + .flags = RESET_AXI | HAS_LEGACY_IVRP_PADDR | MTK_IOMMU_TYPE_MM, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, .iova_region = single_domain, .iova_region_nr = ARRAY_SIZE(single_domain), @@ -1101,7 +1132,7 @@ static const struct mtk_iommu_plat_data mt8167_data = { static const struct mtk_iommu_plat_data mt8173_data = { .m4u_plat = M4U_MT8173, .flags = HAS_4GB_MODE | HAS_BCLK | RESET_AXI | - HAS_LEGACY_IVRP_PADDR, + HAS_LEGACY_IVRP_PADDR | MTK_IOMMU_TYPE_MM, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, .iova_region = single_domain, .iova_region_nr = ARRAY_SIZE(single_domain), @@ -1110,7 +1141,7 @@ static const struct mtk_iommu_plat_data mt8173_data = { static const struct mtk_iommu_plat_data mt8183_data = { .m4u_plat = M4U_MT8183, - .flags = RESET_AXI, + .flags = RESET_AXI | MTK_IOMMU_TYPE_MM, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, .iova_region = single_domain, .iova_region_nr = ARRAY_SIZE(single_domain), @@ -1120,7 +1151,7 @@ static const struct mtk_iommu_plat_data mt8183_data = { static const struct mtk_iommu_plat_data mt8192_data = { .m4u_plat = M4U_MT8192, .flags = HAS_BCLK | HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | - WR_THROT_EN | IOVA_34_EN, + WR_THROT_EN | IOVA_34_EN | MTK_IOMMU_TYPE_MM, .inv_sel_reg = REG_MMU_INV_SEL_GEN2, .iova_region = mt8192_multi_dom, .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), From f7b71d0d3ee7f83439f82af2d7e595ecd9b9b42c Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:09 +0800 Subject: [PATCH 552/803] iommu/mediatek: Adjust device link when it is sub-common For MM IOMMU, We always add device link between smi-common and IOMMU HW. In mt8195, we add smi-sub-common. Thus, if the node is sub-common, we still need find again to get smi-common, then do device link. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-19-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 7d4bc562b2a3..bcdc932cd473 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -834,7 +834,7 @@ static const struct component_master_ops mtk_iommu_com_ops = { static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **match, struct mtk_iommu_data *data) { - struct device_node *larbnode, *smicomm_node; + struct device_node *larbnode, *smicomm_node, *smi_subcomm_node; struct platform_device *plarbdev; struct device_link *link; int i, larb_nr, ret; @@ -874,11 +874,21 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m component_compare_of, larbnode); } - /* Get smi-common dev from the last larb. */ - smicomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0); - if (!smicomm_node) + /* Get smi-(sub)-common dev from the last larb. */ + smi_subcomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0); + if (!smi_subcomm_node) return -EINVAL; + /* + * It may have two level smi-common. the node is smi-sub-common if it + * has a new mediatek,smi property. otherwise it is smi-commmon. + */ + smicomm_node = of_parse_phandle(smi_subcomm_node, "mediatek,smi", 0); + if (smicomm_node) + of_node_put(smi_subcomm_node); + else + smicomm_node = smi_subcomm_node; + plarbdev = of_find_device_by_node(smicomm_node); of_node_put(smicomm_node); data->smicomm_dev = &plarbdev->dev; From 32e1cccf68123bcd357c0115567d2ce3453cd3ed Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:10 +0800 Subject: [PATCH 553/803] iommu/mediatek: Allow IOMMU_DOMAIN_UNMANAGED for PCIe VFIO Allow the type IOMMU_DOMAIN_UNMANAGED since vfio_iommu_type1.c always call iommu_domain_alloc. The PCIe EP works ok when going through vfio. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-20-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index bcdc932cd473..0121d71c315f 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -446,7 +446,7 @@ static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) { struct mtk_iommu_domain *dom; - if (type != IOMMU_DOMAIN_DMA) + if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED) return NULL; dom = kzalloc(sizeof(*dom), GFP_KERNEL); From 6077c7e5d22f2e0ffdb1dd97bf37cba66f317627 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:11 +0800 Subject: [PATCH 554/803] iommu/mediatek: Add a PM_CLK_AO flag for infra iommu The power/clock of infra iommu is always on, and it doesn't have the device link with the master devices, then the infra iommu device's PM status is not active, thus we add A PM_CLK_AO flag for infra iommu. The tlb operation is a bit not clear here, there are 2 special cases. Comment them in the code. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-21-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 0121d71c315f..cd89c109e8c4 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -130,6 +130,8 @@ #define MTK_IOMMU_TYPE_MM (0x0 << 13) #define MTK_IOMMU_TYPE_INFRA (0x1 << 13) #define MTK_IOMMU_TYPE_MASK (0x3 << 13) +/* PM and clock always on. e.g. infra iommu */ +#define PM_CLK_AO BIT(15) #define MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, mask) \ ((((pdata)->flags) & (mask)) == (_x)) @@ -235,13 +237,33 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, struct mtk_iommu_data *data) { struct list_head *head = data->hw_list; + bool check_pm_status; unsigned long flags; int ret; u32 tmp; for_each_m4u(data, head) { - if (pm_runtime_get_if_in_use(data->dev) <= 0) - continue; + /* + * To avoid resume the iommu device frequently when the iommu device + * is not active, it doesn't always call pm_runtime_get here, then tlb + * flush depends on the tlb flush all in the runtime resume. + * + * There are 2 special cases: + * + * Case1: The iommu dev doesn't have power domain but has bclk. This case + * should also avoid the tlb flush while the dev is not active to mute + * the tlb timeout log. like mt8173. + * + * Case2: The power/clock of infra iommu is always on, and it doesn't + * have the device link with the master devices. This case should avoid + * the PM status check. + */ + check_pm_status = !MTK_IOMMU_HAS_FLAG(data->plat_data, PM_CLK_AO); + + if (check_pm_status) { + if (pm_runtime_get_if_in_use(data->dev) <= 0) + continue; + } spin_lock_irqsave(&data->tlb_lock, flags); writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, @@ -268,7 +290,8 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, mtk_iommu_tlb_flush_all(data); } - pm_runtime_put(data->dev); + if (check_pm_status) + pm_runtime_put(data->dev); } } From f9b8c9b2194327c26eecfc82dde9541e80c24c80 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:12 +0800 Subject: [PATCH 555/803] iommu/mediatek: Add infra iommu support The infra iommu enable bits in mt8195 is in the pericfg register segment, use regmap to update it. If infra iommu master translation fault, It doesn't have the larbid/portid, thus print out the whole register value. Since regmap_update_bits may fail, add return value for mtk_iommu_config. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-22-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 36 +++++++++++++++++++++++++++++------- drivers/iommu/mtk_iommu.h | 2 ++ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index cd89c109e8c4..ff48506b480c 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -112,6 +112,8 @@ #define MTK_PROTECT_PA_ALIGN 256 +#define PERICFG_IOMMU_1 0x714 + #define HAS_4GB_MODE BIT(0) /* HW will use the EMI clock if there isn't the "bclk". */ #define HAS_BCLK BIT(1) @@ -343,8 +345,8 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) { dev_err_ratelimited( data->dev, - "fault type=0x%x iova=0x%llx pa=0x%llx larb=%d port=%d layer=%d %s\n", - int_state, fault_iova, fault_pa, fault_larb, fault_port, + "fault type=0x%x iova=0x%llx pa=0x%llx master=0x%x(larb=%d port=%d) layer=%d %s\n", + int_state, fault_iova, fault_pa, regval, fault_larb, fault_port, layer, write ? "write" : "read"); } @@ -388,14 +390,15 @@ static int mtk_iommu_get_domain_id(struct device *dev, return -EINVAL; } -static void mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, - bool enable, unsigned int domid) +static int mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, + bool enable, unsigned int domid) { struct mtk_smi_larb_iommu *larb_mmu; unsigned int larbid, portid; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); const struct mtk_iommu_iova_region *region; - int i; + u32 peri_mmuen, peri_mmuen_msk; + int i, ret = 0; for (i = 0; i < fwspec->num_ids; ++i) { larbid = MTK_M4U_TO_LARB(fwspec->ids[i]); @@ -415,8 +418,19 @@ static void mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, larb_mmu->mmu |= MTK_SMI_MMU_EN(portid); else larb_mmu->mmu &= ~MTK_SMI_MMU_EN(portid); + } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA)) { + peri_mmuen_msk = BIT(portid); + peri_mmuen = enable ? peri_mmuen_msk : 0; + + ret = regmap_update_bits(data->pericfg, PERICFG_IOMMU_1, + peri_mmuen_msk, peri_mmuen); + if (ret) + dev_err(dev, "%s iommu(%s) inframaster 0x%x fail(%d).\n", + enable ? "enable" : "disable", + dev_name(data->dev), peri_mmuen_msk, ret); } } + return ret; } static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, @@ -531,8 +545,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, } mutex_unlock(&data->mutex); - mtk_iommu_config(data, dev, true, domid); - return 0; + return mtk_iommu_config(data, dev, true, domid); err_unlock: mutex_unlock(&data->mutex); @@ -997,6 +1010,15 @@ static int mtk_iommu_probe(struct platform_device *pdev) dev_err(dev, "mm dts parse fail(%d).", ret); goto out_runtime_disable; } + } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA) && + data->plat_data->pericfg_comp_str) { + infracfg = syscon_regmap_lookup_by_compatible(data->plat_data->pericfg_comp_str); + if (IS_ERR(infracfg)) { + ret = PTR_ERR(infracfg); + goto out_runtime_disable; + } + + data->pericfg = infracfg; } platform_set_drvdata(pdev, data); diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index f41e32252056..56838fad8c73 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -55,6 +55,7 @@ struct mtk_iommu_plat_data { u32 flags; u32 inv_sel_reg; + char *pericfg_comp_str; struct list_head *hw_list; unsigned int iova_region_nr; const struct mtk_iommu_iova_region *iova_region; @@ -80,6 +81,7 @@ struct mtk_iommu_data { struct device *smicomm_dev; struct dma_iommu_mapping *mapping; /* For mtk_iommu_v1.c */ + struct regmap *pericfg; struct mutex mutex; /* Protect m4u_group/m4u_dom above */ From e76290702570bb0ef825050ce4a529b10dab2dba Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:13 +0800 Subject: [PATCH 556/803] iommu/mediatek: Add PCIe support Currently the code for of_iommu_configure_dev_id is like this: static int of_iommu_configure_dev_id(struct device_node *master_np, struct device *dev, const u32 *id) { struct of_phandle_args iommu_spec = { .args_count = 1 }; err = of_map_id(master_np, *id, "iommu-map", "iommu-map-mask", &iommu_spec.np, iommu_spec.args); ... } It supports only one id output. BUT our PCIe HW has two ID(one is for writing, the other is for reading). I'm not sure if we should change of_map_id to support output MAX_PHANDLE_ARGS. Here add the solution in ourselve drivers. If it's pcie case, enable one more bit. Not all infra iommu support PCIe, thus add a PCIe support flag here. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-23-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index ff48506b480c..d63fe28c1403 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -134,6 +135,7 @@ #define MTK_IOMMU_TYPE_MASK (0x3 << 13) /* PM and clock always on. e.g. infra iommu */ #define PM_CLK_AO BIT(15) +#define IFA_IOMMU_PCIE_SUPPORT BIT(16) #define MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, mask) \ ((((pdata)->flags) & (mask)) == (_x)) @@ -420,8 +422,11 @@ static int mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, larb_mmu->mmu &= ~MTK_SMI_MMU_EN(portid); } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA)) { peri_mmuen_msk = BIT(portid); - peri_mmuen = enable ? peri_mmuen_msk : 0; + /* PCI dev has only one output id, enable the next writing bit for PCIe */ + if (dev_is_pci(dev)) + peri_mmuen_msk |= BIT(portid + 1); + peri_mmuen = enable ? peri_mmuen_msk : 0; ret = regmap_update_bits(data->pericfg, PERICFG_IOMMU_1, peri_mmuen_msk, peri_mmuen); if (ret) @@ -1054,6 +1059,15 @@ static int mtk_iommu_probe(struct platform_device *pdev) ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match); if (ret) goto out_bus_set_null; + } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA) && + MTK_IOMMU_HAS_FLAG(data->plat_data, IFA_IOMMU_PCIE_SUPPORT)) { +#ifdef CONFIG_PCI + if (!iommu_present(&pci_bus_type)) { + ret = bus_set_iommu(&pci_bus_type, &mtk_iommu_ops); + if (ret) /* PCIe fail don't affect platform_bus. */ + goto out_list_del; + } +#endif } return ret; @@ -1084,6 +1098,11 @@ static int mtk_iommu_remove(struct platform_device *pdev) if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { device_link_remove(data->smicomm_dev, &pdev->dev); component_master_del(&pdev->dev, &mtk_iommu_com_ops); + } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA) && + MTK_IOMMU_HAS_FLAG(data->plat_data, IFA_IOMMU_PCIE_SUPPORT)) { +#ifdef CONFIG_PCI + bus_set_iommu(&pci_bus_type, NULL); +#endif } pm_runtime_disable(&pdev->dev); devm_free_irq(&pdev->dev, data->irq, data); From ef68a193c719436a2203c2d59442e0f32775385c Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:14 +0800 Subject: [PATCH 557/803] iommu/mediatek: Add mt8195 support mt8195 has 3 IOMMU, containing 2 MM IOMMUs, one is for vdo, the other is for vpp. and 1 INFRA IOMMU. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-24-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 41 +++++++++++++++++++++++++++++++++++++++ drivers/iommu/mtk_iommu.h | 1 + 2 files changed, 42 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index d63fe28c1403..47355010502e 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1233,6 +1233,44 @@ static const struct mtk_iommu_plat_data mt8192_data = { {0, 14, 16}, {0, 13, 18, 17}}, }; +static const struct mtk_iommu_plat_data mt8195_data_infra = { + .m4u_plat = M4U_MT8195, + .flags = WR_THROT_EN | DCM_DISABLE | STD_AXI_MODE | PM_CLK_AO | + MTK_IOMMU_TYPE_INFRA | IFA_IOMMU_PCIE_SUPPORT, + .pericfg_comp_str = "mediatek,mt8195-pericfg_ao", + .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), +}; + +static const struct mtk_iommu_plat_data mt8195_data_vdo = { + .m4u_plat = M4U_MT8195, + .flags = HAS_BCLK | HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | + WR_THROT_EN | IOVA_34_EN | SHARE_PGTABLE | MTK_IOMMU_TYPE_MM, + .hw_list = &m4ulist, + .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .iova_region = mt8192_multi_dom, + .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), + .larbid_remap = {{2, 0}, {21}, {24}, {7}, {19}, {9, 10, 11}, + {13, 17, 15/* 17b */, 25}, {5}}, +}; + +static const struct mtk_iommu_plat_data mt8195_data_vpp = { + .m4u_plat = M4U_MT8195, + .flags = HAS_BCLK | HAS_SUB_COMM_3BITS | OUT_ORDER_WR_EN | + WR_THROT_EN | IOVA_34_EN | SHARE_PGTABLE | MTK_IOMMU_TYPE_MM, + .hw_list = &m4ulist, + .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .iova_region = mt8192_multi_dom, + .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), + .larbid_remap = {{1}, {3}, + {22, MTK_INVALID_LARBID, MTK_INVALID_LARBID, MTK_INVALID_LARBID, 23}, + {8}, {20}, {12}, + /* 16: 16a; 29: 16b; 30: CCUtop0; 31: CCUtop1 */ + {14, 16, 29, 26, 30, 31, 18}, + {4, MTK_INVALID_LARBID, MTK_INVALID_LARBID, MTK_INVALID_LARBID, 6}}, +}; + static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data}, { .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data}, @@ -1240,6 +1278,9 @@ static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data}, { .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data}, { .compatible = "mediatek,mt8192-m4u", .data = &mt8192_data}, + { .compatible = "mediatek,mt8195-iommu-infra", .data = &mt8195_data_infra}, + { .compatible = "mediatek,mt8195-iommu-vdo", .data = &mt8195_data_vdo}, + { .compatible = "mediatek,mt8195-iommu-vpp", .data = &mt8195_data_vpp}, {} }; diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 56838fad8c73..f2ee11cd254a 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -46,6 +46,7 @@ enum mtk_iommu_plat { M4U_MT8173, M4U_MT8183, M4U_MT8192, + M4U_MT8195, }; struct mtk_iommu_iova_region; From 887cf6a74a2f7225300186ff0896aa93ce42f969 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:15 +0800 Subject: [PATCH 558/803] iommu/mediatek: Only adjust code about register base No functional change. Use "base" instead of the data->base. This is avoid to touch too many lines in the next patches. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-25-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 51 +++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 47355010502e..de448d056135 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -227,12 +227,12 @@ static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data) { + void __iomem *base = data->base; unsigned long flags; spin_lock_irqsave(&data->tlb_lock, flags); - writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, - data->base + data->plat_data->inv_sel_reg); - writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); + writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, base + data->plat_data->inv_sel_reg); + writel_relaxed(F_ALL_INVLD, base + REG_MMU_INVALIDATE); wmb(); /* Make sure the tlb flush all done */ spin_unlock_irqrestore(&data->tlb_lock, flags); } @@ -243,6 +243,7 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, struct list_head *head = data->hw_list; bool check_pm_status; unsigned long flags; + void __iomem *base; int ret; u32 tmp; @@ -269,23 +270,23 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, continue; } + base = data->base; + spin_lock_irqsave(&data->tlb_lock, flags); writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, - data->base + data->plat_data->inv_sel_reg); + base + data->plat_data->inv_sel_reg); - writel_relaxed(MTK_IOMMU_TLB_ADDR(iova), - data->base + REG_MMU_INVLD_START_A); + writel_relaxed(MTK_IOMMU_TLB_ADDR(iova), base + REG_MMU_INVLD_START_A); writel_relaxed(MTK_IOMMU_TLB_ADDR(iova + size - 1), - data->base + REG_MMU_INVLD_END_A); - writel_relaxed(F_MMU_INV_RANGE, - data->base + REG_MMU_INVALIDATE); + base + REG_MMU_INVLD_END_A); + writel_relaxed(F_MMU_INV_RANGE, base + REG_MMU_INVALIDATE); /* tlb sync */ - ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE, + ret = readl_poll_timeout_atomic(base + REG_MMU_CPE_DONE, tmp, tmp != 0, 10, 1000); /* Clear the CPE status */ - writel_relaxed(0, data->base + REG_MMU_CPE_DONE); + writel_relaxed(0, base + REG_MMU_CPE_DONE); spin_unlock_irqrestore(&data->tlb_lock, flags); if (ret) { @@ -305,23 +306,25 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) struct mtk_iommu_domain *dom = data->m4u_dom; unsigned int fault_larb = MTK_INVALID_LARBID, fault_port = 0, sub_comm = 0; u32 int_state, regval, va34_32, pa34_32; + const struct mtk_iommu_plat_data *plat_data = data->plat_data; + void __iomem *base = data->base; u64 fault_iova, fault_pa; bool layer, write; /* Read error info from registers */ - int_state = readl_relaxed(data->base + REG_MMU_FAULT_ST1); + int_state = readl_relaxed(base + REG_MMU_FAULT_ST1); if (int_state & F_REG_MMU0_FAULT_MASK) { - regval = readl_relaxed(data->base + REG_MMU0_INT_ID); - fault_iova = readl_relaxed(data->base + REG_MMU0_FAULT_VA); - fault_pa = readl_relaxed(data->base + REG_MMU0_INVLD_PA); + regval = readl_relaxed(base + REG_MMU0_INT_ID); + fault_iova = readl_relaxed(base + REG_MMU0_FAULT_VA); + fault_pa = readl_relaxed(base + REG_MMU0_INVLD_PA); } else { - regval = readl_relaxed(data->base + REG_MMU1_INT_ID); - fault_iova = readl_relaxed(data->base + REG_MMU1_FAULT_VA); - fault_pa = readl_relaxed(data->base + REG_MMU1_INVLD_PA); + regval = readl_relaxed(base + REG_MMU1_INT_ID); + fault_iova = readl_relaxed(base + REG_MMU1_FAULT_VA); + fault_pa = readl_relaxed(base + REG_MMU1_INVLD_PA); } layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT; write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT; - if (MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN)) { + if (MTK_IOMMU_HAS_FLAG(plat_data, IOVA_34_EN)) { va34_32 = FIELD_GET(F_MMU_INVAL_VA_34_32_MASK, fault_iova); fault_iova = fault_iova & F_MMU_INVAL_VA_31_12_MASK; fault_iova |= (u64)va34_32 << 32; @@ -329,12 +332,12 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) pa34_32 = FIELD_GET(F_MMU_INVAL_PA_34_32_MASK, fault_iova); fault_pa |= (u64)pa34_32 << 32; - if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { + if (MTK_IOMMU_IS_TYPE(plat_data, MTK_IOMMU_TYPE_MM)) { fault_port = F_MMU_INT_ID_PORT_ID(regval); - if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM_2BITS)) { + if (MTK_IOMMU_HAS_FLAG(plat_data, HAS_SUB_COMM_2BITS)) { fault_larb = F_MMU_INT_ID_COMM_ID(regval); sub_comm = F_MMU_INT_ID_SUB_COMM_ID(regval); - } else if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM_3BITS)) { + } else if (MTK_IOMMU_HAS_FLAG(plat_data, HAS_SUB_COMM_3BITS)) { fault_larb = F_MMU_INT_ID_COMM_ID_EXT(regval); sub_comm = F_MMU_INT_ID_SUB_COMM_ID_EXT(regval); } else { @@ -353,9 +356,9 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) } /* Interrupt clear */ - regval = readl_relaxed(data->base + REG_MMU_INT_CONTROL0); + regval = readl_relaxed(base + REG_MMU_INT_CONTROL0); regval |= F_INT_CLR_BIT; - writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL0); + writel_relaxed(regval, base + REG_MMU_INT_CONTROL0); mtk_iommu_tlb_flush_all(data); From 634f57df1f3b61cc1b1441762718a14da28c27f8 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:16 +0800 Subject: [PATCH 559/803] iommu/mediatek: Just move code position in hw_init No functional change too, prepare for mt8195 IOMMU support bank functions. Some global control settings are in bank0 while the other banks have their bank independent setting. Here only move the global control settings and the independent registers together. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-26-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 48 +++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index de448d056135..ac1681858af8 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -803,30 +803,6 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) } writel_relaxed(regval, data->base + REG_MMU_CTRL_REG); - regval = F_L2_MULIT_HIT_EN | - F_TABLE_WALK_FAULT_INT_EN | - F_PREETCH_FIFO_OVERFLOW_INT_EN | - F_MISS_FIFO_OVERFLOW_INT_EN | - F_PREFETCH_FIFO_ERR_INT_EN | - F_MISS_FIFO_ERR_INT_EN; - writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL0); - - regval = F_INT_TRANSLATION_FAULT | - F_INT_MAIN_MULTI_HIT_FAULT | - F_INT_INVALID_PA_FAULT | - F_INT_ENTRY_REPLACEMENT_FAULT | - F_INT_TLB_MISS_FAULT | - F_INT_MISS_TRANSACTION_FIFO_FAULT | - F_INT_PRETETCH_TRANSATION_FIFO_FAULT; - writel_relaxed(regval, data->base + REG_MMU_INT_MAIN_CONTROL); - - if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_LEGACY_IVRP_PADDR)) - regval = (data->protect_base >> 1) | (data->enable_4GB << 31); - else - regval = lower_32_bits(data->protect_base) | - upper_32_bits(data->protect_base); - writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR); - if (data->enable_4GB && MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_VLD_PA_RNG)) { /* @@ -860,6 +836,30 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) } writel_relaxed(regval, data->base + REG_MMU_MISC_CTRL); + regval = F_L2_MULIT_HIT_EN | + F_TABLE_WALK_FAULT_INT_EN | + F_PREETCH_FIFO_OVERFLOW_INT_EN | + F_MISS_FIFO_OVERFLOW_INT_EN | + F_PREFETCH_FIFO_ERR_INT_EN | + F_MISS_FIFO_ERR_INT_EN; + writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL0); + + regval = F_INT_TRANSLATION_FAULT | + F_INT_MAIN_MULTI_HIT_FAULT | + F_INT_INVALID_PA_FAULT | + F_INT_ENTRY_REPLACEMENT_FAULT | + F_INT_TLB_MISS_FAULT | + F_INT_MISS_TRANSACTION_FIFO_FAULT | + F_INT_PRETETCH_TRANSATION_FIFO_FAULT; + writel_relaxed(regval, data->base + REG_MMU_INT_MAIN_CONTROL); + + if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_LEGACY_IVRP_PADDR)) + regval = (data->protect_base >> 1) | (data->enable_4GB << 31); + else + regval = lower_32_bits(data->protect_base) | + upper_32_bits(data->protect_base); + writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR); + if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0, dev_name(data->dev), (void *)data)) { writel_relaxed(0, data->base + REG_MMU_PT_BASE_ADDR); From 9485a04a5bb97e2b9749277005e1f7c23b9ec8be Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:17 +0800 Subject: [PATCH 560/803] iommu/mediatek: Separate mtk_iommu_data for v1 and v2 Prepare for adding the structure "mtk_iommu_bank_data". No functional change. The mtk_iommu_domain in v1 and v2 are different, we could not add current data as bank[0] in v1 simplistically. Currently we have no plan to add new SoC for v1, in order to avoid affect v1 when we add many new features for v2, I totally separate v1 and v2 in this patch, there are many structures only for v2. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-27-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 82 +++++++++++++++++++++++++++++++++--- drivers/iommu/mtk_iommu.h | 81 ----------------------------------- drivers/iommu/mtk_iommu_v1.c | 29 +++++++++++++ 3 files changed, 106 insertions(+), 86 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index ac1681858af8..7383a5df6021 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -146,6 +146,69 @@ #define MTK_INVALID_LARBID MTK_LARB_NR_MAX +#define MTK_LARB_COM_MAX 8 +#define MTK_LARB_SUBCOM_MAX 8 + +#define MTK_IOMMU_GROUP_MAX 8 + +enum mtk_iommu_plat { + M4U_MT2712, + M4U_MT6779, + M4U_MT8167, + M4U_MT8173, + M4U_MT8183, + M4U_MT8192, + M4U_MT8195, +}; + +struct mtk_iommu_iova_region { + dma_addr_t iova_base; + unsigned long long size; +}; + +struct mtk_iommu_plat_data { + enum mtk_iommu_plat m4u_plat; + u32 flags; + u32 inv_sel_reg; + + char *pericfg_comp_str; + struct list_head *hw_list; + unsigned int iova_region_nr; + const struct mtk_iommu_iova_region *iova_region; + unsigned char larbid_remap[MTK_LARB_COM_MAX][MTK_LARB_SUBCOM_MAX]; +}; + +struct mtk_iommu_data { + void __iomem *base; + int irq; + struct device *dev; + struct clk *bclk; + phys_addr_t protect_base; /* protect memory base */ + struct mtk_iommu_suspend_reg reg; + struct mtk_iommu_domain *m4u_dom; + struct iommu_group *m4u_group[MTK_IOMMU_GROUP_MAX]; + bool enable_4GB; + spinlock_t tlb_lock; /* lock for tlb range flush */ + + struct iommu_device iommu; + const struct mtk_iommu_plat_data *plat_data; + struct device *smicomm_dev; + + struct dma_iommu_mapping *mapping; /* For mtk_iommu_v1.c */ + struct regmap *pericfg; + + struct mutex mutex; /* Protect m4u_group/m4u_dom above */ + + /* + * In the sharing pgtable case, list data->list to the global list like m4ulist. + * In the non-sharing pgtable case, list data->list to the itself hw_list_head. + */ + struct list_head *hw_list; + struct list_head hw_list_head; + struct list_head list; + struct mtk_smi_larb_iommu larb_imu[MTK_LARB_NR_MAX]; +}; + struct mtk_iommu_domain { struct io_pgtable_cfg cfg; struct io_pgtable_ops *iop; @@ -156,6 +219,20 @@ struct mtk_iommu_domain { struct mutex mutex; /* Protect "data" in this structure */ }; +static int mtk_iommu_bind(struct device *dev) +{ + struct mtk_iommu_data *data = dev_get_drvdata(dev); + + return component_bind_all(dev, &data->larb_imu); +} + +static void mtk_iommu_unbind(struct device *dev) +{ + struct mtk_iommu_data *data = dev_get_drvdata(dev); + + component_unbind_all(dev, &data->larb_imu); +} + static const struct iommu_ops mtk_iommu_ops; static int mtk_iommu_hw_init(const struct mtk_iommu_data *data); @@ -193,11 +270,6 @@ static LIST_HEAD(m4ulist); /* List all the M4U HWs */ #define for_each_m4u(data, head) list_for_each_entry(data, head, list) -struct mtk_iommu_iova_region { - dma_addr_t iova_base; - unsigned long long size; -}; - static const struct mtk_iommu_iova_region single_domain[] = { {.iova_base = 0, .size = SZ_4G}, }; diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index f2ee11cd254a..305243e18aa9 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -7,23 +7,14 @@ #ifndef _MTK_IOMMU_H_ #define _MTK_IOMMU_H_ -#include -#include #include #include #include #include -#include #include -#include #include #include -#define MTK_LARB_COM_MAX 8 -#define MTK_LARB_SUBCOM_MAX 8 - -#define MTK_IOMMU_GROUP_MAX 8 - struct mtk_iommu_suspend_reg { union { u32 standard_axi_mode;/* v1 */ @@ -38,76 +29,4 @@ struct mtk_iommu_suspend_reg { u32 wr_len_ctrl; }; -enum mtk_iommu_plat { - M4U_MT2701, - M4U_MT2712, - M4U_MT6779, - M4U_MT8167, - M4U_MT8173, - M4U_MT8183, - M4U_MT8192, - M4U_MT8195, -}; - -struct mtk_iommu_iova_region; - -struct mtk_iommu_plat_data { - enum mtk_iommu_plat m4u_plat; - u32 flags; - u32 inv_sel_reg; - - char *pericfg_comp_str; - struct list_head *hw_list; - unsigned int iova_region_nr; - const struct mtk_iommu_iova_region *iova_region; - unsigned char larbid_remap[MTK_LARB_COM_MAX][MTK_LARB_SUBCOM_MAX]; -}; - -struct mtk_iommu_domain; - -struct mtk_iommu_data { - void __iomem *base; - int irq; - struct device *dev; - struct clk *bclk; - phys_addr_t protect_base; /* protect memory base */ - struct mtk_iommu_suspend_reg reg; - struct mtk_iommu_domain *m4u_dom; - struct iommu_group *m4u_group[MTK_IOMMU_GROUP_MAX]; - bool enable_4GB; - spinlock_t tlb_lock; /* lock for tlb range flush */ - - struct iommu_device iommu; - const struct mtk_iommu_plat_data *plat_data; - struct device *smicomm_dev; - - struct dma_iommu_mapping *mapping; /* For mtk_iommu_v1.c */ - struct regmap *pericfg; - - struct mutex mutex; /* Protect m4u_group/m4u_dom above */ - - /* - * In the sharing pgtable case, list data->list to the global list like m4ulist. - * In the non-sharing pgtable case, list data->list to the itself hw_list_head. - */ - struct list_head *hw_list; - struct list_head hw_list_head; - struct list_head list; - struct mtk_smi_larb_iommu larb_imu[MTK_LARB_NR_MAX]; -}; - -static inline int mtk_iommu_bind(struct device *dev) -{ - struct mtk_iommu_data *data = dev_get_drvdata(dev); - - return component_bind_all(dev, &data->larb_imu); -} - -static inline void mtk_iommu_unbind(struct device *dev) -{ - struct mtk_iommu_data *data = dev_get_drvdata(dev); - - component_unbind_all(dev, &data->larb_imu); -} - #endif diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index ecff800656e6..6d1c09c91e1f 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -87,6 +87,21 @@ */ #define M2701_IOMMU_PGT_SIZE SZ_4M +struct mtk_iommu_data { + void __iomem *base; + int irq; + struct device *dev; + struct clk *bclk; + phys_addr_t protect_base; /* protect memory base */ + struct mtk_iommu_domain *m4u_dom; + + struct iommu_device iommu; + struct dma_iommu_mapping *mapping; + struct mtk_smi_larb_iommu larb_imu[MTK_LARB_NR_MAX]; + + struct mtk_iommu_suspend_reg reg; +}; + struct mtk_iommu_domain { spinlock_t pgtlock; /* lock for page table */ struct iommu_domain domain; @@ -95,6 +110,20 @@ struct mtk_iommu_domain { struct mtk_iommu_data *data; }; +static int mtk_iommu_bind(struct device *dev) +{ + struct mtk_iommu_data *data = dev_get_drvdata(dev); + + return component_bind_all(dev, &data->larb_imu); +} + +static void mtk_iommu_unbind(struct device *dev) +{ + struct mtk_iommu_data *data = dev_get_drvdata(dev); + + component_unbind_all(dev, &data->larb_imu); +} + static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) { return container_of(dom, struct mtk_iommu_domain, domain); From 6a513de3efe099976fc5924082e48b79c467664f Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:18 +0800 Subject: [PATCH 561/803] iommu/mediatek: Remove mtk_iommu.h Currently there is a suspend structure in the header file. It's no need to keep a header file only for this. Move these into the c file and rm this header file. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-28-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 14 +++++++++++++- drivers/iommu/mtk_iommu.h | 32 -------------------------------- drivers/iommu/mtk_iommu_v1.c | 11 ++++++++--- 3 files changed, 21 insertions(+), 36 deletions(-) delete mode 100644 drivers/iommu/mtk_iommu.h diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 7383a5df6021..349640bcbd01 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -30,7 +31,7 @@ #include #include -#include "mtk_iommu.h" +#include #define REG_MMU_PT_BASE_ADDR 0x000 #define MMU_PT_ADDR_MASK GENMASK(31, 7) @@ -166,6 +167,17 @@ struct mtk_iommu_iova_region { unsigned long long size; }; +struct mtk_iommu_suspend_reg { + u32 misc_ctrl; + u32 dcm_dis; + u32 ctrl_reg; + u32 int_control0; + u32 int_main_control; + u32 ivrp_paddr; + u32 vld_pa_rng; + u32 wr_len_ctrl; +}; + struct mtk_iommu_plat_data { enum mtk_iommu_plat m4u_plat; u32 flags; diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h deleted file mode 100644 index 305243e18aa9..000000000000 --- a/drivers/iommu/mtk_iommu.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2015-2016 MediaTek Inc. - * Author: Honghui Zhang - */ - -#ifndef _MTK_IOMMU_H_ -#define _MTK_IOMMU_H_ - -#include -#include -#include -#include -#include -#include -#include - -struct mtk_iommu_suspend_reg { - union { - u32 standard_axi_mode;/* v1 */ - u32 misc_ctrl;/* v2 */ - }; - u32 dcm_dis; - u32 ctrl_reg; - u32 int_control0; - u32 int_main_control; - u32 ivrp_paddr; - u32 vld_pa_rng; - u32 wr_len_ctrl; -}; - -#endif diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 6d1c09c91e1f..3d1f0897d1cc 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -7,7 +7,6 @@ * * Based on driver/iommu/mtk_iommu.c */ -#include #include #include #include @@ -28,10 +27,9 @@ #include #include #include -#include +#include #include #include -#include "mtk_iommu.h" #define REG_MMU_PT_BASE_ADDR 0x000 @@ -87,6 +85,13 @@ */ #define M2701_IOMMU_PGT_SIZE SZ_4M +struct mtk_iommu_suspend_reg { + u32 standard_axi_mode; + u32 dcm_dis; + u32 ctrl_reg; + u32 int_control0; +}; + struct mtk_iommu_data { void __iomem *base; int irq; From ad9b10e533f6fb3fa74afdc2341774b1305c176f Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:19 +0800 Subject: [PATCH 562/803] iommu/mediatek-v1: Just rename mtk_iommu to mtk_iommu_v1 No functional change. Just rename this for readable. Differentiate this from mtk_iommu.c Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-29-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu_v1.c | 211 +++++++++++++++++------------------ 1 file changed, 103 insertions(+), 108 deletions(-) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 3d1f0897d1cc..62669e60991f 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -85,53 +85,53 @@ */ #define M2701_IOMMU_PGT_SIZE SZ_4M -struct mtk_iommu_suspend_reg { +struct mtk_iommu_v1_suspend_reg { u32 standard_axi_mode; u32 dcm_dis; u32 ctrl_reg; u32 int_control0; }; -struct mtk_iommu_data { +struct mtk_iommu_v1_data { void __iomem *base; int irq; struct device *dev; struct clk *bclk; phys_addr_t protect_base; /* protect memory base */ - struct mtk_iommu_domain *m4u_dom; + struct mtk_iommu_v1_domain *m4u_dom; struct iommu_device iommu; struct dma_iommu_mapping *mapping; struct mtk_smi_larb_iommu larb_imu[MTK_LARB_NR_MAX]; - struct mtk_iommu_suspend_reg reg; + struct mtk_iommu_v1_suspend_reg reg; }; -struct mtk_iommu_domain { +struct mtk_iommu_v1_domain { spinlock_t pgtlock; /* lock for page table */ struct iommu_domain domain; u32 *pgt_va; dma_addr_t pgt_pa; - struct mtk_iommu_data *data; + struct mtk_iommu_v1_data *data; }; -static int mtk_iommu_bind(struct device *dev) +static int mtk_iommu_v1_bind(struct device *dev) { - struct mtk_iommu_data *data = dev_get_drvdata(dev); + struct mtk_iommu_v1_data *data = dev_get_drvdata(dev); return component_bind_all(dev, &data->larb_imu); } -static void mtk_iommu_unbind(struct device *dev) +static void mtk_iommu_v1_unbind(struct device *dev) { - struct mtk_iommu_data *data = dev_get_drvdata(dev); + struct mtk_iommu_v1_data *data = dev_get_drvdata(dev); component_unbind_all(dev, &data->larb_imu); } -static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) +static struct mtk_iommu_v1_domain *to_mtk_domain(struct iommu_domain *dom) { - return container_of(dom, struct mtk_iommu_domain, domain); + return container_of(dom, struct mtk_iommu_v1_domain, domain); } static const int mt2701_m4u_in_larb[] = { @@ -157,7 +157,7 @@ static inline int mt2701_m4u_to_port(int id) return id - mt2701_m4u_in_larb[larb]; } -static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data) +static void mtk_iommu_v1_tlb_flush_all(struct mtk_iommu_v1_data *data) { writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + REG_MMU_INV_SEL); @@ -165,8 +165,8 @@ static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data) wmb(); /* Make sure the tlb flush all done */ } -static void mtk_iommu_tlb_flush_range(struct mtk_iommu_data *data, - unsigned long iova, size_t size) +static void mtk_iommu_v1_tlb_flush_range(struct mtk_iommu_v1_data *data, + unsigned long iova, size_t size) { int ret; u32 tmp; @@ -184,16 +184,16 @@ static void mtk_iommu_tlb_flush_range(struct mtk_iommu_data *data, if (ret) { dev_warn(data->dev, "Partial TLB flush timed out, falling back to full flush\n"); - mtk_iommu_tlb_flush_all(data); + mtk_iommu_v1_tlb_flush_all(data); } /* Clear the CPE status */ writel_relaxed(0, data->base + REG_MMU_CPE_DONE); } -static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) +static irqreturn_t mtk_iommu_v1_isr(int irq, void *dev_id) { - struct mtk_iommu_data *data = dev_id; - struct mtk_iommu_domain *dom = data->m4u_dom; + struct mtk_iommu_v1_data *data = dev_id; + struct mtk_iommu_v1_domain *dom = data->m4u_dom; u32 int_state, regval, fault_iova, fault_pa; unsigned int fault_larb, fault_port; @@ -223,13 +223,13 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) regval |= F_INT_CLR_BIT; writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL); - mtk_iommu_tlb_flush_all(data); + mtk_iommu_v1_tlb_flush_all(data); return IRQ_HANDLED; } -static void mtk_iommu_config(struct mtk_iommu_data *data, - struct device *dev, bool enable) +static void mtk_iommu_v1_config(struct mtk_iommu_v1_data *data, + struct device *dev, bool enable) { struct mtk_smi_larb_iommu *larb_mmu; unsigned int larbid, portid; @@ -251,9 +251,9 @@ static void mtk_iommu_config(struct mtk_iommu_data *data, } } -static int mtk_iommu_domain_finalise(struct mtk_iommu_data *data) +static int mtk_iommu_v1_domain_finalise(struct mtk_iommu_v1_data *data) { - struct mtk_iommu_domain *dom = data->m4u_dom; + struct mtk_iommu_v1_domain *dom = data->m4u_dom; spin_lock_init(&dom->pgtlock); @@ -269,9 +269,9 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_data *data) return 0; } -static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) +static struct iommu_domain *mtk_iommu_v1_domain_alloc(unsigned type) { - struct mtk_iommu_domain *dom; + struct mtk_iommu_v1_domain *dom; if (type != IOMMU_DOMAIN_UNMANAGED) return NULL; @@ -283,21 +283,20 @@ static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) return &dom->domain; } -static void mtk_iommu_domain_free(struct iommu_domain *domain) +static void mtk_iommu_v1_domain_free(struct iommu_domain *domain) { - struct mtk_iommu_domain *dom = to_mtk_domain(domain); - struct mtk_iommu_data *data = dom->data; + struct mtk_iommu_v1_domain *dom = to_mtk_domain(domain); + struct mtk_iommu_v1_data *data = dom->data; dma_free_coherent(data->dev, M2701_IOMMU_PGT_SIZE, dom->pgt_va, dom->pgt_pa); kfree(to_mtk_domain(domain)); } -static int mtk_iommu_attach_device(struct iommu_domain *domain, - struct device *dev) +static int mtk_iommu_v1_attach_device(struct iommu_domain *domain, struct device *dev) { - struct mtk_iommu_data *data = dev_iommu_priv_get(dev); - struct mtk_iommu_domain *dom = to_mtk_domain(domain); + struct mtk_iommu_v1_data *data = dev_iommu_priv_get(dev); + struct mtk_iommu_v1_domain *dom = to_mtk_domain(domain); struct dma_iommu_mapping *mtk_mapping; int ret; @@ -308,29 +307,28 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, if (!data->m4u_dom) { data->m4u_dom = dom; - ret = mtk_iommu_domain_finalise(data); + ret = mtk_iommu_v1_domain_finalise(data); if (ret) { data->m4u_dom = NULL; return ret; } } - mtk_iommu_config(data, dev, true); + mtk_iommu_v1_config(data, dev, true); return 0; } -static void mtk_iommu_detach_device(struct iommu_domain *domain, - struct device *dev) +static void mtk_iommu_v1_detach_device(struct iommu_domain *domain, struct device *dev) { - struct mtk_iommu_data *data = dev_iommu_priv_get(dev); + struct mtk_iommu_v1_data *data = dev_iommu_priv_get(dev); - mtk_iommu_config(data, dev, false); + mtk_iommu_v1_config(data, dev, false); } -static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +static int mtk_iommu_v1_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { - struct mtk_iommu_domain *dom = to_mtk_domain(domain); + struct mtk_iommu_v1_domain *dom = to_mtk_domain(domain); unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT; unsigned long flags; unsigned int i; @@ -351,16 +349,15 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, spin_unlock_irqrestore(&dom->pgtlock, flags); - mtk_iommu_tlb_flush_range(dom->data, iova, size); + mtk_iommu_v1_tlb_flush_range(dom->data, iova, size); return map_size == size ? 0 : -EEXIST; } -static size_t mtk_iommu_unmap(struct iommu_domain *domain, - unsigned long iova, size_t size, - struct iommu_iotlb_gather *gather) +static size_t mtk_iommu_v1_unmap(struct iommu_domain *domain, unsigned long iova, + size_t size, struct iommu_iotlb_gather *gather) { - struct mtk_iommu_domain *dom = to_mtk_domain(domain); + struct mtk_iommu_v1_domain *dom = to_mtk_domain(domain); unsigned long flags; u32 *pgt_base_iova = dom->pgt_va + (iova >> MT2701_IOMMU_PAGE_SHIFT); unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT; @@ -369,15 +366,14 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain, memset(pgt_base_iova, 0, page_num * sizeof(u32)); spin_unlock_irqrestore(&dom->pgtlock, flags); - mtk_iommu_tlb_flush_range(dom->data, iova, size); + mtk_iommu_v1_tlb_flush_range(dom->data, iova, size); return size; } -static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, - dma_addr_t iova) +static phys_addr_t mtk_iommu_v1_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { - struct mtk_iommu_domain *dom = to_mtk_domain(domain); + struct mtk_iommu_v1_domain *dom = to_mtk_domain(domain); unsigned long flags; phys_addr_t pa; @@ -389,17 +385,16 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, return pa; } -static const struct iommu_ops mtk_iommu_ops; +static const struct iommu_ops mtk_iommu_v1_ops; /* * MTK generation one iommu HW only support one iommu domain, and all the client * sharing the same iova address space. */ -static int mtk_iommu_create_mapping(struct device *dev, - struct of_phandle_args *args) +static int mtk_iommu_v1_create_mapping(struct device *dev, struct of_phandle_args *args) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct mtk_iommu_data *data; + struct mtk_iommu_v1_data *data; struct platform_device *m4updev; struct dma_iommu_mapping *mtk_mapping; int ret; @@ -411,11 +406,11 @@ static int mtk_iommu_create_mapping(struct device *dev, } if (!fwspec) { - ret = iommu_fwspec_init(dev, &args->np->fwnode, &mtk_iommu_ops); + ret = iommu_fwspec_init(dev, &args->np->fwnode, &mtk_iommu_v1_ops); if (ret) return ret; fwspec = dev_iommu_fwspec_get(dev); - } else if (dev_iommu_fwspec_get(dev)->ops != &mtk_iommu_ops) { + } else if (dev_iommu_fwspec_get(dev)->ops != &mtk_iommu_v1_ops) { return -EINVAL; } @@ -447,16 +442,16 @@ static int mtk_iommu_create_mapping(struct device *dev, return 0; } -static int mtk_iommu_def_domain_type(struct device *dev) +static int mtk_iommu_v1_def_domain_type(struct device *dev) { return IOMMU_DOMAIN_UNMANAGED; } -static struct iommu_device *mtk_iommu_probe_device(struct device *dev) +static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct of_phandle_args iommu_spec; - struct mtk_iommu_data *data; + struct mtk_iommu_v1_data *data; int err, idx = 0, larbid, larbidx; struct device_link *link; struct device *larbdev; @@ -474,7 +469,7 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev) "#iommu-cells", idx, &iommu_spec)) { - err = mtk_iommu_create_mapping(dev, &iommu_spec); + err = mtk_iommu_v1_create_mapping(dev, &iommu_spec); of_node_put(iommu_spec.np); if (err) return ERR_PTR(err); @@ -484,7 +479,7 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev) idx++; } - if (!fwspec || fwspec->ops != &mtk_iommu_ops) + if (!fwspec || fwspec->ops != &mtk_iommu_v1_ops) return ERR_PTR(-ENODEV); /* Not a iommu client device */ data = dev_iommu_priv_get(dev); @@ -509,10 +504,10 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev) return &data->iommu; } -static void mtk_iommu_probe_finalize(struct device *dev) +static void mtk_iommu_v1_probe_finalize(struct device *dev) { struct dma_iommu_mapping *mtk_mapping; - struct mtk_iommu_data *data; + struct mtk_iommu_v1_data *data; int err; data = dev_iommu_priv_get(dev); @@ -523,14 +518,14 @@ static void mtk_iommu_probe_finalize(struct device *dev) dev_err(dev, "Can't create IOMMU mapping - DMA-OPS will not work\n"); } -static void mtk_iommu_release_device(struct device *dev) +static void mtk_iommu_v1_release_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct mtk_iommu_data *data; + struct mtk_iommu_v1_data *data; struct device *larbdev; unsigned int larbid; - if (!fwspec || fwspec->ops != &mtk_iommu_ops) + if (!fwspec || fwspec->ops != &mtk_iommu_v1_ops) return; data = dev_iommu_priv_get(dev); @@ -541,7 +536,7 @@ static void mtk_iommu_release_device(struct device *dev) iommu_fwspec_free(dev); } -static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) +static int mtk_iommu_v1_hw_init(const struct mtk_iommu_v1_data *data) { u32 regval; int ret; @@ -571,7 +566,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) writel_relaxed(F_MMU_DCM_ON, data->base + REG_MMU_DCM); - if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0, + if (devm_request_irq(data->dev, data->irq, mtk_iommu_v1_isr, 0, dev_name(data->dev), (void *)data)) { writel_relaxed(0, data->base + REG_MMU_PT_BASE_ADDR); clk_disable_unprepare(data->bclk); @@ -582,39 +577,39 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) return 0; } -static const struct iommu_ops mtk_iommu_ops = { - .domain_alloc = mtk_iommu_domain_alloc, - .probe_device = mtk_iommu_probe_device, - .probe_finalize = mtk_iommu_probe_finalize, - .release_device = mtk_iommu_release_device, - .def_domain_type = mtk_iommu_def_domain_type, +static const struct iommu_ops mtk_iommu_v1_ops = { + .domain_alloc = mtk_iommu_v1_domain_alloc, + .probe_device = mtk_iommu_v1_probe_device, + .probe_finalize = mtk_iommu_v1_probe_finalize, + .release_device = mtk_iommu_v1_release_device, + .def_domain_type = mtk_iommu_v1_def_domain_type, .device_group = generic_device_group, .pgsize_bitmap = ~0UL << MT2701_IOMMU_PAGE_SHIFT, .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { - .attach_dev = mtk_iommu_attach_device, - .detach_dev = mtk_iommu_detach_device, - .map = mtk_iommu_map, - .unmap = mtk_iommu_unmap, - .iova_to_phys = mtk_iommu_iova_to_phys, - .free = mtk_iommu_domain_free, + .attach_dev = mtk_iommu_v1_attach_device, + .detach_dev = mtk_iommu_v1_detach_device, + .map = mtk_iommu_v1_map, + .unmap = mtk_iommu_v1_unmap, + .iova_to_phys = mtk_iommu_v1_iova_to_phys, + .free = mtk_iommu_v1_domain_free, } }; -static const struct of_device_id mtk_iommu_of_ids[] = { +static const struct of_device_id mtk_iommu_v1_of_ids[] = { { .compatible = "mediatek,mt2701-m4u", }, {} }; -static const struct component_master_ops mtk_iommu_com_ops = { - .bind = mtk_iommu_bind, - .unbind = mtk_iommu_unbind, +static const struct component_master_ops mtk_iommu_v1_com_ops = { + .bind = mtk_iommu_v1_bind, + .unbind = mtk_iommu_v1_unbind, }; -static int mtk_iommu_probe(struct platform_device *pdev) +static int mtk_iommu_v1_probe(struct platform_device *pdev) { - struct mtk_iommu_data *data; struct device *dev = &pdev->dev; + struct mtk_iommu_v1_data *data; struct resource *res; struct component_match *match = NULL; void *protect; @@ -681,7 +676,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, data); - ret = mtk_iommu_hw_init(data); + ret = mtk_iommu_v1_hw_init(data); if (ret) return ret; @@ -690,17 +685,17 @@ static int mtk_iommu_probe(struct platform_device *pdev) if (ret) return ret; - ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev); + ret = iommu_device_register(&data->iommu, &mtk_iommu_v1_ops, dev); if (ret) goto out_sysfs_remove; if (!iommu_present(&platform_bus_type)) { - ret = bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); + ret = bus_set_iommu(&platform_bus_type, &mtk_iommu_v1_ops); if (ret) goto out_dev_unreg; } - ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match); + ret = component_master_add_with_match(dev, &mtk_iommu_v1_com_ops, match); if (ret) goto out_bus_set_null; return ret; @@ -714,9 +709,9 @@ out_sysfs_remove: return ret; } -static int mtk_iommu_remove(struct platform_device *pdev) +static int mtk_iommu_v1_remove(struct platform_device *pdev) { - struct mtk_iommu_data *data = platform_get_drvdata(pdev); + struct mtk_iommu_v1_data *data = platform_get_drvdata(pdev); iommu_device_sysfs_remove(&data->iommu); iommu_device_unregister(&data->iommu); @@ -726,14 +721,14 @@ static int mtk_iommu_remove(struct platform_device *pdev) clk_disable_unprepare(data->bclk); devm_free_irq(&pdev->dev, data->irq, data); - component_master_del(&pdev->dev, &mtk_iommu_com_ops); + component_master_del(&pdev->dev, &mtk_iommu_v1_com_ops); return 0; } -static int __maybe_unused mtk_iommu_suspend(struct device *dev) +static int __maybe_unused mtk_iommu_v1_suspend(struct device *dev) { - struct mtk_iommu_data *data = dev_get_drvdata(dev); - struct mtk_iommu_suspend_reg *reg = &data->reg; + struct mtk_iommu_v1_data *data = dev_get_drvdata(dev); + struct mtk_iommu_v1_suspend_reg *reg = &data->reg; void __iomem *base = data->base; reg->standard_axi_mode = readl_relaxed(base + @@ -744,10 +739,10 @@ static int __maybe_unused mtk_iommu_suspend(struct device *dev) return 0; } -static int __maybe_unused mtk_iommu_resume(struct device *dev) +static int __maybe_unused mtk_iommu_v1_resume(struct device *dev) { - struct mtk_iommu_data *data = dev_get_drvdata(dev); - struct mtk_iommu_suspend_reg *reg = &data->reg; + struct mtk_iommu_v1_data *data = dev_get_drvdata(dev); + struct mtk_iommu_v1_suspend_reg *reg = &data->reg; void __iomem *base = data->base; writel_relaxed(data->m4u_dom->pgt_pa, base + REG_MMU_PT_BASE_ADDR); @@ -760,20 +755,20 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) return 0; } -static const struct dev_pm_ops mtk_iommu_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume) +static const struct dev_pm_ops mtk_iommu_v1_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(mtk_iommu_v1_suspend, mtk_iommu_v1_resume) }; -static struct platform_driver mtk_iommu_driver = { - .probe = mtk_iommu_probe, - .remove = mtk_iommu_remove, +static struct platform_driver mtk_iommu_v1_driver = { + .probe = mtk_iommu_v1_probe, + .remove = mtk_iommu_v1_remove, .driver = { .name = "mtk-iommu-v1", - .of_match_table = mtk_iommu_of_ids, - .pm = &mtk_iommu_pm_ops, + .of_match_table = mtk_iommu_v1_of_ids, + .pm = &mtk_iommu_v1_pm_ops, } }; -module_platform_driver(mtk_iommu_driver); +module_platform_driver(mtk_iommu_v1_driver); MODULE_DESCRIPTION("IOMMU API for MediaTek M4U v1 implementations"); MODULE_LICENSE("GPL v2"); From 99ca02281332957e0ee7e3702c4713dfae4a6046 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:20 +0800 Subject: [PATCH 563/803] iommu/mediatek: Add mtk_iommu_bank_data structure Prepare for supporting multi-banks for the IOMMU HW, No functional change. Add a new structure(mtk_iommu_bank_data) for each a bank. Each a bank have the independent HW base/IRQ/tlb-range ops, and each a bank has its special iommu-domain(independent pgtable), thus, also move the domain information into it. In previous SoC, we have only one bank which could be treated as bank0( bankid always is 0 for the previous SoC). After adding this structure, the tlb operations and irq could use bank_data as parameter. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-30-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 177 +++++++++++++++++++++++++------------- 1 file changed, 116 insertions(+), 61 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 349640bcbd01..968e1486e6fd 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -151,6 +151,7 @@ #define MTK_LARB_SUBCOM_MAX 8 #define MTK_IOMMU_GROUP_MAX 8 +#define MTK_IOMMU_BANK_MAX 5 enum mtk_iommu_plat { M4U_MT2712, @@ -187,25 +188,36 @@ struct mtk_iommu_plat_data { struct list_head *hw_list; unsigned int iova_region_nr; const struct mtk_iommu_iova_region *iova_region; + + u8 banks_num; + bool banks_enable[MTK_IOMMU_BANK_MAX]; unsigned char larbid_remap[MTK_LARB_COM_MAX][MTK_LARB_SUBCOM_MAX]; }; -struct mtk_iommu_data { +struct mtk_iommu_bank_data { void __iomem *base; int irq; + u8 id; + struct device *parent_dev; + struct mtk_iommu_data *parent_data; + spinlock_t tlb_lock; /* lock for tlb range flush */ + struct mtk_iommu_domain *m4u_dom; /* Each bank has a domain */ +}; + +struct mtk_iommu_data { struct device *dev; struct clk *bclk; phys_addr_t protect_base; /* protect memory base */ struct mtk_iommu_suspend_reg reg; - struct mtk_iommu_domain *m4u_dom; struct iommu_group *m4u_group[MTK_IOMMU_GROUP_MAX]; bool enable_4GB; - spinlock_t tlb_lock; /* lock for tlb range flush */ struct iommu_device iommu; const struct mtk_iommu_plat_data *plat_data; struct device *smicomm_dev; + struct mtk_iommu_bank_data *bank; + struct dma_iommu_mapping *mapping; /* For mtk_iommu_v1.c */ struct regmap *pericfg; @@ -225,7 +237,7 @@ struct mtk_iommu_domain { struct io_pgtable_cfg cfg; struct io_pgtable_ops *iop; - struct mtk_iommu_data *data; + struct mtk_iommu_bank_data *bank; struct iommu_domain domain; struct mutex mutex; /* Protect "data" in this structure */ @@ -311,20 +323,24 @@ static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data) { - void __iomem *base = data->base; + /* Tlb flush all always is in bank0. */ + struct mtk_iommu_bank_data *bank = &data->bank[0]; + void __iomem *base = bank->base; unsigned long flags; - spin_lock_irqsave(&data->tlb_lock, flags); + spin_lock_irqsave(&bank->tlb_lock, flags); writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, base + data->plat_data->inv_sel_reg); writel_relaxed(F_ALL_INVLD, base + REG_MMU_INVALIDATE); wmb(); /* Make sure the tlb flush all done */ - spin_unlock_irqrestore(&data->tlb_lock, flags); + spin_unlock_irqrestore(&bank->tlb_lock, flags); } static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, - struct mtk_iommu_data *data) + struct mtk_iommu_bank_data *bank) { - struct list_head *head = data->hw_list; + struct list_head *head = bank->parent_data->hw_list; + struct mtk_iommu_bank_data *curbank; + struct mtk_iommu_data *data; bool check_pm_status; unsigned long flags; void __iomem *base; @@ -354,9 +370,10 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, continue; } - base = data->base; + curbank = &data->bank[bank->id]; + base = curbank->base; - spin_lock_irqsave(&data->tlb_lock, flags); + spin_lock_irqsave(&curbank->tlb_lock, flags); writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, base + data->plat_data->inv_sel_reg); @@ -371,7 +388,7 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, /* Clear the CPE status */ writel_relaxed(0, base + REG_MMU_CPE_DONE); - spin_unlock_irqrestore(&data->tlb_lock, flags); + spin_unlock_irqrestore(&curbank->tlb_lock, flags); if (ret) { dev_warn(data->dev, @@ -386,12 +403,13 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) { - struct mtk_iommu_data *data = dev_id; - struct mtk_iommu_domain *dom = data->m4u_dom; + struct mtk_iommu_bank_data *bank = dev_id; + struct mtk_iommu_data *data = bank->parent_data; + struct mtk_iommu_domain *dom = bank->m4u_dom; unsigned int fault_larb = MTK_INVALID_LARBID, fault_port = 0, sub_comm = 0; u32 int_state, regval, va34_32, pa34_32; const struct mtk_iommu_plat_data *plat_data = data->plat_data; - void __iomem *base = data->base; + void __iomem *base = bank->base; u64 fault_iova, fault_pa; bool layer, write; @@ -430,10 +448,10 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) fault_larb = data->plat_data->larbid_remap[fault_larb][sub_comm]; } - if (report_iommu_fault(&dom->domain, data->dev, fault_iova, + if (report_iommu_fault(&dom->domain, bank->parent_dev, fault_iova, write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) { dev_err_ratelimited( - data->dev, + bank->parent_dev, "fault type=0x%x iova=0x%llx pa=0x%llx master=0x%x(larb=%d port=%d) layer=%d %s\n", int_state, fault_iova, fault_pa, regval, fault_larb, fault_port, layer, write ? "write" : "read"); @@ -530,12 +548,14 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, unsigned int domid) { const struct mtk_iommu_iova_region *region; + struct mtk_iommu_domain *m4u_dom; - /* Use the exist domain as there is only one pgtable here. */ - if (data->m4u_dom) { - dom->iop = data->m4u_dom->iop; - dom->cfg = data->m4u_dom->cfg; - dom->domain.pgsize_bitmap = data->m4u_dom->cfg.pgsize_bitmap; + /* Always use bank0 in sharing pgtable case */ + m4u_dom = data->bank[0].m4u_dom; + if (m4u_dom) { + dom->iop = m4u_dom->iop; + dom->cfg = m4u_dom->cfg; + dom->domain.pgsize_bitmap = m4u_dom->cfg.pgsize_bitmap; goto update_iova_region; } @@ -598,6 +618,8 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, struct mtk_iommu_domain *dom = to_mtk_domain(domain); struct list_head *hw_list = data->hw_list; struct device *m4udev = data->dev; + struct mtk_iommu_bank_data *bank; + unsigned int bankid = 0; int ret, domid; domid = mtk_iommu_get_domain_id(dev, data->plat_data); @@ -605,7 +627,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, return domid; mutex_lock(&dom->mutex); - if (!dom->data) { + if (!dom->bank) { /* Data is in the frstdata in sharing pgtable case. */ frstdata = mtk_iommu_get_frst_data(hw_list); @@ -614,12 +636,13 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, mutex_unlock(&dom->mutex); return -ENODEV; } - dom->data = data; + dom->bank = &data->bank[bankid]; } mutex_unlock(&dom->mutex); mutex_lock(&data->mutex); - if (!data->m4u_dom) { /* Initialize the M4U HW */ + bank = &data->bank[bankid]; + if (!bank->m4u_dom) { /* Initialize the M4U HW */ ret = pm_runtime_resume_and_get(m4udev); if (ret < 0) goto err_unlock; @@ -629,9 +652,9 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, pm_runtime_put(m4udev); goto err_unlock; } - data->m4u_dom = dom; + bank->m4u_dom = dom; writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, - data->base + REG_MMU_PT_BASE_ADDR); + bank->base + REG_MMU_PT_BASE_ADDR); pm_runtime_put(m4udev); } @@ -658,7 +681,7 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, struct mtk_iommu_domain *dom = to_mtk_domain(domain); /* The "4GB mode" M4U physically can not use the lower remap of Dram. */ - if (dom->data->enable_4GB) + if (dom->bank->parent_data->enable_4GB) paddr |= BIT_ULL(32); /* Synchronize with the tlb_lock */ @@ -679,7 +702,7 @@ static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - mtk_iommu_tlb_flush_all(dom->data); + mtk_iommu_tlb_flush_all(dom->bank->parent_data); } static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, @@ -688,7 +711,7 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, struct mtk_iommu_domain *dom = to_mtk_domain(domain); size_t length = gather->end - gather->start + 1; - mtk_iommu_tlb_flush_range_sync(gather->start, length, dom->data); + mtk_iommu_tlb_flush_range_sync(gather->start, length, dom->bank); } static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, @@ -696,7 +719,7 @@ static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - mtk_iommu_tlb_flush_range_sync(iova, size, dom->data); + mtk_iommu_tlb_flush_range_sync(iova, size, dom->bank); } static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, @@ -707,7 +730,7 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, pa = dom->iop->iova_to_phys(dom->iop, iova); if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT) && - dom->data->enable_4GB && + dom->bank->parent_data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE) pa &= ~BIT_ULL(32); @@ -876,16 +899,17 @@ static const struct iommu_ops mtk_iommu_ops = { static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) { + const struct mtk_iommu_bank_data *bank0 = &data->bank[0]; u32 regval; if (data->plat_data->m4u_plat == M4U_MT8173) { regval = F_MMU_PREFETCH_RT_REPLACE_MOD | F_MMU_TF_PROT_TO_PROGRAM_ADDR_MT8173; } else { - regval = readl_relaxed(data->base + REG_MMU_CTRL_REG); + regval = readl_relaxed(bank0->base + REG_MMU_CTRL_REG); regval |= F_MMU_TF_PROT_TO_PROGRAM_ADDR; } - writel_relaxed(regval, data->base + REG_MMU_CTRL_REG); + writel_relaxed(regval, bank0->base + REG_MMU_CTRL_REG); if (data->enable_4GB && MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_VLD_PA_RNG)) { @@ -894,31 +918,31 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) * 0x1_0000_0000 to 0x1_ffff_ffff. here record bit[32:30]. */ regval = F_MMU_VLD_PA_RNG(7, 4); - writel_relaxed(regval, data->base + REG_MMU_VLD_PA_RNG); + writel_relaxed(regval, bank0->base + REG_MMU_VLD_PA_RNG); } if (MTK_IOMMU_HAS_FLAG(data->plat_data, DCM_DISABLE)) - writel_relaxed(F_MMU_DCM, data->base + REG_MMU_DCM_DIS); + writel_relaxed(F_MMU_DCM, bank0->base + REG_MMU_DCM_DIS); else - writel_relaxed(0, data->base + REG_MMU_DCM_DIS); + writel_relaxed(0, bank0->base + REG_MMU_DCM_DIS); if (MTK_IOMMU_HAS_FLAG(data->plat_data, WR_THROT_EN)) { /* write command throttling mode */ - regval = readl_relaxed(data->base + REG_MMU_WR_LEN_CTRL); + regval = readl_relaxed(bank0->base + REG_MMU_WR_LEN_CTRL); regval &= ~F_MMU_WR_THROT_DIS_MASK; - writel_relaxed(regval, data->base + REG_MMU_WR_LEN_CTRL); + writel_relaxed(regval, bank0->base + REG_MMU_WR_LEN_CTRL); } if (MTK_IOMMU_HAS_FLAG(data->plat_data, RESET_AXI)) { /* The register is called STANDARD_AXI_MODE in this case */ regval = 0; } else { - regval = readl_relaxed(data->base + REG_MMU_MISC_CTRL); + regval = readl_relaxed(bank0->base + REG_MMU_MISC_CTRL); if (!MTK_IOMMU_HAS_FLAG(data->plat_data, STD_AXI_MODE)) regval &= ~F_MMU_STANDARD_AXI_MODE_MASK; if (MTK_IOMMU_HAS_FLAG(data->plat_data, OUT_ORDER_WR_EN)) regval &= ~F_MMU_IN_ORDER_WR_EN_MASK; } - writel_relaxed(regval, data->base + REG_MMU_MISC_CTRL); + writel_relaxed(regval, bank0->base + REG_MMU_MISC_CTRL); regval = F_L2_MULIT_HIT_EN | F_TABLE_WALK_FAULT_INT_EN | @@ -926,7 +950,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) F_MISS_FIFO_OVERFLOW_INT_EN | F_PREFETCH_FIFO_ERR_INT_EN | F_MISS_FIFO_ERR_INT_EN; - writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL0); + writel_relaxed(regval, bank0->base + REG_MMU_INT_CONTROL0); regval = F_INT_TRANSLATION_FAULT | F_INT_MAIN_MULTI_HIT_FAULT | @@ -935,19 +959,19 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) F_INT_TLB_MISS_FAULT | F_INT_MISS_TRANSACTION_FIFO_FAULT | F_INT_PRETETCH_TRANSATION_FIFO_FAULT; - writel_relaxed(regval, data->base + REG_MMU_INT_MAIN_CONTROL); + writel_relaxed(regval, bank0->base + REG_MMU_INT_MAIN_CONTROL); if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_LEGACY_IVRP_PADDR)) regval = (data->protect_base >> 1) | (data->enable_4GB << 31); else regval = lower_32_bits(data->protect_base) | upper_32_bits(data->protect_base); - writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR); + writel_relaxed(regval, bank0->base + REG_MMU_IVRP_PADDR); - if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0, - dev_name(data->dev), (void *)data)) { - writel_relaxed(0, data->base + REG_MMU_PT_BASE_ADDR); - dev_err(data->dev, "Failed @ IRQ-%d Request\n", data->irq); + if (devm_request_irq(bank0->parent_dev, bank0->irq, mtk_iommu_isr, 0, + dev_name(bank0->parent_dev), (void *)bank0)) { + writel_relaxed(0, bank0->base + REG_MMU_PT_BASE_ADDR); + dev_err(bank0->parent_dev, "Failed @ IRQ-%d Request\n", bank0->irq); return -ENODEV; } @@ -1039,9 +1063,11 @@ static int mtk_iommu_probe(struct platform_device *pdev) struct component_match *match = NULL; struct regmap *infracfg; void *protect; - int ret; + int ret, banks_num; u32 val; char *p; + struct mtk_iommu_bank_data *bank; + void __iomem *base; data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) @@ -1079,14 +1105,26 @@ static int mtk_iommu_probe(struct platform_device *pdev) } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - data->base = devm_ioremap_resource(dev, res); - if (IS_ERR(data->base)) - return PTR_ERR(data->base); + base = devm_ioremap_resource(dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); ioaddr = res->start; - data->irq = platform_get_irq(pdev, 0); - if (data->irq < 0) - return data->irq; + banks_num = data->plat_data->banks_num; + data->bank = devm_kmalloc(dev, banks_num * sizeof(*data->bank), GFP_KERNEL); + if (!data->bank) + return -ENOMEM; + + bank = &data->bank[0]; + bank->id = 0; + bank->base = base; + bank->m4u_dom = NULL; + bank->irq = platform_get_irq(pdev, 0); + if (bank->irq < 0) + return bank->irq; + bank->parent_dev = dev; + bank->parent_data = data; + spin_lock_init(&bank->tlb_lock); if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_BCLK)) { data->bclk = devm_clk_get(dev, "bclk"); @@ -1125,8 +1163,6 @@ static int mtk_iommu_probe(struct platform_device *pdev) if (ret) goto out_sysfs_remove; - spin_lock_init(&data->tlb_lock); - if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) { list_add_tail(&data->list, data->plat_data->hw_list); data->hw_list = data->plat_data->hw_list; @@ -1176,6 +1212,7 @@ out_runtime_disable: static int mtk_iommu_remove(struct platform_device *pdev) { struct mtk_iommu_data *data = platform_get_drvdata(pdev); + struct mtk_iommu_bank_data *bank = &data->bank[0]; iommu_device_sysfs_remove(&data->iommu); iommu_device_unregister(&data->iommu); @@ -1192,7 +1229,7 @@ static int mtk_iommu_remove(struct platform_device *pdev) #endif } pm_runtime_disable(&pdev->dev); - devm_free_irq(&pdev->dev, data->irq, data); + devm_free_irq(&pdev->dev, bank->irq, bank); return 0; } @@ -1200,7 +1237,7 @@ static int __maybe_unused mtk_iommu_runtime_suspend(struct device *dev) { struct mtk_iommu_data *data = dev_get_drvdata(dev); struct mtk_iommu_suspend_reg *reg = &data->reg; - void __iomem *base = data->base; + void __iomem *base = data->bank[0].base; reg->wr_len_ctrl = readl_relaxed(base + REG_MMU_WR_LEN_CTRL); reg->misc_ctrl = readl_relaxed(base + REG_MMU_MISC_CTRL); @@ -1218,8 +1255,8 @@ static int __maybe_unused mtk_iommu_runtime_resume(struct device *dev) { struct mtk_iommu_data *data = dev_get_drvdata(dev); struct mtk_iommu_suspend_reg *reg = &data->reg; - struct mtk_iommu_domain *m4u_dom = data->m4u_dom; - void __iomem *base = data->base; + struct mtk_iommu_domain *m4u_dom = data->bank[0].m4u_dom; + void __iomem *base = data->bank[0].base; int ret; ret = clk_prepare_enable(data->bclk); @@ -1267,6 +1304,8 @@ static const struct mtk_iommu_plat_data mt2712_data = { .hw_list = &m4ulist, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, .iova_region = single_domain, + .banks_num = 1, + .banks_enable = {true}, .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}}, }; @@ -1276,6 +1315,8 @@ static const struct mtk_iommu_plat_data mt6779_data = { .flags = HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | WR_THROT_EN | MTK_IOMMU_TYPE_MM, .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .banks_num = 1, + .banks_enable = {true}, .iova_region = single_domain, .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {1}, {2}, {3}, {5}, {7, 8}, {10}, {9}}, @@ -1285,6 +1326,8 @@ static const struct mtk_iommu_plat_data mt8167_data = { .m4u_plat = M4U_MT8167, .flags = RESET_AXI | HAS_LEGACY_IVRP_PADDR | MTK_IOMMU_TYPE_MM, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .banks_num = 1, + .banks_enable = {true}, .iova_region = single_domain, .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {1}, {2}}, /* Linear mapping. */ @@ -1295,6 +1338,8 @@ static const struct mtk_iommu_plat_data mt8173_data = { .flags = HAS_4GB_MODE | HAS_BCLK | RESET_AXI | HAS_LEGACY_IVRP_PADDR | MTK_IOMMU_TYPE_MM, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .banks_num = 1, + .banks_enable = {true}, .iova_region = single_domain, .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}}, /* Linear mapping. */ @@ -1304,6 +1349,8 @@ static const struct mtk_iommu_plat_data mt8183_data = { .m4u_plat = M4U_MT8183, .flags = RESET_AXI | MTK_IOMMU_TYPE_MM, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .banks_num = 1, + .banks_enable = {true}, .iova_region = single_domain, .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {4}, {5}, {6}, {7}, {2}, {3}, {1}}, @@ -1314,6 +1361,8 @@ static const struct mtk_iommu_plat_data mt8192_data = { .flags = HAS_BCLK | HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | WR_THROT_EN | IOVA_34_EN | MTK_IOMMU_TYPE_MM, .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .banks_num = 1, + .banks_enable = {true}, .iova_region = mt8192_multi_dom, .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), .larbid_remap = {{0}, {1}, {4, 5}, {7}, {2}, {9, 11, 19, 20}, @@ -1326,6 +1375,8 @@ static const struct mtk_iommu_plat_data mt8195_data_infra = { MTK_IOMMU_TYPE_INFRA | IFA_IOMMU_PCIE_SUPPORT, .pericfg_comp_str = "mediatek,mt8195-pericfg_ao", .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .banks_num = 1, + .banks_enable = {true}, .iova_region = single_domain, .iova_region_nr = ARRAY_SIZE(single_domain), }; @@ -1336,6 +1387,8 @@ static const struct mtk_iommu_plat_data mt8195_data_vdo = { WR_THROT_EN | IOVA_34_EN | SHARE_PGTABLE | MTK_IOMMU_TYPE_MM, .hw_list = &m4ulist, .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .banks_num = 1, + .banks_enable = {true}, .iova_region = mt8192_multi_dom, .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), .larbid_remap = {{2, 0}, {21}, {24}, {7}, {19}, {9, 10, 11}, @@ -1348,6 +1401,8 @@ static const struct mtk_iommu_plat_data mt8195_data_vpp = { WR_THROT_EN | IOVA_34_EN | SHARE_PGTABLE | MTK_IOMMU_TYPE_MM, .hw_list = &m4ulist, .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .banks_num = 1, + .banks_enable = {true}, .iova_region = mt8192_multi_dom, .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), .larbid_remap = {{1}, {3}, From e24453e165bc774b53f826e86e8f4e9931ffcfba Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:21 +0800 Subject: [PATCH 564/803] iommu/mediatek: Initialise bank HW for each a bank The mt8195 IOMMU HW max support 5 banks, and regarding the banks' registers, it looks like: ---------------------------------------- |bank0 | bank1 | bank2 | bank3 | bank4| ---------------------------------------- |global | |control| null |regs | ----------------------------------------- |bank |bank |bank |bank |bank | |regs |regs |regs |regs |regs | | | | | | | ----------------------------------------- Each bank has some special bank registers and it share bank0's global control registers. this patch initialise the bank hw with the bankid. In the hw_init, we always initialise bank0's control register since we don't know if the bank0 is initialised. Additionally, About each bank's register base, always delta 0x1000. like bank[x + 1] = bank[x] + 0x1000. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-31-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 968e1486e6fd..5f298cf6aac3 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -259,7 +259,7 @@ static void mtk_iommu_unbind(struct device *dev) static const struct iommu_ops mtk_iommu_ops; -static int mtk_iommu_hw_init(const struct mtk_iommu_data *data); +static int mtk_iommu_hw_init(const struct mtk_iommu_data *data, unsigned int bankid); #define MTK_IOMMU_TLB_ADDR(iova) ({ \ dma_addr_t _addr = iova; \ @@ -642,12 +642,14 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, mutex_lock(&data->mutex); bank = &data->bank[bankid]; - if (!bank->m4u_dom) { /* Initialize the M4U HW */ + if (!bank->m4u_dom) { /* Initialize the M4U HW for each a BANK */ ret = pm_runtime_resume_and_get(m4udev); - if (ret < 0) + if (ret < 0) { + dev_err(m4udev, "pm get fail(%d) in attach.\n", ret); goto err_unlock; + } - ret = mtk_iommu_hw_init(data); + ret = mtk_iommu_hw_init(data, bankid); if (ret) { pm_runtime_put(m4udev); goto err_unlock; @@ -897,11 +899,16 @@ static const struct iommu_ops mtk_iommu_ops = { } }; -static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) +static int mtk_iommu_hw_init(const struct mtk_iommu_data *data, unsigned int bankid) { + const struct mtk_iommu_bank_data *bankx = &data->bank[bankid]; const struct mtk_iommu_bank_data *bank0 = &data->bank[0]; u32 regval; + /* + * Global control settings are in bank0. May re-init these global registers + * since no sure if there is bank0 consumers. + */ if (data->plat_data->m4u_plat == M4U_MT8173) { regval = F_MMU_PREFETCH_RT_REPLACE_MOD | F_MMU_TF_PROT_TO_PROGRAM_ADDR_MT8173; @@ -944,13 +951,14 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) } writel_relaxed(regval, bank0->base + REG_MMU_MISC_CTRL); + /* Independent settings for each bank */ regval = F_L2_MULIT_HIT_EN | F_TABLE_WALK_FAULT_INT_EN | F_PREETCH_FIFO_OVERFLOW_INT_EN | F_MISS_FIFO_OVERFLOW_INT_EN | F_PREFETCH_FIFO_ERR_INT_EN | F_MISS_FIFO_ERR_INT_EN; - writel_relaxed(regval, bank0->base + REG_MMU_INT_CONTROL0); + writel_relaxed(regval, bankx->base + REG_MMU_INT_CONTROL0); regval = F_INT_TRANSLATION_FAULT | F_INT_MAIN_MULTI_HIT_FAULT | @@ -959,19 +967,19 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) F_INT_TLB_MISS_FAULT | F_INT_MISS_TRANSACTION_FIFO_FAULT | F_INT_PRETETCH_TRANSATION_FIFO_FAULT; - writel_relaxed(regval, bank0->base + REG_MMU_INT_MAIN_CONTROL); + writel_relaxed(regval, bankx->base + REG_MMU_INT_MAIN_CONTROL); if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_LEGACY_IVRP_PADDR)) regval = (data->protect_base >> 1) | (data->enable_4GB << 31); else regval = lower_32_bits(data->protect_base) | upper_32_bits(data->protect_base); - writel_relaxed(regval, bank0->base + REG_MMU_IVRP_PADDR); + writel_relaxed(regval, bankx->base + REG_MMU_IVRP_PADDR); - if (devm_request_irq(bank0->parent_dev, bank0->irq, mtk_iommu_isr, 0, - dev_name(bank0->parent_dev), (void *)bank0)) { - writel_relaxed(0, bank0->base + REG_MMU_PT_BASE_ADDR); - dev_err(bank0->parent_dev, "Failed @ IRQ-%d Request\n", bank0->irq); + if (devm_request_irq(bankx->parent_dev, bankx->irq, mtk_iommu_isr, 0, + dev_name(bankx->parent_dev), (void *)bankx)) { + writel_relaxed(0, bankx->base + REG_MMU_PT_BASE_ADDR); + dev_err(bankx->parent_dev, "Failed @ IRQ-%d Request\n", bankx->irq); return -ENODEV; } From d72e0ff5424e74b551c73ed2d34c73b70b921733 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:22 +0800 Subject: [PATCH 565/803] iommu/mediatek: Change the domid to iova_region_id Prepare for adding bankid, also no functional change. In the previous SoC, each a iova_region is a domain; In the multi-banks case, each a bank is a domain, then the original function name "mtk_iommu_get_domain_id" is not proper. Use "iova_region_id" instead of "domain_id". Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-32-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 46 +++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 5f298cf6aac3..023bb7d3ffb2 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -467,8 +467,8 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static int mtk_iommu_get_domain_id(struct device *dev, - const struct mtk_iommu_plat_data *plat_data) +static int mtk_iommu_get_iova_region_id(struct device *dev, + const struct mtk_iommu_plat_data *plat_data) { const struct mtk_iommu_iova_region *rgn = plat_data->iova_region; const struct bus_dma_region *dma_rgn = dev->dma_range_map; @@ -498,7 +498,7 @@ static int mtk_iommu_get_domain_id(struct device *dev, } static int mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, - bool enable, unsigned int domid) + bool enable, unsigned int regionid) { struct mtk_smi_larb_iommu *larb_mmu; unsigned int larbid, portid; @@ -514,12 +514,12 @@ static int mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { larb_mmu = &data->larb_imu[larbid]; - region = data->plat_data->iova_region + domid; + region = data->plat_data->iova_region + regionid; larb_mmu->bank[portid] = upper_32_bits(region->iova_base); - dev_dbg(dev, "%s iommu for larb(%s) port %d dom %d bank %d.\n", + dev_dbg(dev, "%s iommu for larb(%s) port %d region %d rgn-bank %d.\n", enable ? "enable" : "disable", dev_name(larb_mmu->dev), - portid, domid, larb_mmu->bank[portid]); + portid, regionid, larb_mmu->bank[portid]); if (enable) larb_mmu->mmu |= MTK_SMI_MMU_EN(portid); @@ -545,7 +545,7 @@ static int mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, struct mtk_iommu_data *data, - unsigned int domid) + unsigned int region_id) { const struct mtk_iommu_iova_region *region; struct mtk_iommu_domain *m4u_dom; @@ -584,7 +584,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, update_iova_region: /* Update the iova region for this domain */ - region = data->plat_data->iova_region + domid; + region = data->plat_data->iova_region + region_id; dom->domain.geometry.aperture_start = region->iova_base; dom->domain.geometry.aperture_end = region->iova_base + region->size - 1; dom->domain.geometry.force_aperture = true; @@ -620,18 +620,18 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, struct device *m4udev = data->dev; struct mtk_iommu_bank_data *bank; unsigned int bankid = 0; - int ret, domid; + int ret, region_id; - domid = mtk_iommu_get_domain_id(dev, data->plat_data); - if (domid < 0) - return domid; + region_id = mtk_iommu_get_iova_region_id(dev, data->plat_data); + if (region_id < 0) + return region_id; mutex_lock(&dom->mutex); if (!dom->bank) { /* Data is in the frstdata in sharing pgtable case. */ frstdata = mtk_iommu_get_frst_data(hw_list); - ret = mtk_iommu_domain_finalise(dom, frstdata, domid); + ret = mtk_iommu_domain_finalise(dom, frstdata, region_id); if (ret) { mutex_unlock(&dom->mutex); return -ENODEV; @@ -662,7 +662,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, } mutex_unlock(&data->mutex); - return mtk_iommu_config(data, dev, true, domid); + return mtk_iommu_config(data, dev, true, region_id); err_unlock: mutex_unlock(&data->mutex); @@ -802,22 +802,22 @@ static struct iommu_group *mtk_iommu_device_group(struct device *dev) struct mtk_iommu_data *c_data = dev_iommu_priv_get(dev), *data; struct list_head *hw_list = c_data->hw_list; struct iommu_group *group; - int domid; + int regionid; data = mtk_iommu_get_frst_data(hw_list); if (!data) return ERR_PTR(-ENODEV); - domid = mtk_iommu_get_domain_id(dev, data->plat_data); - if (domid < 0) - return ERR_PTR(domid); + regionid = mtk_iommu_get_iova_region_id(dev, data->plat_data); + if (regionid < 0) + return ERR_PTR(regionid); mutex_lock(&data->mutex); - group = data->m4u_group[domid]; + group = data->m4u_group[regionid]; if (!group) { group = iommu_group_alloc(); if (!IS_ERR(group)) - data->m4u_group[domid] = group; + data->m4u_group[regionid] = group; } else { iommu_group_ref_get(group); } @@ -851,14 +851,14 @@ static void mtk_iommu_get_resv_regions(struct device *dev, struct list_head *head) { struct mtk_iommu_data *data = dev_iommu_priv_get(dev); - unsigned int domid = mtk_iommu_get_domain_id(dev, data->plat_data), i; + unsigned int regionid = mtk_iommu_get_iova_region_id(dev, data->plat_data), i; const struct mtk_iommu_iova_region *resv, *curdom; struct iommu_resv_region *region; int prot = IOMMU_WRITE | IOMMU_READ; - if ((int)domid < 0) + if ((int)regionid < 0) return; - curdom = data->plat_data->iova_region + domid; + curdom = data->plat_data->iova_region + regionid; for (i = 0; i < data->plat_data->iova_region_nr; i++) { resv = data->plat_data->iova_region + i; From 57fb481f9008997563a8de96aa49726fd51d6e3e Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:23 +0800 Subject: [PATCH 566/803] iommu/mediatek: Get the proper bankid for multi banks We preassign some ports in a special bank via the new defined banks_portmsk. Put it in the plat_data means it is not expected to be adjusted dynamically. If the iommu id in the iommu consumer's dtsi node is inside this banks_portmsk, then we switch it to this special iommu bank, and initialise the IOMMU bank HW. Each bank has the independent pgtable(4GB iova range). Each bank is a independent iommu domain/group. Currently we don't separate different iova ranges inside a bank. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-33-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 55 ++++++++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 023bb7d3ffb2..5f3e88c44514 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -191,6 +191,7 @@ struct mtk_iommu_plat_data { u8 banks_num; bool banks_enable[MTK_IOMMU_BANK_MAX]; + unsigned int banks_portmsk[MTK_IOMMU_BANK_MAX]; unsigned char larbid_remap[MTK_LARB_COM_MAX][MTK_LARB_SUBCOM_MAX]; }; @@ -467,6 +468,30 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) return IRQ_HANDLED; } +static unsigned int mtk_iommu_get_bank_id(struct device *dev, + const struct mtk_iommu_plat_data *plat_data) +{ + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + unsigned int i, portmsk = 0, bankid = 0; + + if (plat_data->banks_num == 1) + return bankid; + + for (i = 0; i < fwspec->num_ids; i++) + portmsk |= BIT(MTK_M4U_TO_PORT(fwspec->ids[i])); + + for (i = 0; i < plat_data->banks_num && i < MTK_IOMMU_BANK_MAX; i++) { + if (!plat_data->banks_enable[i]) + continue; + + if (portmsk & plat_data->banks_portmsk[i]) { + bankid = i; + break; + } + } + return bankid; /* default is 0 */ +} + static int mtk_iommu_get_iova_region_id(struct device *dev, const struct mtk_iommu_plat_data *plat_data) { @@ -619,13 +644,14 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, struct list_head *hw_list = data->hw_list; struct device *m4udev = data->dev; struct mtk_iommu_bank_data *bank; - unsigned int bankid = 0; + unsigned int bankid; int ret, region_id; region_id = mtk_iommu_get_iova_region_id(dev, data->plat_data); if (region_id < 0) return region_id; + bankid = mtk_iommu_get_bank_id(dev, data->plat_data); mutex_lock(&dom->mutex); if (!dom->bank) { /* Data is in the frstdata in sharing pgtable case. */ @@ -797,27 +823,42 @@ static void mtk_iommu_release_device(struct device *dev) iommu_fwspec_free(dev); } +static int mtk_iommu_get_group_id(struct device *dev, const struct mtk_iommu_plat_data *plat_data) +{ + unsigned int bankid; + + /* + * If the bank function is enabled, each bank is a iommu group/domain. + * Otherwise, each iova region is a iommu group/domain. + */ + bankid = mtk_iommu_get_bank_id(dev, plat_data); + if (bankid) + return bankid; + + return mtk_iommu_get_iova_region_id(dev, plat_data); +} + static struct iommu_group *mtk_iommu_device_group(struct device *dev) { struct mtk_iommu_data *c_data = dev_iommu_priv_get(dev), *data; struct list_head *hw_list = c_data->hw_list; struct iommu_group *group; - int regionid; + int groupid; data = mtk_iommu_get_frst_data(hw_list); if (!data) return ERR_PTR(-ENODEV); - regionid = mtk_iommu_get_iova_region_id(dev, data->plat_data); - if (regionid < 0) - return ERR_PTR(regionid); + groupid = mtk_iommu_get_group_id(dev, data->plat_data); + if (groupid < 0) + return ERR_PTR(groupid); mutex_lock(&data->mutex); - group = data->m4u_group[regionid]; + group = data->m4u_group[groupid]; if (!group) { group = iommu_group_alloc(); if (!IS_ERR(group)) - data->m4u_group[regionid] = group; + data->m4u_group[groupid] = group; } else { iommu_group_ref_get(group); } From 42d57fc58aebc5801804424082028f43bad1b73c Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:24 +0800 Subject: [PATCH 567/803] iommu/mediatek: Initialise/Remove for multi bank dev The registers for each bank of the IOMMU base are in order, delta is 0x1000. Initialise the base for each bank. For all the previous SoC, we only have bank0. thus use "do {} while()" to allow bank0 always go. When removing the device, Not always all the banks are initialised, it depend on if there is masters for that bank. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-34-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 44 ++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 5f3e88c44514..400dea33aea1 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -113,6 +113,7 @@ #define F_MMU_INT_ID_PORT_ID(a) (((a) >> 2) & 0x1f) #define MTK_PROTECT_PA_ALIGN 256 +#define MTK_IOMMU_BANK_SZ 0x1000 #define PERICFG_IOMMU_1 0x714 @@ -1112,7 +1113,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) struct component_match *match = NULL; struct regmap *infracfg; void *protect; - int ret, banks_num; + int ret, banks_num, i = 0; u32 val; char *p; struct mtk_iommu_bank_data *bank; @@ -1153,27 +1154,36 @@ static int mtk_iommu_probe(struct platform_device *pdev) data->enable_4GB = !!(val & F_DDR_4GB_SUPPORT_EN); } + banks_num = data->plat_data->banks_num; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (resource_size(res) < banks_num * MTK_IOMMU_BANK_SZ) { + dev_err(dev, "banknr %d. res %pR is not enough.\n", banks_num, res); + return -EINVAL; + } base = devm_ioremap_resource(dev, res); if (IS_ERR(base)) return PTR_ERR(base); ioaddr = res->start; - banks_num = data->plat_data->banks_num; data->bank = devm_kmalloc(dev, banks_num * sizeof(*data->bank), GFP_KERNEL); if (!data->bank) return -ENOMEM; - bank = &data->bank[0]; - bank->id = 0; - bank->base = base; - bank->m4u_dom = NULL; - bank->irq = platform_get_irq(pdev, 0); - if (bank->irq < 0) - return bank->irq; - bank->parent_dev = dev; - bank->parent_data = data; - spin_lock_init(&bank->tlb_lock); + do { + if (!data->plat_data->banks_enable[i]) + continue; + bank = &data->bank[i]; + bank->id = i; + bank->base = base + i * MTK_IOMMU_BANK_SZ; + bank->m4u_dom = NULL; + + bank->irq = platform_get_irq(pdev, i); + if (bank->irq < 0) + return bank->irq; + bank->parent_dev = dev; + bank->parent_data = data; + spin_lock_init(&bank->tlb_lock); + } while (++i < banks_num); if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_BCLK)) { data->bclk = devm_clk_get(dev, "bclk"); @@ -1261,7 +1271,8 @@ out_runtime_disable: static int mtk_iommu_remove(struct platform_device *pdev) { struct mtk_iommu_data *data = platform_get_drvdata(pdev); - struct mtk_iommu_bank_data *bank = &data->bank[0]; + struct mtk_iommu_bank_data *bank; + int i; iommu_device_sysfs_remove(&data->iommu); iommu_device_unregister(&data->iommu); @@ -1278,7 +1289,12 @@ static int mtk_iommu_remove(struct platform_device *pdev) #endif } pm_runtime_disable(&pdev->dev); - devm_free_irq(&pdev->dev, bank->irq, bank); + for (i = 0; i < data->plat_data->banks_num; i++) { + bank = &data->bank[i]; + if (!bank->m4u_dom) + continue; + devm_free_irq(&pdev->dev, bank->irq, bank); + } return 0; } From d7127de15eba3f96f9ed6854d3275a4e5919f025 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:25 +0800 Subject: [PATCH 568/803] iommu/mediatek: Backup/restore regsiters for multi banks Each bank has some independent registers. thus backup/restore them for each a bank when suspend and resume. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-35-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 46 ++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 400dea33aea1..d3e8773b4c47 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -173,11 +173,12 @@ struct mtk_iommu_suspend_reg { u32 misc_ctrl; u32 dcm_dis; u32 ctrl_reg; - u32 int_control0; - u32 int_main_control; - u32 ivrp_paddr; u32 vld_pa_rng; u32 wr_len_ctrl; + + u32 int_control[MTK_IOMMU_BANK_MAX]; + u32 int_main_control[MTK_IOMMU_BANK_MAX]; + u32 ivrp_paddr[MTK_IOMMU_BANK_MAX]; }; struct mtk_iommu_plat_data { @@ -1302,16 +1303,23 @@ static int __maybe_unused mtk_iommu_runtime_suspend(struct device *dev) { struct mtk_iommu_data *data = dev_get_drvdata(dev); struct mtk_iommu_suspend_reg *reg = &data->reg; - void __iomem *base = data->bank[0].base; + void __iomem *base; + int i = 0; + base = data->bank[i].base; reg->wr_len_ctrl = readl_relaxed(base + REG_MMU_WR_LEN_CTRL); reg->misc_ctrl = readl_relaxed(base + REG_MMU_MISC_CTRL); reg->dcm_dis = readl_relaxed(base + REG_MMU_DCM_DIS); reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG); - reg->int_control0 = readl_relaxed(base + REG_MMU_INT_CONTROL0); - reg->int_main_control = readl_relaxed(base + REG_MMU_INT_MAIN_CONTROL); - reg->ivrp_paddr = readl_relaxed(base + REG_MMU_IVRP_PADDR); reg->vld_pa_rng = readl_relaxed(base + REG_MMU_VLD_PA_RNG); + do { + if (!data->plat_data->banks_enable[i]) + continue; + base = data->bank[i].base; + reg->int_control[i] = readl_relaxed(base + REG_MMU_INT_CONTROL0); + reg->int_main_control[i] = readl_relaxed(base + REG_MMU_INT_MAIN_CONTROL); + reg->ivrp_paddr[i] = readl_relaxed(base + REG_MMU_IVRP_PADDR); + } while (++i < data->plat_data->banks_num); clk_disable_unprepare(data->bclk); return 0; } @@ -1320,9 +1328,9 @@ static int __maybe_unused mtk_iommu_runtime_resume(struct device *dev) { struct mtk_iommu_data *data = dev_get_drvdata(dev); struct mtk_iommu_suspend_reg *reg = &data->reg; - struct mtk_iommu_domain *m4u_dom = data->bank[0].m4u_dom; - void __iomem *base = data->bank[0].base; - int ret; + struct mtk_iommu_domain *m4u_dom; + void __iomem *base; + int ret, i = 0; ret = clk_prepare_enable(data->bclk); if (ret) { @@ -1334,18 +1342,26 @@ static int __maybe_unused mtk_iommu_runtime_resume(struct device *dev) * Uppon first resume, only enable the clk and return, since the values of the * registers are not yet set. */ - if (!m4u_dom) + if (!reg->wr_len_ctrl) return 0; + base = data->bank[i].base; writel_relaxed(reg->wr_len_ctrl, base + REG_MMU_WR_LEN_CTRL); writel_relaxed(reg->misc_ctrl, base + REG_MMU_MISC_CTRL); writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM_DIS); writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG); - writel_relaxed(reg->int_control0, base + REG_MMU_INT_CONTROL0); - writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL); - writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR); writel_relaxed(reg->vld_pa_rng, base + REG_MMU_VLD_PA_RNG); - writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, base + REG_MMU_PT_BASE_ADDR); + do { + m4u_dom = data->bank[i].m4u_dom; + if (!data->plat_data->banks_enable[i] || !m4u_dom) + continue; + base = data->bank[i].base; + writel_relaxed(reg->int_control[i], base + REG_MMU_INT_CONTROL0); + writel_relaxed(reg->int_main_control[i], base + REG_MMU_INT_MAIN_CONTROL); + writel_relaxed(reg->ivrp_paddr[i], base + REG_MMU_IVRP_PADDR); + writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, + base + REG_MMU_PT_BASE_ADDR); + } while (++i < data->plat_data->banks_num); /* * Users may allocate dma buffer before they call pm_runtime_get, From 7597e3c5618d40986b23ddfd65d55d741536bf85 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:26 +0800 Subject: [PATCH 569/803] iommu/mediatek: mt8195: Enable multi banks for infra iommu Enable the multi-bank functions for infra-iommu. We put PCIE in bank0 and USB in the last bank(bank4). and we don't use the other banks currently, disable them. Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-36-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index d3e8773b4c47..0fa1d5240ac6 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1456,8 +1456,11 @@ static const struct mtk_iommu_plat_data mt8195_data_infra = { MTK_IOMMU_TYPE_INFRA | IFA_IOMMU_PCIE_SUPPORT, .pericfg_comp_str = "mediatek,mt8195-pericfg_ao", .inv_sel_reg = REG_MMU_INV_SEL_GEN2, - .banks_num = 1, - .banks_enable = {true}, + .banks_num = 5, + .banks_enable = {true, false, false, false, true}, + .banks_portmsk = {[0] = GENMASK(19, 16), /* PCIe */ + [4] = GENMASK(31, 20), /* USB */ + }, .iova_region = single_domain, .iova_region_nr = ARRAY_SIZE(single_domain), }; From e8d7ccaa3fb7575fda703ee6e05640c1c830fa06 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:14:27 +0800 Subject: [PATCH 570/803] iommu/mediatek: Add mt8186 iommu support Add mt8186 iommu supports. Signed-off-by: Anan Sun Signed-off-by: Yong Wu Reviewed-by: Matthias Brugger Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220503071427.2285-37-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 0fa1d5240ac6..71b2ace74cd6 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -160,6 +160,7 @@ enum mtk_iommu_plat { M4U_MT8167, M4U_MT8173, M4U_MT8183, + M4U_MT8186, M4U_MT8192, M4U_MT8195, }; @@ -1437,6 +1438,20 @@ static const struct mtk_iommu_plat_data mt8183_data = { .larbid_remap = {{0}, {4}, {5}, {6}, {7}, {2}, {3}, {1}}, }; +static const struct mtk_iommu_plat_data mt8186_data_mm = { + .m4u_plat = M4U_MT8186, + .flags = HAS_BCLK | HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | + WR_THROT_EN | IOVA_34_EN | MTK_IOMMU_TYPE_MM, + .larbid_remap = {{0}, {1, MTK_INVALID_LARBID, 8}, {4}, {7}, {2}, {9, 11, 19, 20}, + {MTK_INVALID_LARBID, 14, 16}, + {MTK_INVALID_LARBID, 13, MTK_INVALID_LARBID, 17}}, + .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .banks_num = 1, + .banks_enable = {true}, + .iova_region = mt8192_multi_dom, + .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), +}; + static const struct mtk_iommu_plat_data mt8192_data = { .m4u_plat = M4U_MT8192, .flags = HAS_BCLK | HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | @@ -1503,6 +1518,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt8167-m4u", .data = &mt8167_data}, { .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data}, { .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data}, + { .compatible = "mediatek,mt8186-iommu-mm", .data = &mt8186_data_mm}, /* mm: m4u */ { .compatible = "mediatek,mt8192-m4u", .data = &mt8192_data}, { .compatible = "mediatek,mt8195-iommu-infra", .data = &mt8195_data_infra}, { .compatible = "mediatek,mt8195-iommu-vdo", .data = &mt8195_data_vdo}, From 8b9ad480bd1dd25f4ff4854af5685fa334a2f57a Mon Sep 17 00:00:00 2001 From: Xiaomeng Tong Date: Sun, 1 May 2022 21:28:23 +0800 Subject: [PATCH 571/803] iommu/msm: Fix an incorrect NULL check on list iterator The bug is here: if (!iommu || iommu->dev->of_node != spec->np) { The list iterator value 'iommu' will *always* be set and non-NULL by list_for_each_entry(), so it is incorrect to assume that the iterator value will be NULL if the list is empty or no element is found (in fact, it will point to a invalid structure object containing HEAD). To fix the bug, use a new value 'iter' as the list iterator, while use the old value 'iommu' as a dedicated variable to point to the found one, and remove the unneeded check for 'iommu->dev->of_node != spec->np' outside the loop. Cc: stable@vger.kernel.org Fixes: f78ebca8ff3d6 ("iommu/msm: Add support for generic master bindings") Signed-off-by: Xiaomeng Tong Link: https://lore.kernel.org/r/20220501132823.12714-1-xiam0nd.tong@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/msm_iommu.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 3d9bd2043738..f09aedfdd462 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -615,16 +615,19 @@ static int insert_iommu_master(struct device *dev, static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *spec) { - struct msm_iommu_dev *iommu; + struct msm_iommu_dev *iommu = NULL, *iter; unsigned long flags; int ret = 0; spin_lock_irqsave(&msm_iommu_lock, flags); - list_for_each_entry(iommu, &qcom_iommu_devices, dev_node) - if (iommu->dev->of_node == spec->np) + list_for_each_entry(iter, &qcom_iommu_devices, dev_node) { + if (iter->dev->of_node == spec->np) { + iommu = iter; break; + } + } - if (!iommu || iommu->dev->of_node != spec->np) { + if (!iommu) { ret = -ENODEV; goto fail; } From 5e469ed9764d4722c59562da13120bd2dc6834c5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 20 Apr 2022 12:50:38 +0200 Subject: [PATCH 572/803] mac80211: fix rx reordering with non explicit / psmp ack policy When the QoS ack policy was set to non explicit / psmp ack, frames are treated as not being part of a BA session, which causes extra latency on reordering. Fix this by only bypassing reordering for packets with no-ack policy Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20220420105038.36443-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index beb6b92eb780..88d797fa82ff 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1405,8 +1405,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, goto dont_reorder; /* not part of a BA session */ - if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK && - ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL) + if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_NOACK) goto dont_reorder; /* new, potentially un-ordered, ampdu frame - process it */ From 5d087aa759eb82b8208411913f6c2158bd85abc0 Mon Sep 17 00:00:00 2001 From: Kieran Frewen Date: Wed, 20 Apr 2022 04:13:21 +0000 Subject: [PATCH 573/803] nl80211: validate S1G channel width Validate the S1G channel width input by user to ensure it matches that of the requested channel Signed-off-by: Kieran Frewen Signed-off-by: Bassem Dawood Link: https://lore.kernel.org/r/20220420041321.3788789-2-kieran.frewen@morsemicro.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 21e808fcb676..aa6094c3c9b0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3173,6 +3173,15 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, } else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) { chandef->width = nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]); + if (chandef->chan->band == NL80211_BAND_S1GHZ) { + /* User input error for channel width doesn't match channel */ + if (chandef->width != ieee80211_s1g_channel_width(chandef->chan)) { + NL_SET_ERR_MSG_ATTR(extack, + attrs[NL80211_ATTR_CHANNEL_WIDTH], + "bad channel width"); + return -EINVAL; + } + } if (attrs[NL80211_ATTR_CENTER_FREQ1]) { chandef->center_freq1 = nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]); From e847ffe2d146cfd52980ca688d84358e024a6e70 Mon Sep 17 00:00:00 2001 From: Kieran Frewen Date: Wed, 20 Apr 2022 04:13:20 +0000 Subject: [PATCH 574/803] cfg80211: retrieve S1G operating channel number When retrieving the S1G channel number from IEs, we should retrieve the operating channel instead of the primary channel. The S1G operation element specifies the main channel of operation as the oper channel, unlike for HT and HE which specify their main channel of operation as the primary channel. Signed-off-by: Kieran Frewen Signed-off-by: Bassem Dawood Link: https://lore.kernel.org/r/20220420041321.3788789-1-kieran.frewen@morsemicro.com Signed-off-by: Johannes Berg --- net/wireless/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 4a6d86432910..6d82bd9eaf8c 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1829,7 +1829,7 @@ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, if (tmp && tmp->datalen >= sizeof(struct ieee80211_s1g_oper_ie)) { struct ieee80211_s1g_oper_ie *s1gop = (void *)tmp->data; - return s1gop->primary_ch; + return s1gop->oper_ch; } } else { tmp = cfg80211_find_elem(WLAN_EID_DS_PARAMS, ie, ielen); From 86af062f40a73bf63321694e6bf637144f0383fe Mon Sep 17 00:00:00 2001 From: Manikanta Pubbisetty Date: Thu, 28 Apr 2022 10:57:44 +0530 Subject: [PATCH 575/803] mac80211: Reset MBSSID parameters upon connection Currently MBSSID parameters in struct ieee80211_bss_conf are not reset upon connection. This could be problematic with some drivers in a scenario where the device first connects to a non-transmit BSS and then connects to a transmit BSS of a Multi BSS AP. The MBSSID parameters which are set after connecting to a non-transmit BSS will not be reset and the same parameters will be passed on to the driver during the subsequent connection to a transmit BSS of a Multi BSS AP. For example, firmware running on the ath11k device uses the Multi BSS data for tracking the beacon of a non-transmit BSS and reports the driver when there is a beacon miss. If we do not reset the MBSSID parameters during the subsequent connection to a transmit BSS, then the driver would have wrong MBSSID data and FW would be looking for an incorrect BSSID in the MBSSID beacon of a Multi BSS AP and reports beacon loss leading to an unstable connection. Reset the MBSSID parameters upon every connection to solve this problem. Fixes: 78ac51f81532 ("mac80211: support multi-bssid") Signed-off-by: Manikanta Pubbisetty Link: https://lore.kernel.org/r/20220428052744.27040-1-quic_mpubbise@quicinc.com Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1b30c724ca8d..dc8aec1a5d3d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3657,6 +3657,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, cbss->transmitted_bss->bssid); bss_conf->bssid_indicator = cbss->max_bssid_indicator; bss_conf->bssid_index = cbss->bssid_index; + } else { + bss_conf->nontransmitted = false; + memset(bss_conf->transmitter_bssid, 0, + sizeof(bss_conf->transmitter_bssid)); + bss_conf->bssid_indicator = 0; + bss_conf->bssid_index = 0; } /* From e9f3fb523dbf476dc86beea23f5b5ca8f9687c93 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 24 Apr 2022 18:17:50 -0500 Subject: [PATCH 576/803] mmc: sunxi-mmc: Fix DMA descriptors allocated above 32 bits Newer variants of the MMC controller support a 34-bit physical address space by using word addresses instead of byte addresses. However, the code truncates the DMA descriptor address to 32 bits before applying the shift. This breaks DMA for descriptors allocated above the 32-bit limit. Fixes: 3536b82e5853 ("mmc: sunxi: add support for A100 mmc controller") Signed-off-by: Samuel Holland Reviewed-by: Andre Przywara Reviewed-by: Jernej Skrabec Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220424231751.32053-1-samuel@sholland.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sunxi-mmc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c index c62afd212692..46f9e2923d86 100644 --- a/drivers/mmc/host/sunxi-mmc.c +++ b/drivers/mmc/host/sunxi-mmc.c @@ -377,8 +377,9 @@ static void sunxi_mmc_init_idma_des(struct sunxi_mmc_host *host, pdes[i].buf_addr_ptr1 = cpu_to_le32(sg_dma_address(&data->sg[i]) >> host->cfg->idma_des_shift); - pdes[i].buf_addr_ptr2 = cpu_to_le32((u32)next_desc >> - host->cfg->idma_des_shift); + pdes[i].buf_addr_ptr2 = + cpu_to_le32(next_desc >> + host->cfg->idma_des_shift); } pdes[0].config |= cpu_to_le32(SDXC_IDMAC_DES0_FD); From 3e5a8e8494a8122fe4eb3f167662f406cab753b9 Mon Sep 17 00:00:00 2001 From: Shaik Sajida Bhanu Date: Sun, 24 Apr 2022 21:32:33 +0530 Subject: [PATCH 577/803] mmc: sdhci-msm: Reset GCC_SDCC_BCR register for SDHC Reset GCC_SDCC_BCR register before every fresh initilazation. This will reset whole SDHC-msm controller, clears the previous power control states and avoids, software reset timeout issues as below. [ 5.458061][ T262] mmc1: Reset 0x1 never completed. [ 5.462454][ T262] mmc1: sdhci: ============ SDHCI REGISTER DUMP =========== [ 5.469065][ T262] mmc1: sdhci: Sys addr: 0x00000000 | Version: 0x00007202 [ 5.475688][ T262] mmc1: sdhci: Blk size: 0x00000000 | Blk cnt: 0x00000000 [ 5.482315][ T262] mmc1: sdhci: Argument: 0x00000000 | Trn mode: 0x00000000 [ 5.488927][ T262] mmc1: sdhci: Present: 0x01f800f0 | Host ctl: 0x00000000 [ 5.495539][ T262] mmc1: sdhci: Power: 0x00000000 | Blk gap: 0x00000000 [ 5.502162][ T262] mmc1: sdhci: Wake-up: 0x00000000 | Clock: 0x00000003 [ 5.508768][ T262] mmc1: sdhci: Timeout: 0x00000000 | Int stat: 0x00000000 [ 5.515381][ T262] mmc1: sdhci: Int enab: 0x00000000 | Sig enab: 0x00000000 [ 5.521996][ T262] mmc1: sdhci: ACmd stat: 0x00000000 | Slot int: 0x00000000 [ 5.528607][ T262] mmc1: sdhci: Caps: 0x362dc8b2 | Caps_1: 0x0000808f [ 5.535227][ T262] mmc1: sdhci: Cmd: 0x00000000 | Max curr: 0x00000000 [ 5.541841][ T262] mmc1: sdhci: Resp[0]: 0x00000000 | Resp[1]: 0x00000000 [ 5.548454][ T262] mmc1: sdhci: Resp[2]: 0x00000000 | Resp[3]: 0x00000000 [ 5.555079][ T262] mmc1: sdhci: Host ctl2: 0x00000000 [ 5.559651][ T262] mmc1: sdhci_msm: ----------- VENDOR REGISTER DUMP----------- [ 5.566621][ T262] mmc1: sdhci_msm: DLL sts: 0x00000000 | DLL cfg: 0x6000642c | DLL cfg2: 0x0020a000 [ 5.575465][ T262] mmc1: sdhci_msm: DLL cfg3: 0x00000000 | DLL usr ctl: 0x00010800 | DDR cfg: 0x80040873 [ 5.584658][ T262] mmc1: sdhci_msm: Vndr func: 0x00018a9c | Vndr func2 : 0xf88218a8 Vndr func3: 0x02626040 Fixes: 0eb0d9f4de34 ("mmc: sdhci-msm: Initial support for Qualcomm chipsets") Signed-off-by: Shaik Sajida Bhanu Acked-by: Adrian Hunter Reviewed-by: Philipp Zabel Tested-by: Konrad Dybcio Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1650816153-23797-1-git-send-email-quic_c_sbhanu@quicinc.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-msm.c | 42 ++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 50c71e0ba5e4..ff9f5b63c337 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "sdhci-pltfm.h" #include "cqhci.h" @@ -2482,6 +2483,43 @@ static inline void sdhci_msm_get_of_property(struct platform_device *pdev, of_property_read_u32(node, "qcom,dll-config", &msm_host->dll_config); } +static int sdhci_msm_gcc_reset(struct device *dev, struct sdhci_host *host) +{ + struct reset_control *reset; + int ret = 0; + + reset = reset_control_get_optional_exclusive(dev, NULL); + if (IS_ERR(reset)) + return dev_err_probe(dev, PTR_ERR(reset), + "unable to acquire core_reset\n"); + + if (!reset) + return ret; + + ret = reset_control_assert(reset); + if (ret) { + reset_control_put(reset); + return dev_err_probe(dev, ret, "core_reset assert failed\n"); + } + + /* + * The hardware requirement for delay between assert/deassert + * is at least 3-4 sleep clock (32.7KHz) cycles, which comes to + * ~125us (4/32768). To be on the safe side add 200us delay. + */ + usleep_range(200, 210); + + ret = reset_control_deassert(reset); + if (ret) { + reset_control_put(reset); + return dev_err_probe(dev, ret, "core_reset deassert failed\n"); + } + + usleep_range(200, 210); + reset_control_put(reset); + + return ret; +} static int sdhci_msm_probe(struct platform_device *pdev) { @@ -2529,6 +2567,10 @@ static int sdhci_msm_probe(struct platform_device *pdev) msm_host->saved_tuning_phase = INVALID_TUNING_PHASE; + ret = sdhci_msm_gcc_reset(&pdev->dev, host); + if (ret) + goto pltfm_free; + /* Setup SDCC bus voter clock. */ msm_host->bus_clk = devm_clk_get(&pdev->dev, "bus"); if (!IS_ERR(msm_host->bus_clk)) { From 57831bfb5e78777dc399e351ed68ef77c3aee385 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Sat, 19 Mar 2022 02:28:09 -0700 Subject: [PATCH 578/803] powerpc/pseries/vas: Use QoS credits from the userspace The user can change the QoS credits dynamically with the management console interface which notifies OS with sysfs. After returning from the OS interface successfully, the management console updates the hypervisor. Since the VAS capabilities in the hypervisor is not updated when the OS gets the update, the kernel is using the old total credits value from the hypervisor. Fix this issue by using the new QoS credits from the userspace instead of depending on VAS capabilities from the hypervisor. Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/76d156f8af1e03cc09369d68e0bfad0c40031bcc.camel@linux.ibm.com --- arch/powerpc/platforms/pseries/vas-sysfs.c | 19 +++++++++++++----- arch/powerpc/platforms/pseries/vas.c | 23 +++++++++++----------- arch/powerpc/platforms/pseries/vas.h | 2 +- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c index 909535ca513a..ec65586cbeb3 100644 --- a/arch/powerpc/platforms/pseries/vas-sysfs.c +++ b/arch/powerpc/platforms/pseries/vas-sysfs.c @@ -27,22 +27,31 @@ struct vas_caps_entry { /* * This function is used to get the notification from the drmgr when - * QoS credits are changed. Though receiving the target total QoS - * credits here, get the official QoS capabilities from the hypervisor. + * QoS credits are changed. */ -static ssize_t update_total_credits_trigger(struct vas_cop_feat_caps *caps, +static ssize_t update_total_credits_store(struct vas_cop_feat_caps *caps, const char *buf, size_t count) { int err; u16 creds; err = kstrtou16(buf, 0, &creds); + /* + * The user space interface from the management console + * notifies OS with the new QoS credits and then the + * hypervisor. So OS has to use this new credits value + * and reconfigure VAS windows (close or reopen depends + * on the credits available) instead of depending on VAS + * QoS capabilities from the hypervisor. + */ if (!err) - err = vas_reconfig_capabilties(caps->win_type); + err = vas_reconfig_capabilties(caps->win_type, creds); if (err) return -EINVAL; + pr_info("Set QoS total credits %u\n", creds); + return count; } @@ -92,7 +101,7 @@ VAS_ATTR_RO(nr_total_credits); VAS_ATTR_RO(nr_used_credits); static struct vas_sysfs_entry update_total_credits_attribute = - __ATTR(update_total_credits, 0200, NULL, update_total_credits_trigger); + __ATTR(update_total_credits, 0200, NULL, update_total_credits_store); static struct attribute *vas_def_capab_attrs[] = { &nr_total_credits_attribute.attr, diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 1f59d78c77a1..ec643bbdb67f 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -779,10 +779,10 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds, * changes. Reconfig window configurations based on the credits * availability from this new capabilities. */ -int vas_reconfig_capabilties(u8 type) +int vas_reconfig_capabilties(u8 type, int new_nr_creds) { struct vas_cop_feat_caps *caps; - int old_nr_creds, new_nr_creds; + int old_nr_creds; struct vas_caps *vcaps; int rc = 0, nr_active_wins; @@ -795,12 +795,6 @@ int vas_reconfig_capabilties(u8 type) caps = &vcaps->caps; mutex_lock(&vas_pseries_mutex); - rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, vcaps->feat, - (u64)virt_to_phys(&hv_cop_caps)); - if (rc) - goto out; - - new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); old_nr_creds = atomic_read(&caps->nr_total_credits); @@ -832,7 +826,6 @@ int vas_reconfig_capabilties(u8 type) false); } -out: mutex_unlock(&vas_pseries_mutex); return rc; } @@ -850,7 +843,7 @@ static int pseries_vas_notifier(struct notifier_block *nb, struct of_reconfig_data *rd = data; struct device_node *dn = rd->dn; const __be32 *intserv = NULL; - int len, rc = 0; + int new_nr_creds, len, rc = 0; if ((action == OF_RECONFIG_ATTACH_NODE) || (action == OF_RECONFIG_DETACH_NODE)) @@ -862,7 +855,15 @@ static int pseries_vas_notifier(struct notifier_block *nb, if (!intserv) return NOTIFY_OK; - rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE); + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, + vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat, + (u64)virt_to_phys(&hv_cop_caps)); + if (!rc) { + new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); + rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, + new_nr_creds); + } + if (rc) pr_err("Failed reconfig VAS capabilities with DLPAR\n"); diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 34177881e998..333ffa2f9f42 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -135,7 +135,7 @@ struct pseries_vas_window { }; int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps); -int vas_reconfig_capabilties(u8 type); +int vas_reconfig_capabilties(u8 type, int new_nr_creds); int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps); #ifdef CONFIG_PPC_VAS From 6d65028eb67dbb7627651adfc460d64196d38bd8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 2 May 2022 22:50:10 +1000 Subject: [PATCH 579/803] powerpc/vdso: Fix incorrect CFI in gettimeofday.S As reported by Alan, the CFI (Call Frame Information) in the VDSO time routines is incorrect since commit ce7d8056e38b ("powerpc/vdso: Prepare for switching VDSO to generic C implementation."). DWARF has a concept called the CFA (Canonical Frame Address), which on powerpc is calculated as an offset from the stack pointer (r1). That means when the stack pointer is changed there must be a corresponding CFI directive to update the calculation of the CFA. The current code is missing those directives for the changes to r1, which prevents gdb from being able to generate a backtrace from inside VDSO functions, eg: Breakpoint 1, 0x00007ffff7f804dc in __kernel_clock_gettime () (gdb) bt #0 0x00007ffff7f804dc in __kernel_clock_gettime () #1 0x00007ffff7d8872c in clock_gettime@@GLIBC_2.17 () from /lib64/libc.so.6 #2 0x00007fffffffd960 in ?? () #3 0x00007ffff7d8872c in clock_gettime@@GLIBC_2.17 () from /lib64/libc.so.6 Backtrace stopped: frame did not save the PC Alan helpfully describes some rules for correctly maintaining the CFI information: 1) Every adjustment to the current frame address reg (ie. r1) must be described, and exactly at the instruction where r1 changes. Why? Because stack unwinding might want to access previous frames. 2) If a function changes LR or any non-volatile register, the save location for those regs must be given. The CFI can be at any instruction after the saves up to the point that the reg is changed. (Exception: LR save should be described before a bl. not after) 3) If asychronous unwind info is needed then restores of LR and non-volatile regs must also be described. The CFI can be at any instruction after the reg is restored up to the point where the save location is (potentially) trashed. Fix the inability to backtrace by adding CFI directives describing the changes to r1, ie. satisfying rule 1. Also change the information for LR to point to the copy saved on the stack, not the value in r0 that will be overwritten by the function call. Finally, add CFI directives describing the save/restore of r2. With the fix gdb can correctly back trace and navigate up and down the stack: Breakpoint 1, 0x00007ffff7f804dc in __kernel_clock_gettime () (gdb) bt #0 0x00007ffff7f804dc in __kernel_clock_gettime () #1 0x00007ffff7d8872c in clock_gettime@@GLIBC_2.17 () from /lib64/libc.so.6 #2 0x0000000100015b60 in gettime () #3 0x000000010000c8bc in print_long_format () #4 0x000000010000d180 in print_current_files () #5 0x00000001000054ac in main () (gdb) up #1 0x00007ffff7d8872c in clock_gettime@@GLIBC_2.17 () from /lib64/libc.so.6 (gdb) #2 0x0000000100015b60 in gettime () (gdb) #3 0x000000010000c8bc in print_long_format () (gdb) #4 0x000000010000d180 in print_current_files () (gdb) #5 0x00000001000054ac in main () (gdb) Initial frame selected; you cannot go up. (gdb) down #4 0x000000010000d180 in print_current_files () (gdb) #3 0x000000010000c8bc in print_long_format () (gdb) #2 0x0000000100015b60 in gettime () (gdb) #1 0x00007ffff7d8872c in clock_gettime@@GLIBC_2.17 () from /lib64/libc.so.6 (gdb) #0 0x00007ffff7f804dc in __kernel_clock_gettime () (gdb) Fixes: ce7d8056e38b ("powerpc/vdso: Prepare for switching VDSO to generic C implementation.") Cc: stable@vger.kernel.org # v5.11+ Reported-by: Alan Modra Signed-off-by: Michael Ellerman Reviewed-by: Segher Boessenkool Link: https://lore.kernel.org/r/20220502125010.1319370-1-mpe@ellerman.id.au --- arch/powerpc/kernel/vdso/gettimeofday.S | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S index eb9c81e1c218..0c4ecc8fec5a 100644 --- a/arch/powerpc/kernel/vdso/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S @@ -22,12 +22,15 @@ .macro cvdso_call funct call_time=0 .cfi_startproc PPC_STLU r1, -PPC_MIN_STKFRM(r1) + .cfi_adjust_cfa_offset PPC_MIN_STKFRM mflr r0 - .cfi_register lr, r0 PPC_STLU r1, -PPC_MIN_STKFRM(r1) + .cfi_adjust_cfa_offset PPC_MIN_STKFRM PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) + .cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF #ifdef __powerpc64__ PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) + .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT #endif get_datapage r5 .ifeq \call_time @@ -39,13 +42,15 @@ PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) #ifdef __powerpc64__ PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) + .cfi_restore r2 #endif .ifeq \call_time cmpwi r3, 0 .endif mtlr r0 - .cfi_restore lr addi r1, r1, 2 * PPC_MIN_STKFRM + .cfi_restore lr + .cfi_def_cfa_offset 0 crclr so .ifeq \call_time beqlr+ From 392bf51946c2463436a1ba237c1ec5865b234825 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 4 May 2022 13:39:58 +0100 Subject: [PATCH 580/803] iommu: Make sysfs robust for non-API groups Groups created by VFIO backends outside the core IOMMU API should never be passed directly into the API itself, however they still expose their standard sysfs attributes, so we can still stumble across them that way. Take care to consider those cases before jumping into our normal assumptions of a fully-initialised core API group. Fixes: 3f6634d997db ("iommu: Use right way to retrieve iommu_ops") Reported-by: Jan Stancek Tested-by: Jan Stancek Reviewed-by: Jason Gunthorpe Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/86ada41986988511a8424e84746dfe9ba7f87573.1651667683.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index f2c45b85b9fc..857d4c2fd1a2 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -506,6 +506,13 @@ int iommu_get_group_resv_regions(struct iommu_group *group, list_for_each_entry(device, &group->devices, list) { struct list_head dev_resv_regions; + /* + * Non-API groups still expose reserved_regions in sysfs, + * so filter out calls that get here that way. + */ + if (!device->dev->iommu) + break; + INIT_LIST_HEAD(&dev_resv_regions); iommu_get_resv_regions(device->dev, &dev_resv_regions); ret = iommu_insert_device_resv_regions(&dev_resv_regions, head); @@ -3019,7 +3026,7 @@ static ssize_t iommu_group_store_type(struct iommu_group *group, if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES; - if (WARN_ON(!group)) + if (WARN_ON(!group) || !group->default_domain) return -EINVAL; if (sysfs_streq(buf, "identity")) From 19965d8259fdabc6806da92adda49684f5bcbec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Wed, 27 Apr 2022 01:57:15 +0200 Subject: [PATCH 581/803] drm/amdgpu: do not use passthrough mode in Xen dom0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While technically Xen dom0 is a virtual machine too, it does have access to most of the hardware so it doesn't need to be considered a "passthrough". Commit b818a5d37454 ("drm/amdgpu/gmc: use PCI BARs for APUs in passthrough") changed how FB is accessed based on passthrough mode. This breaks amdgpu in Xen dom0 with message like this: [drm:dc_dmub_srv_wait_idle [amdgpu]] *ERROR* Error waiting for DMUB idle: status=3 While the reason for this failure is unclear, the passthrough mode is not really necessary in Xen dom0 anyway. So, to unbreak booting affected kernels, disable passthrough mode in this case. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1985 Fixes: b818a5d37454 ("drm/amdgpu/gmc: use PCI BARs for APUs in passthrough") Signed-off-by: Marek Marczykowski-Górecki Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index a025f080aa6a..5e3756643da3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -24,6 +24,7 @@ #include #include +#include #include "amdgpu.h" #include "amdgpu_ras.h" @@ -710,7 +711,8 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; if (!reg) { - if (is_virtual_machine()) /* passthrough mode exclus sriov mod */ + /* passthrough mode exclus sriov mod */ + if (is_virtual_machine() && !xen_initial_domain()) adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } From 3dfe85fa87b2a26bdbd292b66653bba065cf9941 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Tue, 19 Apr 2022 13:03:12 -0400 Subject: [PATCH 582/803] drm/amd/display: Avoid reading audio pattern past AUDIO_CHANNELS_COUNT A faulty receiver might report an erroneous channel count. We should guard against reading beyond AUDIO_CHANNELS_COUNT as that would overflow the dpcd_pattern_period array. Signed-off-by: Harry Wentland Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 22dabe596dfc..95b5b5bfa1ff 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -4440,7 +4440,7 @@ static void dp_test_get_audio_test_data(struct dc_link *link, bool disable_video &dpcd_pattern_type.value, sizeof(dpcd_pattern_type)); - channel_count = dpcd_test_mode.bits.channel_count + 1; + channel_count = min(dpcd_test_mode.bits.channel_count + 1, AUDIO_CHANNELS_COUNT); // read pattern periods for requested channels when sawTooth pattern is requested if (dpcd_pattern_type.value == AUDIO_TEST_PATTERN_SAWTOOTH || From 770fb0942c338545f93a584342b64848cff31efe Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 4 May 2022 11:07:45 -0700 Subject: [PATCH 583/803] MAINTAINERS: Update Josh Poimboeuf's email address Change to my kernel.org email address. Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/1abc3de4b00dc6f915ac975a2ec29ed545d96dc4.1651687652.git.jpoimboe@redhat.com --- MAINTAINERS | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index edc96cdb85e8..1e1a2264792d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7499,7 +7499,7 @@ F: Documentation/hwmon/f71805f.rst F: drivers/hwmon/f71805f.c FADDR2LINE -M: Josh Poimboeuf +M: Josh Poimboeuf S: Maintained F: scripts/faddr2line @@ -11348,7 +11348,7 @@ F: drivers/mmc/host/litex_mmc.c N: litex LIVE PATCHING -M: Josh Poimboeuf +M: Josh Poimboeuf M: Jiri Kosina M: Miroslav Benes M: Petr Mladek @@ -14224,7 +14224,7 @@ F: lib/objagg.c F: lib/test_objagg.c OBJTOOL -M: Josh Poimboeuf +M: Josh Poimboeuf M: Peter Zijlstra S: Supported F: tools/objtool/ @@ -18792,7 +18792,7 @@ F: include/dt-bindings/reset/starfive-jh7100.h STATIC BRANCH/CALL M: Peter Zijlstra -M: Josh Poimboeuf +M: Josh Poimboeuf M: Jason Baron R: Steven Rostedt R: Ard Biesheuvel @@ -21444,7 +21444,7 @@ F: arch/x86/kernel/apic/x2apic_uv_x.c F: arch/x86/platform/uv/ X86 STACK UNWINDING -M: Josh Poimboeuf +M: Josh Poimboeuf M: Peter Zijlstra S: Supported F: arch/x86/include/asm/unwind*.h From b2b701b31e1c5827957c88e5c1f0c3dde1f55b2f Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 29 Apr 2022 14:46:11 -0500 Subject: [PATCH 584/803] dt-bindings: pinctrl: Allow values for drive-push-pull and drive-open-drain A few platforms, at91 and tegra, use drive-push-pull and drive-open-drain with a 0 or 1 value. There's not really a need for values as '1' should be equivalent to no value (it wasn't treated that way) and drive-push-pull disabled is equivalent to drive-open-drain. So dropping the value can't be done without breaking existing OSs. As we don't want new cases, mark the case with values as deprecated. Cc: Arnd Bergmann Cc: Thierry Reding Cc: Jonathan Hunter Cc: Nicolas Ferre Cc: Claudiu Beznea Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220429194610.2741437-1-robh@kernel.org --- .../devicetree/bindings/pinctrl/pincfg-node.yaml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml b/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml index 4b22a9e3a447..f5a121311f61 100644 --- a/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml +++ b/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml @@ -52,11 +52,19 @@ properties: hardware supporting it the pull strength in Ohm. drive-push-pull: - type: boolean + oneOf: + - type: boolean + - $ref: /schemas/types.yaml#/definitions/uint32 + enum: [ 0, 1 ] + deprecated: true description: drive actively high and low drive-open-drain: - type: boolean + oneOf: + - type: boolean + - $ref: /schemas/types.yaml#/definitions/uint32 + const: 1 # No known cases of 0 + deprecated: true description: drive with open drain drive-open-source: From caf83e494de965dbb5f8add655c526b9af6a96cb Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 3 May 2022 11:27:38 -0500 Subject: [PATCH 585/803] dt-bindings: Drop redundant 'maxItems/minItems' in if/then schemas Another round of removing redundant minItems/maxItems when 'items' list is specified. This time it is in if/then schemas as the meta-schema was failing to check this case. If a property has an 'items' list, then a 'minItems' or 'maxItems' with the same size as the list is redundant and can be dropped. Note that is DT schema specific behavior and not standard json-schema behavior. The tooling will fixup the final schema adding any unspecified minItems/maxItems. Signed-off-by: Rob Herring Acked-By: Vinod Koul Acked-by: Marc Kleine-Budde Acked-by: Mark Brown Acked-by: Ulf Hansson # For MMC Acked-by: Jonathan Cameron #for IIO Link: https://lore.kernel.org/r/20220503162738.3827041-1-robh@kernel.org --- .../bindings/clock/imx8m-clock.yaml | 4 ---- .../bindings/display/bridge/renesas,lvds.yaml | 4 ---- .../bindings/display/renesas,du.yaml | 23 ------------------- .../bindings/iio/adc/st,stm32-adc.yaml | 2 -- .../bindings/mmc/nvidia,tegra20-sdhci.yaml | 7 +----- .../devicetree/bindings/mtd/gpmi-nand.yaml | 2 -- .../bindings/net/can/bosch,c_can.yaml | 3 --- .../bindings/phy/brcm,sata-phy.yaml | 10 ++++---- .../bindings/rtc/allwinner,sun6i-a31-rtc.yaml | 10 -------- .../bindings/serial/samsung_uart.yaml | 4 ---- .../sound/allwinner,sun4i-a10-i2s.yaml | 1 - .../bindings/sound/ti,j721e-cpb-audio.yaml | 2 -- .../bindings/thermal/rcar-gen3-thermal.yaml | 1 - 13 files changed, 5 insertions(+), 68 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/imx8m-clock.yaml b/Documentation/devicetree/bindings/clock/imx8m-clock.yaml index 625f573a7b90..458c7645ee68 100644 --- a/Documentation/devicetree/bindings/clock/imx8m-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx8m-clock.yaml @@ -55,8 +55,6 @@ allOf: then: properties: clocks: - minItems: 7 - maxItems: 7 items: - description: 32k osc - description: 25m osc @@ -66,8 +64,6 @@ allOf: - description: ext3 clock input - description: ext4 clock input clock-names: - minItems: 7 - maxItems: 7 items: - const: ckil - const: osc_25m diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml index a51baf8a4c76..bb9dbfb9beaf 100644 --- a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml +++ b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml @@ -95,7 +95,6 @@ then: properties: clocks: minItems: 1 - maxItems: 4 items: - description: Functional clock - description: EXTAL input clock @@ -104,7 +103,6 @@ then: clock-names: minItems: 1 - maxItems: 4 items: - const: fck # The LVDS encoder can use the EXTAL or DU_DOTCLKINx clocks. @@ -128,12 +126,10 @@ then: else: properties: clocks: - maxItems: 1 items: - description: Functional clock clock-names: - maxItems: 1 items: - const: fck diff --git a/Documentation/devicetree/bindings/display/renesas,du.yaml b/Documentation/devicetree/bindings/display/renesas,du.yaml index 56cedcd6d576..b3e588022082 100644 --- a/Documentation/devicetree/bindings/display/renesas,du.yaml +++ b/Documentation/devicetree/bindings/display/renesas,du.yaml @@ -109,7 +109,6 @@ allOf: properties: clocks: minItems: 1 - maxItems: 3 items: - description: Functional clock - description: DU_DOTCLKIN0 input clock @@ -117,7 +116,6 @@ allOf: clock-names: minItems: 1 - maxItems: 3 items: - const: du.0 - pattern: '^dclkin\.[01]$' @@ -159,7 +157,6 @@ allOf: properties: clocks: minItems: 2 - maxItems: 4 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -168,7 +165,6 @@ allOf: clock-names: minItems: 2 - maxItems: 4 items: - const: du.0 - const: du.1 @@ -216,7 +212,6 @@ allOf: properties: clocks: minItems: 2 - maxItems: 4 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -225,7 +220,6 @@ allOf: clock-names: minItems: 2 - maxItems: 4 items: - const: du.0 - const: du.1 @@ -271,7 +265,6 @@ allOf: properties: clocks: minItems: 2 - maxItems: 4 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -280,7 +273,6 @@ allOf: clock-names: minItems: 2 - maxItems: 4 items: - const: du.0 - const: du.1 @@ -327,7 +319,6 @@ allOf: properties: clocks: minItems: 2 - maxItems: 4 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -336,7 +327,6 @@ allOf: clock-names: minItems: 2 - maxItems: 4 items: - const: du.0 - const: du.1 @@ -386,7 +376,6 @@ allOf: properties: clocks: minItems: 3 - maxItems: 6 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -397,7 +386,6 @@ allOf: clock-names: minItems: 3 - maxItems: 6 items: - const: du.0 - const: du.1 @@ -448,7 +436,6 @@ allOf: properties: clocks: minItems: 4 - maxItems: 8 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -461,7 +448,6 @@ allOf: clock-names: minItems: 4 - maxItems: 8 items: - const: du.0 - const: du.1 @@ -525,7 +511,6 @@ allOf: properties: clocks: minItems: 3 - maxItems: 6 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -536,7 +521,6 @@ allOf: clock-names: minItems: 3 - maxItems: 6 items: - const: du.0 - const: du.1 @@ -596,7 +580,6 @@ allOf: properties: clocks: minItems: 3 - maxItems: 6 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -607,7 +590,6 @@ allOf: clock-names: minItems: 3 - maxItems: 6 items: - const: du.0 - const: du.1 @@ -666,14 +648,12 @@ allOf: properties: clocks: minItems: 1 - maxItems: 2 items: - description: Functional clock for DU0 - description: DU_DOTCLKIN0 input clock clock-names: minItems: 1 - maxItems: 2 items: - const: du.0 - const: dclkin.0 @@ -723,7 +703,6 @@ allOf: properties: clocks: minItems: 2 - maxItems: 4 items: - description: Functional clock for DU0 - description: Functional clock for DU1 @@ -732,7 +711,6 @@ allOf: clock-names: minItems: 2 - maxItems: 4 items: - const: du.0 - const: du.1 @@ -791,7 +769,6 @@ allOf: - description: Functional clock clock-names: - maxItems: 1 items: - const: du.0 diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml index 4d6074518b5c..fa8da42cb1e6 100644 --- a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml @@ -138,7 +138,6 @@ allOf: - const: bus - const: adc minItems: 1 - maxItems: 2 interrupts: items: @@ -170,7 +169,6 @@ allOf: - const: bus - const: adc minItems: 1 - maxItems: 2 interrupts: items: diff --git a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml index f3f4d5b02744..fe0270207622 100644 --- a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml +++ b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml @@ -202,22 +202,17 @@ allOf: clocks: items: - description: module clock - minItems: 1 - maxItems: 1 else: properties: clocks: items: - description: module clock - description: timeout clock - minItems: 2 - maxItems: 2 + clock-names: items: - const: sdhci - const: tmclk - minItems: 2 - maxItems: 2 required: - clock-names diff --git a/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml b/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml index 9d764e654e1d..849aeae319a9 100644 --- a/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml +++ b/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml @@ -147,8 +147,6 @@ allOf: - description: SoC gpmi io clock - description: SoC gpmi bch apb clock clock-names: - minItems: 2 - maxItems: 2 items: - const: gpmi_io - const: gpmi_bch_apb diff --git a/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml index 8bad328b184d..51aa89ac7e85 100644 --- a/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml +++ b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml @@ -80,8 +80,6 @@ if: then: properties: interrupts: - minItems: 4 - maxItems: 4 items: - description: Error and status IRQ - description: Message object IRQ @@ -91,7 +89,6 @@ then: else: properties: interrupts: - maxItems: 1 items: - description: Error and status IRQ diff --git a/Documentation/devicetree/bindings/phy/brcm,sata-phy.yaml b/Documentation/devicetree/bindings/phy/brcm,sata-phy.yaml index cb1aa325336f..435b971dfd9b 100644 --- a/Documentation/devicetree/bindings/phy/brcm,sata-phy.yaml +++ b/Documentation/devicetree/bindings/phy/brcm,sata-phy.yaml @@ -102,19 +102,17 @@ if: then: properties: reg: - maxItems: 2 + minItems: 2 + reg-names: - items: - - const: "phy" - - const: "phy-ctrl" + minItems: 2 else: properties: reg: maxItems: 1 + reg-names: maxItems: 1 - items: - - const: "phy" required: - compatible diff --git a/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml b/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml index 0b767fec39d8..6b38bd7eb3b4 100644 --- a/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml @@ -71,7 +71,6 @@ allOf: then: properties: clock-output-names: - minItems: 1 maxItems: 1 - if: @@ -102,7 +101,6 @@ allOf: properties: clock-output-names: minItems: 3 - maxItems: 3 - if: properties: @@ -113,16 +111,12 @@ allOf: then: properties: clocks: - minItems: 3 - maxItems: 3 items: - description: Bus clock for register access - description: 24 MHz oscillator - description: 32 kHz clock from the CCU clock-names: - minItems: 3 - maxItems: 3 items: - const: bus - const: hosc @@ -142,7 +136,6 @@ allOf: properties: clocks: minItems: 3 - maxItems: 4 items: - description: Bus clock for register access - description: 24 MHz oscillator @@ -151,7 +144,6 @@ allOf: clock-names: minItems: 3 - maxItems: 4 items: - const: bus - const: hosc @@ -174,14 +166,12 @@ allOf: then: properties: interrupts: - minItems: 1 maxItems: 1 else: properties: interrupts: minItems: 2 - maxItems: 2 required: - "#clock-cells" diff --git a/Documentation/devicetree/bindings/serial/samsung_uart.yaml b/Documentation/devicetree/bindings/serial/samsung_uart.yaml index d4688e317fc5..901c1e2cea28 100644 --- a/Documentation/devicetree/bindings/serial/samsung_uart.yaml +++ b/Documentation/devicetree/bindings/serial/samsung_uart.yaml @@ -100,7 +100,6 @@ allOf: maxItems: 3 clock-names: minItems: 2 - maxItems: 3 items: - const: uart - pattern: '^clk_uart_baud[0-1]$' @@ -118,11 +117,8 @@ allOf: then: properties: clocks: - minItems: 2 maxItems: 2 clock-names: - minItems: 2 - maxItems: 2 items: - const: uart - const: clk_uart_baud0 diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml index c21c807b667c..34f6ee9de392 100644 --- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml +++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml @@ -89,7 +89,6 @@ allOf: properties: dmas: minItems: 1 - maxItems: 2 items: - description: RX DMA Channel - description: TX DMA Channel diff --git a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml index 6806f53a4aed..20ea5883b7ff 100644 --- a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml +++ b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml @@ -80,7 +80,6 @@ allOf: then: properties: clocks: - minItems: 6 items: - description: AUXCLK clock for McASP used by CPB audio - description: Parent for CPB_McASP auxclk (for 48KHz) @@ -107,7 +106,6 @@ allOf: then: properties: clocks: - maxItems: 4 items: - description: AUXCLK clock for McASP used by CPB audio - description: Parent for CPB_McASP auxclk (for 48KHz) diff --git a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml index f963204e0b16..1368d90da0e8 100644 --- a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml @@ -67,7 +67,6 @@ then: properties: reg: minItems: 2 - maxItems: 3 items: - description: TSC1 registers - description: TSC2 registers From 5dc4630426511f641b7ac44fc550b8e21eafb237 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Mon, 2 May 2022 18:13:08 +0900 Subject: [PATCH 586/803] dt-bindings: pci: apple,pcie: Drop max-link-speed from example We no longer use these since 111659c2a570 (and they never worked anyway); drop them from the example to avoid confusion. Fixes: 111659c2a570 ("arm64: dts: apple: t8103: Remove PCIe max-link-speed properties") Signed-off-by: Hector Martin Reviewed-by: Alyssa Rosenzweig Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220502091308.28233-1-marcan@marcan.st --- Documentation/devicetree/bindings/pci/apple,pcie.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/apple,pcie.yaml b/Documentation/devicetree/bindings/pci/apple,pcie.yaml index 7f01e15fc81c..daf602ac0d0f 100644 --- a/Documentation/devicetree/bindings/pci/apple,pcie.yaml +++ b/Documentation/devicetree/bindings/pci/apple,pcie.yaml @@ -142,7 +142,6 @@ examples: device_type = "pci"; reg = <0x0 0x0 0x0 0x0 0x0>; reset-gpios = <&pinctrl_ap 152 0>; - max-link-speed = <2>; #address-cells = <3>; #size-cells = <2>; @@ -153,7 +152,6 @@ examples: device_type = "pci"; reg = <0x800 0x0 0x0 0x0 0x0>; reset-gpios = <&pinctrl_ap 153 0>; - max-link-speed = <2>; #address-cells = <3>; #size-cells = <2>; @@ -164,7 +162,6 @@ examples: device_type = "pci"; reg = <0x1000 0x0 0x0 0x0 0x0>; reset-gpios = <&pinctrl_ap 33 0>; - max-link-speed = <1>; #address-cells = <3>; #size-cells = <2>; From ef91271c65c12d36e4c2b61c61d4849fb6d11aa0 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Sun, 24 Apr 2022 16:01:03 +0800 Subject: [PATCH 587/803] RDMA/siw: Fix a condition race issue in MPA request processing The calling of siw_cm_upcall and detaching new_cep with its listen_cep should be atomistic semantics. Otherwise siw_reject may be called in a temporary state, e,g, siw_cm_upcall is called but the new_cep->listen_cep has not being cleared. This fixes a WARN: WARNING: CPU: 7 PID: 201 at drivers/infiniband/sw/siw/siw_cm.c:255 siw_cep_put+0x125/0x130 [siw] CPU: 2 PID: 201 Comm: kworker/u16:22 Kdump: loaded Tainted: G E 5.17.0-rc7 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Workqueue: iw_cm_wq cm_work_handler [iw_cm] RIP: 0010:siw_cep_put+0x125/0x130 [siw] Call Trace: siw_reject+0xac/0x180 [siw] iw_cm_reject+0x68/0xc0 [iw_cm] cm_work_handler+0x59d/0xe20 [iw_cm] process_one_work+0x1e2/0x3b0 worker_thread+0x50/0x3a0 ? rescuer_thread+0x390/0x390 kthread+0xe5/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Fixes: 6c52fdc244b5 ("rdma/siw: connection management") Link: https://lore.kernel.org/r/d528d83466c44687f3872eadcb8c184528b2e2d4.1650526554.git.chengyou@linux.alibaba.com Reported-by: Luis Chamberlain Reviewed-by: Bernard Metzler Signed-off-by: Cheng Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_cm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 7acdd3c3a599..17f34d584cd9 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -968,14 +968,15 @@ static void siw_accept_newconn(struct siw_cep *cep) siw_cep_set_inuse(new_cep); rv = siw_proc_mpareq(new_cep); - siw_cep_set_free(new_cep); - if (rv != -EAGAIN) { siw_cep_put(cep); new_cep->listen_cep = NULL; - if (rv) + if (rv) { + siw_cep_set_free(new_cep); goto error; + } } + siw_cep_set_free(new_cep); } return; From a926a903b7dc39a8a949150258c09290998dd812 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Wed, 4 May 2022 15:28:17 -0500 Subject: [PATCH 588/803] RDMA/rxe: Do not call dev_mc_add/del() under a spinlock These routines were not intended to be called under a spinlock and will throw debugging warnings: raw_local_irq_restore() called with IRQs enabled WARNING: CPU: 13 PID: 3107 at kernel/locking/irqflag-debug.c:10 warn_bogus_irq_restore+0x2f/0x50 CPU: 13 PID: 3107 Comm: python3 Tainted: G E 5.18.0-rc1+ #7 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 RIP: 0010:warn_bogus_irq_restore+0x2f/0x50 Call Trace: _raw_spin_unlock_irqrestore+0x75/0x80 rxe_attach_mcast+0x304/0x480 [rdma_rxe] ib_attach_mcast+0x88/0xa0 [ib_core] ib_uverbs_attach_mcast+0x186/0x1e0 [ib_uverbs] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0xcd/0x140 [ib_uverbs] ib_uverbs_cmd_verbs+0xdb0/0xea0 [ib_uverbs] ib_uverbs_ioctl+0xd2/0x160 [ib_uverbs] do_syscall_64+0x5c/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Move them out of the spinlock, it is OK if there is some races setting up the MC reception at the ethernet layer with rbtree lookups. Fixes: 6090a0c4c7c6 ("RDMA/rxe: Cleanup rxe_mcast.c") Link: https://lore.kernel.org/r/20220504202817.98247-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mcast.c | 51 ++++++++++++--------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index ae8f11cb704a..77e45cabd8ea 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -38,13 +38,13 @@ static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) } /** - * rxe_mcast_delete - delete multicast address from rxe device + * rxe_mcast_del - delete multicast address from rxe device * @rxe: rxe device object * @mgid: multicast address as a gid * * Returns 0 on success else an error */ -static int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid) +static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) { unsigned char ll_addr[ETH_ALEN]; @@ -159,17 +159,10 @@ struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid) * @mcg: new mcg object * * Context: caller should hold rxe->mcg lock - * Returns: 0 on success else an error */ -static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, - struct rxe_mcg *mcg) +static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, + struct rxe_mcg *mcg) { - int err; - - err = rxe_mcast_add(rxe, mgid); - if (unlikely(err)) - return err; - kref_init(&mcg->ref_cnt); memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); INIT_LIST_HEAD(&mcg->qp_list); @@ -184,8 +177,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, */ kref_get(&mcg->ref_cnt); __rxe_insert_mcg(mcg); - - return 0; } /** @@ -209,6 +200,12 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) if (mcg) return mcg; + /* check to see if we have reached limit */ + if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) { + err = -ENOMEM; + goto err_dec; + } + /* speculative alloc of new mcg */ mcg = kzalloc(sizeof(*mcg), GFP_KERNEL); if (!mcg) @@ -218,27 +215,23 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) /* re-check to see if someone else just added it */ tmp = __rxe_lookup_mcg(rxe, mgid); if (tmp) { + spin_unlock_irqrestore(&rxe->mcg_lock, flags); + atomic_dec(&rxe->mcg_num); kfree(mcg); - mcg = tmp; - goto out; + return tmp; } - if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) { - err = -ENOMEM; - goto err_dec; - } - - err = __rxe_init_mcg(rxe, mgid, mcg); - if (err) - goto err_dec; -out: + __rxe_init_mcg(rxe, mgid, mcg); spin_unlock_irqrestore(&rxe->mcg_lock, flags); - return mcg; + /* add mcast address outside of lock */ + err = rxe_mcast_add(rxe, mgid); + if (!err) + return mcg; + + kfree(mcg); err_dec: atomic_dec(&rxe->mcg_num); - spin_unlock_irqrestore(&rxe->mcg_lock, flags); - kfree(mcg); return ERR_PTR(err); } @@ -268,7 +261,6 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg) __rxe_remove_mcg(mcg); kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); - rxe_mcast_delete(mcg->rxe, &mcg->mgid); atomic_dec(&rxe->mcg_num); } @@ -282,6 +274,9 @@ static void rxe_destroy_mcg(struct rxe_mcg *mcg) { unsigned long flags; + /* delete mcast address outside of lock */ + rxe_mcast_del(mcg->rxe, &mcg->mgid); + spin_lock_irqsave(&mcg->rxe->mcg_lock, flags); __rxe_destroy_mcg(mcg); spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags); From bfdc0edd11f9501b891a069b5bbd3b16731941e1 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Wed, 4 May 2022 15:28:17 -0500 Subject: [PATCH 589/803] RDMA/rxe: Change mcg_lock to a _bh lock rxe_mcast.c currently uses _irqsave spinlocks for rxe->mcg_lock while rxe_recv.c uses _bh spinlocks for the same lock. As there is no case where the mcg_lock can be taken from an IRQ, change these all to bh locks so we don't have confusing mismatched lock types on the same spinlock. Fixes: 6090a0c4c7c6 ("RDMA/rxe: Cleanup rxe_mcast.c") Link: https://lore.kernel.org/r/20220504202817.98247-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mcast.c | 36 +++++++++++---------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index 77e45cabd8ea..873a9b10307c 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -143,11 +143,10 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe, struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid) { struct rxe_mcg *mcg; - unsigned long flags; - spin_lock_irqsave(&rxe->mcg_lock, flags); + spin_lock_bh(&rxe->mcg_lock); mcg = __rxe_lookup_mcg(rxe, mgid); - spin_unlock_irqrestore(&rxe->mcg_lock, flags); + spin_unlock_bh(&rxe->mcg_lock); return mcg; } @@ -189,7 +188,6 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) { struct rxe_mcg *mcg, *tmp; - unsigned long flags; int err; if (rxe->attr.max_mcast_grp == 0) @@ -211,18 +209,18 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) if (!mcg) return ERR_PTR(-ENOMEM); - spin_lock_irqsave(&rxe->mcg_lock, flags); + spin_lock_bh(&rxe->mcg_lock); /* re-check to see if someone else just added it */ tmp = __rxe_lookup_mcg(rxe, mgid); if (tmp) { - spin_unlock_irqrestore(&rxe->mcg_lock, flags); + spin_unlock_bh(&rxe->mcg_lock); atomic_dec(&rxe->mcg_num); kfree(mcg); return tmp; } __rxe_init_mcg(rxe, mgid, mcg); - spin_unlock_irqrestore(&rxe->mcg_lock, flags); + spin_unlock_bh(&rxe->mcg_lock); /* add mcast address outside of lock */ err = rxe_mcast_add(rxe, mgid); @@ -272,14 +270,12 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg) */ static void rxe_destroy_mcg(struct rxe_mcg *mcg) { - unsigned long flags; - /* delete mcast address outside of lock */ rxe_mcast_del(mcg->rxe, &mcg->mgid); - spin_lock_irqsave(&mcg->rxe->mcg_lock, flags); + spin_lock_bh(&mcg->rxe->mcg_lock); __rxe_destroy_mcg(mcg); - spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags); + spin_unlock_bh(&mcg->rxe->mcg_lock); } /** @@ -334,25 +330,24 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) { struct rxe_dev *rxe = mcg->rxe; struct rxe_mca *mca, *tmp; - unsigned long flags; int err; /* check to see if the qp is already a member of the group */ - spin_lock_irqsave(&rxe->mcg_lock, flags); + spin_lock_bh(&rxe->mcg_lock); list_for_each_entry(mca, &mcg->qp_list, qp_list) { if (mca->qp == qp) { - spin_unlock_irqrestore(&rxe->mcg_lock, flags); + spin_unlock_bh(&rxe->mcg_lock); return 0; } } - spin_unlock_irqrestore(&rxe->mcg_lock, flags); + spin_unlock_bh(&rxe->mcg_lock); /* speculative alloc new mca without using GFP_ATOMIC */ mca = kzalloc(sizeof(*mca), GFP_KERNEL); if (!mca) return -ENOMEM; - spin_lock_irqsave(&rxe->mcg_lock, flags); + spin_lock_bh(&rxe->mcg_lock); /* re-check to see if someone else just attached qp */ list_for_each_entry(tmp, &mcg->qp_list, qp_list) { if (tmp->qp == qp) { @@ -366,7 +361,7 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) if (err) kfree(mca); out: - spin_unlock_irqrestore(&rxe->mcg_lock, flags); + spin_unlock_bh(&rxe->mcg_lock); return err; } @@ -400,9 +395,8 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) { struct rxe_dev *rxe = mcg->rxe; struct rxe_mca *mca, *tmp; - unsigned long flags; - spin_lock_irqsave(&rxe->mcg_lock, flags); + spin_lock_bh(&rxe->mcg_lock); list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) { if (mca->qp == qp) { __rxe_cleanup_mca(mca, mcg); @@ -416,13 +410,13 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) if (atomic_read(&mcg->qp_num) <= 0) __rxe_destroy_mcg(mcg); - spin_unlock_irqrestore(&rxe->mcg_lock, flags); + spin_unlock_bh(&rxe->mcg_lock); return 0; } } /* we didn't find the qp on the list */ - spin_unlock_irqrestore(&rxe->mcg_lock, flags); + spin_unlock_bh(&rxe->mcg_lock); return -EINVAL; } From 59f5ede3bc0f00eb856425f636dab0c10feb06d8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 1 May 2022 21:31:43 +0200 Subject: [PATCH 590/803] x86/fpu: Prevent FPU state corruption The FPU usage related to task FPU management is either protected by disabling interrupts (switch_to, return to user) or via fpregs_lock() which is a wrapper around local_bh_disable(). When kernel code wants to use the FPU then it has to check whether it is possible by calling irq_fpu_usable(). But the condition in irq_fpu_usable() is wrong. It allows FPU to be used when: !in_interrupt() || interrupted_user_mode() || interrupted_kernel_fpu_idle() The latter is checking whether some other context already uses FPU in the kernel, but if that's not the case then it allows FPU to be used unconditionally even if the calling context interrupted a fpregs_lock() critical region. If that happens then the FPU state of the interrupted context becomes corrupted. Allow in kernel FPU usage only when no other context has in kernel FPU usage and either the calling context is not hard interrupt context or the hard interrupt did not interrupt a local bottomhalf disabled region. It's hard to find a proper Fixes tag as the condition was broken in one way or the other for a very long time and the eager/lazy FPU changes caused a lot of churn. Picked something remotely connected from the history. This survived undetected for quite some time as FPU usage in interrupt context is rare, but the recent changes to the random code unearthed it at least on a kernel which had FPU debugging enabled. There is probably a higher rate of silent corruption as not all issues can be detected by the FPU debugging code. This will be addressed in a subsequent change. Fixes: 5d2bd7009f30 ("x86, fpu: decouple non-lazy/eager fpu restore from xsave") Reported-by: Filipe Manana Signed-off-by: Thomas Gleixner Tested-by: Filipe Manana Reviewed-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220501193102.588689270@linutronix.de --- arch/x86/kernel/fpu/core.c | 67 +++++++++++++++----------------------- 1 file changed, 26 insertions(+), 41 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index c049561f373a..e28ab0ecc537 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -41,17 +41,7 @@ struct fpu_state_config fpu_user_cfg __ro_after_init; */ struct fpstate init_fpstate __ro_after_init; -/* - * Track whether the kernel is using the FPU state - * currently. - * - * This flag is used: - * - * - by IRQ context code to potentially use the FPU - * if it's unused. - * - * - to debug kernel_fpu_begin()/end() correctness - */ +/* Track in-kernel FPU usage */ static DEFINE_PER_CPU(bool, in_kernel_fpu); /* @@ -59,42 +49,37 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu); */ DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); -static bool kernel_fpu_disabled(void) -{ - return this_cpu_read(in_kernel_fpu); -} - -static bool interrupted_kernel_fpu_idle(void) -{ - return !kernel_fpu_disabled(); -} - -/* - * Were we in user mode (or vm86 mode) when we were - * interrupted? - * - * Doing kernel_fpu_begin/end() is ok if we are running - * in an interrupt context from user mode - we'll just - * save the FPU state as required. - */ -static bool interrupted_user_mode(void) -{ - struct pt_regs *regs = get_irq_regs(); - return regs && user_mode(regs); -} - /* * Can we use the FPU in kernel mode with the * whole "kernel_fpu_begin/end()" sequence? - * - * It's always ok in process context (ie "not interrupt") - * but it is sometimes ok even from an irq. */ bool irq_fpu_usable(void) { - return !in_interrupt() || - interrupted_user_mode() || - interrupted_kernel_fpu_idle(); + if (WARN_ON_ONCE(in_nmi())) + return false; + + /* In kernel FPU usage already active? */ + if (this_cpu_read(in_kernel_fpu)) + return false; + + /* + * When not in NMI or hard interrupt context, FPU can be used in: + * + * - Task context except from within fpregs_lock()'ed critical + * regions. + * + * - Soft interrupt processing context which cannot happen + * while in a fpregs_lock()'ed critical region. + */ + if (!in_hardirq()) + return true; + + /* + * In hard interrupt context it's safe when soft interrupts + * are enabled, which means the interrupt did not hit in + * a fpregs_lock()'ed critical region. + */ + return !softirq_count(); } EXPORT_SYMBOL(irq_fpu_usable); From ae2de669c14a18b5144cdacf49933ad400ed7e1c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 4 May 2022 22:29:15 +0200 Subject: [PATCH 591/803] wireguard: selftests: make routing loop test non-fatal I hate to do this, but I still do not have a good solution to actually fix this bug across architectures. So just disable it for now, so that the CI can still deliver actionable results. This commit adds a large red warning, so that at least the failure isn't lost forever, and hopefully this can be revisited down the line. Link: https://lore.kernel.org/netdev/CAHmME9pv1x6C4TNdL6648HydD8r+txpV4hTUXOBVkrapBXH4QQ@mail.gmail.com/ Link: https://lore.kernel.org/netdev/YmszSXueTxYOC41G@zx2c4.com/ Link: https://lore.kernel.org/wireguard/CAHmME9rNnBiNvBstb7MPwK-7AmAN0sOfnhdR=eeLrowWcKxaaQ@mail.gmail.com/ Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- tools/testing/selftests/wireguard/netns.sh | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 8a9461aa0878..8a543200a61a 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -280,7 +280,19 @@ read _ _ tx_bytes_before < <(n0 wg show wg1 transfer) ! n0 ping -W 1 -c 10 -f 192.168.241.2 || false sleep 1 read _ _ tx_bytes_after < <(n0 wg show wg1 transfer) -(( tx_bytes_after - tx_bytes_before < 70000 )) +if ! (( tx_bytes_after - tx_bytes_before < 70000 )); then + errstart=$'\x1b[37m\x1b[41m\x1b[1m' + errend=$'\x1b[0m' + echo "${errstart} ${errend}" + echo "${errstart} E R R O R ${errend}" + echo "${errstart} ${errend}" + echo "${errstart} This architecture does not do the right thing ${errend}" + echo "${errstart} with cross-namespace routing loops. This test ${errend}" + echo "${errstart} has thus technically failed but, as this issue ${errend}" + echo "${errstart} is as yet unsolved, these tests will continue ${errend}" + echo "${errstart} onward. :( ${errend}" + echo "${errstart} ${errend}" +fi ip0 link del wg1 ip1 link del wg0 From 39f02bf1e5ce9d72045de01e3d618ade1067158c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 4 May 2022 22:29:16 +0200 Subject: [PATCH 592/803] wireguard: selftests: limit parallelism to $(nproc) tests at once The parallel tests were added to catch queueing issues from multiple cores. But what happens in reality when testing tons of processes is that these separate threads wind up fighting with the scheduler, and we wind up with contention in places we don't care about that decrease the chances of hitting a bug. So just do a test with the number of CPU cores, rather than trying to scale up arbitrarily. Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- tools/testing/selftests/wireguard/netns.sh | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 8a543200a61a..69c7796c7ca9 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -22,10 +22,12 @@ # interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further # details on how this is accomplished. set -e +shopt -s extglob exec 3>&1 export LANG=C export WG_HIDE_KEYS=never +NPROC=( /sys/devices/system/cpu/cpu+([0-9]) ); NPROC=${#NPROC[@]} netns0="wg-test-$$-0" netns1="wg-test-$$-1" netns2="wg-test-$$-2" @@ -143,17 +145,15 @@ tests() { n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 # TCP over IPv4, in parallel - for max in 4 5 50; do - local pids=( ) - for ((i=0; i < max; ++i)) do - n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 & - pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i )) - done - for ((i=0; i < max; ++i)) do - n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 & - done - wait "${pids[@]}" + local pids=( ) i + for ((i=0; i < NPROC; ++i)) do + n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 & + pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i )) done + for ((i=0; i < NPROC; ++i)) do + n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 & + done + wait "${pids[@]}" } [[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" From d5d9b29bc963cc084c5c0f3a7c28e2632a22e0c4 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 4 May 2022 22:29:17 +0200 Subject: [PATCH 593/803] wireguard: selftests: use newer toolchains to fill out architectures Rather than relying on the system to have cross toolchains available, simply download musl.cc's ones and use that libc.so, and then we use it to fill in a few missing platforms, such as riscv64, riscv64, powerpc64, and s390x. Since riscv doesn't have a second serial port in its device description, we have to use virtio's vport. This is actually the same situation on ARM, but we were previously hacking QEMU up to work around this, which required a custom QEMU. Instead just do the vport trick on ARM too. Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- .../testing/selftests/wireguard/qemu/Makefile | 171 ++++++++++++------ .../wireguard/qemu/arch/aarch64.config | 5 +- .../wireguard/qemu/arch/aarch64_be.config | 5 +- .../selftests/wireguard/qemu/arch/arm.config | 5 +- .../wireguard/qemu/arch/armeb.config | 5 +- .../wireguard/qemu/arch/powerpc64.config | 13 ++ .../wireguard/qemu/arch/riscv32.config | 12 ++ .../wireguard/qemu/arch/riscv64.config | 12 ++ .../wireguard/qemu/arch/s390x.config | 6 + 9 files changed, 170 insertions(+), 64 deletions(-) create mode 100644 tools/testing/selftests/wireguard/qemu/arch/powerpc64.config create mode 100644 tools/testing/selftests/wireguard/qemu/arch/riscv32.config create mode 100644 tools/testing/selftests/wireguard/qemu/arch/riscv64.config create mode 100644 tools/testing/selftests/wireguard/qemu/arch/s390x.config diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index 4bdd6c1a19d3..930f59c9e714 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -4,26 +4,24 @@ PWD := $(shell pwd) -CHOST := $(shell gcc -dumpmachine) -HOST_ARCH := $(firstword $(subst -, ,$(CHOST))) -ifneq (,$(ARCH)) -CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc)))))) -ifeq (,$(CBUILD)) -$(error The toolchain for $(ARCH) is not installed) -endif -else -CBUILD := $(CHOST) -ARCH := $(firstword $(subst -, ,$(CBUILD))) -endif - # Set these from the environment to override KERNEL_PATH ?= $(PWD)/../../../../.. BUILD_PATH ?= $(PWD)/build/$(ARCH) DISTFILES_PATH ?= $(PWD)/distfiles NR_CPUS ?= 4 +ARCH ?= +CBUILD := $(shell gcc -dumpmachine) +HOST_ARCH := $(firstword $(subst -, ,$(CBUILD))) +ifeq ($(ARCH),) +ARCH := $(HOST_ARCH) +endif MIRROR := https://download.wireguard.com/qemu-test/distfiles/ +KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) +rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) +WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*) + default: qemu # variable name, tarball project name, version, tarball extension, default URI base @@ -36,12 +34,11 @@ $(call file_download,$$($(1)_NAME)$(4),$(5),$(6)) endef define file_download = -$(DISTFILES_PATH)/$(1): +$(DISTFILES_PATH)/$(1): | $(4) mkdir -p $(DISTFILES_PATH) - flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi' + flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if ([ -n "$(4)" ] && sed -n "s#^\([a-f0-9]\{64\}\) \($(1)\)\$$$$#\1 $(DISTFILES_PATH)/\2.tmp#p" "$(4)" || echo "$(3) $$@.tmp") | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi' endef -$(eval $(call tar_download,MUSL,musl,1.2.0,.tar.gz,https://musl.libc.org/releases/,c6de7b191139142d3f9a7b5b702c9cae1b5ee6e7f57e582da9328629408fd4e8)) $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) $(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692)) @@ -50,28 +47,20 @@ $(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) $(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64)) -KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) -rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) -WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*) - -export CFLAGS ?= -O3 -pipe -export LDFLAGS ?= -export CPPFLAGS := -I$(BUILD_PATH)/include - +export CFLAGS := -O3 -pipe ifeq ($(HOST_ARCH),$(ARCH)) -CROSS_COMPILE_FLAG := --host=$(CHOST) CFLAGS += -march=native -STRIP := strip -else -$(info Cross compilation: building for $(CBUILD) using $(CHOST)) -CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) -export CROSS_COMPILE=$(CBUILD)- -STRIP := $(CBUILD)-strip endif +export LDFLAGS := +export CPPFLAGS := + +QEMU_VPORT_RESULT := ifeq ($(ARCH),aarch64) +CHOST := aarch64-linux-musl QEMU_ARCH := aarch64 KERNEL_ARCH := arm64 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image +QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(HOST_ARCH),$(ARCH)) QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm else @@ -79,9 +68,11 @@ QEMU_MACHINE := -cpu cortex-a53 -machine virt CFLAGS += -march=armv8-a -mtune=cortex-a53 endif else ifeq ($(ARCH),aarch64_be) +CHOST := aarch64_be-linux-musl QEMU_ARCH := aarch64 KERNEL_ARCH := arm64 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image +QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(HOST_ARCH),$(ARCH)) QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm else @@ -89,9 +80,11 @@ QEMU_MACHINE := -cpu cortex-a53 -machine virt CFLAGS += -march=armv8-a -mtune=cortex-a53 endif else ifeq ($(ARCH),arm) +CHOST := arm-linux-musleabi QEMU_ARCH := arm KERNEL_ARCH := arm KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage +QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(HOST_ARCH),$(ARCH)) QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm else @@ -99,9 +92,11 @@ QEMU_MACHINE := -cpu cortex-a15 -machine virt CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux endif else ifeq ($(ARCH),armeb) +CHOST := armeb-linux-musleabi QEMU_ARCH := arm KERNEL_ARCH := arm KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage +QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(HOST_ARCH),$(ARCH)) QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm else @@ -110,6 +105,7 @@ CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due LDFLAGS += -Wl,--be8 endif else ifeq ($(ARCH),x86_64) +CHOST := x86_64-linux-musl QEMU_ARCH := x86_64 KERNEL_ARCH := x86_64 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage @@ -120,6 +116,7 @@ QEMU_MACHINE := -cpu Skylake-Server -machine q35 CFLAGS += -march=skylake-avx512 endif else ifeq ($(ARCH),i686) +CHOST := i686-linux-musl QEMU_ARCH := i386 KERNEL_ARCH := x86 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage @@ -130,6 +127,7 @@ QEMU_MACHINE := -cpu coreduo -machine q35 CFLAGS += -march=prescott endif else ifeq ($(ARCH),mips64) +CHOST := mips64-linux-musl QEMU_ARCH := mips64 KERNEL_ARCH := mips KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux @@ -141,6 +139,7 @@ QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 CFLAGS += -march=mips64r2 -EB endif else ifeq ($(ARCH),mips64el) +CHOST := mips64el-linux-musl QEMU_ARCH := mips64el KERNEL_ARCH := mips KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux @@ -152,6 +151,7 @@ QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 CFLAGS += -march=mips64r2 -EL endif else ifeq ($(ARCH),mips) +CHOST := mips-linux-musl QEMU_ARCH := mips KERNEL_ARCH := mips KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux @@ -163,6 +163,7 @@ QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 CFLAGS += -march=mips32r2 -EB endif else ifeq ($(ARCH),mipsel) +CHOST := mipsel-linux-musl QEMU_ARCH := mipsel KERNEL_ARCH := mips KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux @@ -173,7 +174,18 @@ else QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 CFLAGS += -march=mips32r2 -EL endif -else ifeq ($(ARCH),powerpc64le) +else ifeq ($(ARCH),powerpc64) +CHOST := powerpc64-linux-musl +QEMU_ARCH := ppc64 +KERNEL_ARCH := powerpc +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine pseries +else +QEMU_MACHINE := -machine pseries +endif +else ifeq ($(ARCH),powerpc64le) +CHOST := powerpc64le-linux-musl QEMU_ARCH := ppc64 KERNEL_ARCH := powerpc KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux @@ -182,8 +194,8 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine pseries else QEMU_MACHINE := -machine pseries endif -CFLAGS += -mcpu=powerpc64le -mlong-double-64 else ifeq ($(ARCH),powerpc) +CHOST := powerpc-linux-musl QEMU_ARCH := ppc KERNEL_ARCH := powerpc KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage @@ -192,26 +204,72 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500 else QEMU_MACHINE := -machine ppce500 endif -CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt else ifeq ($(ARCH),m68k) +CHOST := m68k-linux-musl QEMU_ARCH := m68k KERNEL_ARCH := m68k KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config) ifeq ($(HOST_ARCH),$(ARCH)) -QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE) +QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -append $(KERNEL_CMDLINE) else QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE) endif +else ifeq ($(ARCH),riscv64) +CHOST := riscv64-linux-musl +QEMU_ARCH := riscv64 +KERNEL_ARCH := riscv +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/riscv/boot/Image +QEMU_VPORT_RESULT := virtio-serial-device +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine virt else -$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k) +QEMU_MACHINE := -cpu rv64 -machine virt +endif +else ifeq ($(ARCH),riscv32) +CHOST := riscv32-linux-musl +QEMU_ARCH := riscv32 +KERNEL_ARCH := riscv +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/riscv/boot/Image +QEMU_VPORT_RESULT := virtio-serial-device +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine virt +else +QEMU_MACHINE := -cpu rv32 -machine virt +endif +else ifeq ($(ARCH),s390x) +CHOST := s390x-linux-musl +QEMU_ARCH := s390x +KERNEL_ARCH := s390 +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/s390/boot/bzImage +KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/s390x.config) +QEMU_VPORT_RESULT := virtio-serial-ccw +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine s390-ccw-virtio -append $(KERNEL_CMDLINE) +else +QEMU_MACHINE := -machine s390-ccw-virtio -append $(KERNEL_CMDLINE) +endif +else +$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x) endif -REAL_CC := $(CBUILD)-gcc -MUSL_CC := $(BUILD_PATH)/musl-gcc -export CC := $(MUSL_CC) -USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed +TOOLCHAIN_FILENAME := $(CHOST)-cross.tgz +TOOLCHAIN_TAR := $(DISTFILES_PATH)/$(TOOLCHAIN_FILENAME) +TOOLCHAIN_PATH := $(BUILD_PATH)/$(CHOST)-cross +TOOLCHAIN_DIR := https://download.wireguard.com/qemu-test/toolchains/20211123/ +$(eval $(call file_download,toolchain-sha256sums-20211123,$(TOOLCHAIN_DIR)SHA256SUMS#,83da033fd8c798df476c21d9612da2dfb896ec62fbed4ceec5eefc0e56b3f0c8)) +$(eval $(call file_download,$(TOOLCHAIN_FILENAME),$(TOOLCHAIN_DIR),,$(DISTFILES_PATH)/toolchain-sha256sums-20211123)) +STRIP := $(CHOST)-strip +CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) +$(info Building for $(CHOST) using $(CBUILD)) +export CROSS_COMPILE := $(CHOST)- +export PATH := $(TOOLCHAIN_PATH)/bin:$(PATH) +export CC := $(CHOST)-gcc + +USERSPACE_DEPS := $(TOOLCHAIN_PATH)/.installed $(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed + +comma := , build: $(KERNEL_BZIMAGE) qemu: $(KERNEL_BZIMAGE) rm -f $(BUILD_PATH)/result @@ -222,13 +280,14 @@ qemu: $(KERNEL_BZIMAGE) $(QEMU_MACHINE) \ -m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \ -serial stdio \ - -serial file:$(BUILD_PATH)/result \ + -chardev file,path=$(BUILD_PATH)/result,id=result \ + $(if $(QEMU_VPORT_RESULT),-device $(QEMU_VPORT_RESULT) -device virtserialport$(comma)chardev=result,-serial chardev:result) \ -no-reboot \ -monitor none \ -kernel $< grep -Fq success $(BUILD_PATH)/result -$(BUILD_PATH)/init-cpio-spec.txt: +$(BUILD_PATH)/init-cpio-spec.txt: $(TOOLCHAIN_PATH)/.installed $(BUILD_PATH)/init mkdir -p $(BUILD_PATH) echo "file /init $(BUILD_PATH)/init 755 0 0" > $@ echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@ @@ -246,10 +305,10 @@ $(BUILD_PATH)/init-cpio-spec.txt: echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@ echo "slink /bin/ping6 ping 777 0 0" >> $@ echo "dir /lib 755 0 0" >> $@ - echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@ - echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@ + echo "file /lib/libc.so $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so 755 0 0" >> $@ + echo "slink $$($(CHOST)-readelf -p .interp '$(BUILD_PATH)/init'| grep -o '/lib/.*') libc.so 777 0 0" >> $@ -$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config +$(KERNEL_BUILD_PATH)/.config: $(TOOLCHAIN_PATH)/.installed kernel.config arch/$(ARCH).config mkdir -p $(KERNEL_BUILD_PATH) cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config @@ -258,29 +317,20 @@ $(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,) -$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) +$(KERNEL_BZIMAGE): $(TOOLCHAIN_PATH)/.installed $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) -$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config - $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install +$(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config $(TOOLCHAIN_PATH)/.installed + rm -rf $(TOOLCHAIN_PATH)/$(CHOST)/include/linux + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(TOOLCHAIN_PATH)/$(CHOST) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install touch $@ -$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR) +$(TOOLCHAIN_PATH)/.installed: $(TOOLCHAIN_TAR) mkdir -p $(BUILD_PATH) flock -s $<.lock tar -C $(BUILD_PATH) -xf $< - cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD) - $(MAKE) -C $(MUSL_PATH) - $(STRIP) -s $@ - -$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so - $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers + $(STRIP) -s $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so touch $@ -$(MUSL_CC): $(MUSL_PATH)/lib/libc.so - sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs - printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc - chmod +x $(BUILD_PATH)/musl-gcc - $(IPERF_PATH)/.installed: $(IPERF_TAR) mkdir -p $(BUILD_PATH) flock -s $<.lock tar -C $(BUILD_PATH) -xf $< @@ -289,6 +339,7 @@ $(IPERF_PATH)/.installed: $(IPERF_TAR) touch $@ $(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS) + cd $(IPERF_PATH) && autoreconf -fi cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no $(MAKE) -C $(IPERF_PATH) $(STRIP) -s $@ @@ -304,7 +355,7 @@ $(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE $(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS) mkdir -p $(BUILD_PATH) - $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $< + $(CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $< $(STRIP) -s $@ $(IPUTILS_PATH)/.installed: $(IPUTILS_TAR) diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config index 3d063bb247bb..e9ac41f6b3ae 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config +++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config @@ -1,5 +1,8 @@ CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1" CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config index dbdc7e406a7b..03609a203e8c 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config +++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config @@ -1,6 +1,9 @@ CONFIG_CPU_BIG_ENDIAN=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1" CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config index 148f49905418..c616124fdd59 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/arm.config +++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config @@ -4,6 +4,9 @@ CONFIG_ARCH_VIRT=y CONFIG_THUMB2_KERNEL=n CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config index bd76b07d00a2..d3a40a974e16 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/armeb.config +++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config @@ -4,7 +4,10 @@ CONFIG_ARCH_VIRT=y CONFIG_THUMB2_KERNEL=n CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1" CONFIG_CPU_BIG_ENDIAN=y CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config new file mode 100644 index 000000000000..c19c7b519d8b --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config @@ -0,0 +1,13 @@ +CONFIG_PPC64=y +CONFIG_PPC_PSERIES=y +CONFIG_ALTIVEC=y +CONFIG_VSX=y +CONFIG_PPC_OF_BOOT_TRAMPOLINE=y +CONFIG_PPC_RADIX_MMU=y +CONFIG_HVC_CONSOLE=y +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=hvc0 wg.success=hvc1" +CONFIG_SECTION_MISMATCH_WARN_ONLY=y +CONFIG_FRAME_WARN=1280 +CONFIG_THREAD_SHIFT=14 diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config new file mode 100644 index 000000000000..ea53e78e923e --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config @@ -0,0 +1,12 @@ +CONFIG_ARCH_RV32I=y +CONFIG_MMU=y +CONFIG_FPU=y +CONFIG_SOC_VIRT=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1" +CONFIG_CMDLINE_FORCE=y diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config new file mode 100644 index 000000000000..4a08d8c1d4af --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config @@ -0,0 +1,12 @@ +CONFIG_ARCH_RV64I=y +CONFIG_MMU=y +CONFIG_FPU=y +CONFIG_SOC_VIRT=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1" +CONFIG_CMDLINE_FORCE=y diff --git a/tools/testing/selftests/wireguard/qemu/arch/s390x.config b/tools/testing/selftests/wireguard/qemu/arch/s390x.config new file mode 100644 index 000000000000..274a44f4e49c --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/s390x.config @@ -0,0 +1,6 @@ +CONFIG_SCLP_VT220_TTY=y +CONFIG_SCLP_VT220_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_S390_GUEST=y +CONFIG_CMDLINE="console=ttysclp0 wg.success=vport0p1" From d261ba6aa411e03c27da266b7df4bef771e8105e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 4 May 2022 22:29:18 +0200 Subject: [PATCH 594/803] wireguard: selftests: restore support for ccache When moving to non-system toolchains, we inadvertantly killed the ability to use ccache. So instead, build ccache support into the test harness directly. Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- .../selftests/wireguard/qemu/.gitignore | 1 + .../testing/selftests/wireguard/qemu/Makefile | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore index bfa15e6feb2f..42ab9d72b37b 100644 --- a/tools/testing/selftests/wireguard/qemu/.gitignore +++ b/tools/testing/selftests/wireguard/qemu/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only build/ distfiles/ +ccache/ diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index 930f59c9e714..e121ef3878af 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -266,6 +266,13 @@ $(info Building for $(CHOST) using $(CBUILD)) export CROSS_COMPILE := $(CHOST)- export PATH := $(TOOLCHAIN_PATH)/bin:$(PATH) export CC := $(CHOST)-gcc +CCACHE_PATH := $(shell which ccache 2>/dev/null) +ifneq ($(CCACHE_PATH),) +export KBUILD_BUILD_TIMESTAMP := Fri Jun 5 15:58:00 CEST 2015 +export PATH := $(TOOLCHAIN_PATH)/bin/ccache:$(PATH) +export CCACHE_SLOPPINESS := file_macro,time_macros +export CCACHE_DIR ?= $(PWD)/ccache +endif USERSPACE_DEPS := $(TOOLCHAIN_PATH)/.installed $(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed @@ -329,6 +336,10 @@ $(TOOLCHAIN_PATH)/.installed: $(TOOLCHAIN_TAR) mkdir -p $(BUILD_PATH) flock -s $<.lock tar -C $(BUILD_PATH) -xf $< $(STRIP) -s $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so +ifneq ($(CCACHE_PATH),) + mkdir -p $(TOOLCHAIN_PATH)/bin/ccache + ln -s $(CCACHE_PATH) $(TOOLCHAIN_PATH)/bin/ccache/$(CC) +endif touch $@ $(IPERF_PATH)/.installed: $(IPERF_TAR) @@ -421,8 +432,13 @@ clean: distclean: clean rm -rf $(DISTFILES_PATH) +cacheclean: clean +ifneq ($(CCACHE_DIR),) + rm -rf $(CCACHE_DIR) +endif + menuconfig: $(KERNEL_BUILD_PATH)/.config $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig -.PHONY: qemu build clean distclean menuconfig +.PHONY: qemu build clean distclean cacheclean menuconfig .DELETE_ON_ERROR: From a6b8ea9144340c0aaa66c817a3bbb6bca47f0321 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 4 May 2022 22:29:19 +0200 Subject: [PATCH 595/803] wireguard: selftests: bump package deps Use newer, more reliable package dependencies. These should hopefully reduce flakes. However, we keep the old iputils package, as it accumulated bugs after resulting in flakes on slow machines. Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- .../testing/selftests/wireguard/qemu/Makefile | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index e121ef3878af..bca07b93eeb0 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -39,13 +39,13 @@ $(DISTFILES_PATH)/$(1): | $(4) flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if ([ -n "$(4)" ] && sed -n "s#^\([a-f0-9]\{64\}\) \($(1)\)\$$$$#\1 $(DISTFILES_PATH)/\2.tmp#p" "$(4)" || echo "$(3) $$@.tmp") | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi' endef -$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) -$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) -$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692)) -$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c)) -$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa)) +$(eval $(call tar_download,IPERF,iperf,3.11,.tar.gz,https://downloads.es.net/pub/iperf/,de8cb409fad61a0574f4cb07eb19ce1159707403ac2dc01b5d175e91240b7e5f)) +$(eval $(call tar_download,BASH,bash,5.1.16,.tar.gz,https://ftp.gnu.org/gnu/bash/,5bac17218d3911834520dad13cd1f85ab944e1c09ae1aba55906be1f8192f558)) +$(eval $(call tar_download,IPROUTE2,iproute2,5.17.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,bda331d5c4606138892f23a565d78fca18919b4d508a0b7ca8391c2da2db68b9)) +$(eval $(call tar_download,IPTABLES,iptables,1.8.7,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,c109c96bb04998cd44156622d36f8e04b140701ec60531a10668cfdff5e8d8f0)) +$(eval $(call tar_download,NMAP,nmap,7.92,.tgz,https://nmap.org/dist/,064183ea642dc4c12b1ab3b5358ce1cef7d2e7e11ffa2849f16d339f5b717117)) $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) -$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64)) +$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20210914,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,97ff31489217bb265b7ae850d3d0f335ab07d2652ba1feec88b734bc96bd05ac)) export CFLAGS := -O3 -pipe ifeq ($(HOST_ARCH),$(ARCH)) @@ -385,15 +385,15 @@ $(BASH_PATH)/.installed: $(BASH_TAR) touch $@ $(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS) - cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble + cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-progcomp --disable-readline --disable-mem-scramble $(MAKE) -C $(BASH_PATH) $(STRIP) -s $@ $(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR) mkdir -p $(BUILD_PATH) flock -s $<.lock tar -C $(BUILD_PATH) -xf $< - printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk - printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile + printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_HANDLE_AT\n' > $(IPROUTE2_PATH)/config.mk + printf 'libutil.a.done:\n\tflock -x $$@.lock $$(MAKE) -C lib\n\ttouch $$@\nip/ip: libutil.a.done\n\t$$(MAKE) -C ip ip\nmisc/ss: libutil.a.done\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile touch $@ $(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS) From 3fc1b11e5d7278437bdfff0e01f51e777eefb222 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 4 May 2022 22:29:20 +0200 Subject: [PATCH 596/803] wireguard: selftests: set panic_on_warn=1 from cmdline Rather than setting this once init is running, set panic_on_warn from the kernel command line, so that it catches splats from WireGuard initialization code and the various crypto selftests. Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- tools/testing/selftests/wireguard/qemu/arch/aarch64.config | 2 +- .../testing/selftests/wireguard/qemu/arch/aarch64_be.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/arm.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/armeb.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/i686.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/m68k.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/mips.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/mips64.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/mips64el.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/mipsel.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/powerpc.config | 2 +- .../testing/selftests/wireguard/qemu/arch/powerpc64.config | 2 +- .../selftests/wireguard/qemu/arch/powerpc64le.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/riscv32.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/riscv64.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/s390x.config | 2 +- tools/testing/selftests/wireguard/qemu/arch/x86_64.config | 2 +- tools/testing/selftests/wireguard/qemu/init.c | 6 ------ 18 files changed, 17 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config index e9ac41f6b3ae..09016880ce03 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config +++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config @@ -4,5 +4,5 @@ CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config index 03609a203e8c..19ff66e4c602 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config +++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config @@ -5,5 +5,5 @@ CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config index c616124fdd59..fc7959bef9c2 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/arm.config +++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config @@ -8,5 +8,5 @@ CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config index d3a40a974e16..f3066be81c19 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/armeb.config +++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config @@ -8,6 +8,6 @@ CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" CONFIG_CPU_BIG_ENDIAN=y CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config index a9b4fe795048..6d90892a85a2 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/i686.config +++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config @@ -2,5 +2,5 @@ CONFIG_ACPI=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config index 62a15bdb877e..82c925e49beb 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/m68k.config +++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config @@ -5,5 +5,5 @@ CONFIG_MAC=y CONFIG_SERIAL_PMACZILOG=y CONFIG_SERIAL_PMACZILOG_TTYS=y CONFIG_SERIAL_PMACZILOG_CONSOLE=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config index df71d6b95546..d7ec63c17b30 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/mips.config +++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config @@ -7,5 +7,5 @@ CONFIG_POWER_RESET_SYSCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64.config b/tools/testing/selftests/wireguard/qemu/arch/mips64.config index 90c783f725c4..0994947e3392 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/mips64.config +++ b/tools/testing/selftests/wireguard/qemu/arch/mips64.config @@ -10,5 +10,5 @@ CONFIG_POWER_RESET_SYSCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config index 435b0b43e00c..591184342f47 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config +++ b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config @@ -11,5 +11,5 @@ CONFIG_POWER_RESET_SYSCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config index 62bb50c4a85f..18a498293737 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config +++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config @@ -8,5 +8,5 @@ CONFIG_POWER_RESET_SYSCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config index 57957093b71b..5e04882e8e35 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config @@ -6,5 +6,5 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_MATH_EMULATION=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config index c19c7b519d8b..737194b7619e 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config @@ -7,7 +7,7 @@ CONFIG_PPC_RADIX_MMU=y CONFIG_HVC_CONSOLE=y CONFIG_CPU_BIG_ENDIAN=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=hvc0 wg.success=hvc1" +CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1" CONFIG_SECTION_MISMATCH_WARN_ONLY=y CONFIG_FRAME_WARN=1280 CONFIG_THREAD_SHIFT=14 diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config index f52f1e2bc7f6..8148b9d1220a 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config @@ -7,7 +7,7 @@ CONFIG_PPC_RADIX_MMU=y CONFIG_HVC_CONSOLE=y CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=hvc0 wg.success=hvc1" +CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1" CONFIG_SECTION_MISMATCH_WARN_ONLY=y CONFIG_FRAME_WARN=1280 CONFIG_THREAD_SHIFT=14 diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config index ea53e78e923e..0bd0e72d95d4 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config +++ b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config @@ -8,5 +8,5 @@ CONFIG_SERIAL_OF_PLATFORM=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_CONSOLE=y -CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1" +CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1" CONFIG_CMDLINE_FORCE=y diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config index 4a08d8c1d4af..dc266f3b1915 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config +++ b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config @@ -8,5 +8,5 @@ CONFIG_SERIAL_OF_PLATFORM=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_CONSOLE=y -CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1" +CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1" CONFIG_CMDLINE_FORCE=y diff --git a/tools/testing/selftests/wireguard/qemu/arch/s390x.config b/tools/testing/selftests/wireguard/qemu/arch/s390x.config index 274a44f4e49c..a7b44dca0b0a 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/s390x.config +++ b/tools/testing/selftests/wireguard/qemu/arch/s390x.config @@ -3,4 +3,4 @@ CONFIG_SCLP_VT220_CONSOLE=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_CONSOLE=y CONFIG_S390_GUEST=y -CONFIG_CMDLINE="console=ttysclp0 wg.success=vport0p1" +CONFIG_CMDLINE="console=ttysclp0 wg.success=vport0p1 panic_on_warn=1" diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config index 45dd53a0d760..efa00693e08b 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config +++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config @@ -2,5 +2,5 @@ CONFIG_ACPI=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c index 0b45055d9de0..2a0f48fac925 100644 --- a/tools/testing/selftests/wireguard/qemu/init.c +++ b/tools/testing/selftests/wireguard/qemu/init.c @@ -110,12 +110,6 @@ static void enable_logging(void) panic("write(exception-trace)"); close(fd); } - fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY); - if (fd >= 0) { - if (write(fd, "1\n", 2) != 2) - panic("write(panic_on_warn)"); - close(fd); - } } static void kmod_selftests(void) From b2d057560b8107c633b39aabe517ff9d93f285e3 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 2 May 2022 10:46:08 +0200 Subject: [PATCH 597/803] secure_seq: use the 64 bits of the siphash for port offset calculation SipHash replaced MD5 in secure_ipv{4,6}_port_ephemeral() via commit 7cd23e5300c1 ("secure_seq: use SipHash in place of MD5"), but the output remained truncated to 32-bit only. In order to exploit more bits from the hash, let's make the functions return the full 64-bit of siphash_3u32(). We also make sure the port offset calculation in __inet_hash_connect() remains done on 32-bit to avoid the need for div_u64_rem() and an extra cost on 32-bit systems. Cc: Jason A. Donenfeld Cc: Moshe Kol Cc: Yossi Gilad Cc: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski --- include/net/inet_hashtables.h | 2 +- include/net/secure_seq.h | 4 ++-- net/core/secure_seq.c | 4 ++-- net/ipv4/inet_hashtables.c | 10 ++++++---- net/ipv6/inet6_hashtables.c | 4 ++-- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f72ec113ae56..98e1ec1a14f0 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -425,7 +425,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr) } int __inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk, u32 port_offset, + struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **)); diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index d7d2495f83c2..dac91aa38c5a 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -4,8 +4,8 @@ #include -u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); -u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, +u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); +u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); u32 secure_tcp_seq(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 9b8443774449..55aa5cc258e3 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -94,7 +94,7 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, } EXPORT_SYMBOL(secure_tcpv6_seq); -u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, +u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) { const struct { @@ -142,7 +142,7 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr, } EXPORT_SYMBOL_GPL(secure_tcp_seq); -u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) +u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { net_secret_init(); return siphash_3u32((__force u32)saddr, (__force u32)daddr, diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 17440840a791..9d24d9319f3d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -504,7 +504,7 @@ not_unique: return -EADDRNOTAVAIL; } -static u32 inet_sk_port_offset(const struct sock *sk) +static u64 inet_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); @@ -734,7 +734,7 @@ EXPORT_SYMBOL_GPL(inet_unhash); static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT]; int __inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk, u32 port_offset, + struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **)) { @@ -777,7 +777,9 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, net_get_random_once(table_perturb, sizeof(table_perturb)); index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); - offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining; + offset = READ_ONCE(table_perturb[index]) + port_offset; + offset %= remaining; + /* In first pass we try ports of @low parity. * inet_csk_get_port() does the opposite choice. */ @@ -859,7 +861,7 @@ ok: int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - u32 port_offset = 0; + u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet_sk_port_offset(sk); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 4740afecf7c6..32ccac10bd62 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -308,7 +308,7 @@ not_unique: return -EADDRNOTAVAIL; } -static u32 inet6_sk_port_offset(const struct sock *sk) +static u64 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); @@ -320,7 +320,7 @@ static u32 inet6_sk_port_offset(const struct sock *sk) int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - u32 port_offset = 0; + u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet6_sk_port_offset(sk); From 9e9b70ae923baf2b5e8a0ea4fd0c8451801ac526 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 2 May 2022 10:46:09 +0200 Subject: [PATCH 598/803] tcp: use different parts of the port_offset for index and offset Amit Klein suggests that we use different parts of port_offset for the table's index and the port offset so that there is no direct relation between them. Cc: Jason A. Donenfeld Cc: Moshe Kol Cc: Yossi Gilad Cc: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 9d24d9319f3d..29c701cd8312 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -777,7 +777,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, net_get_random_once(table_perturb, sizeof(table_perturb)); index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); - offset = READ_ONCE(table_perturb[index]) + port_offset; + offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32); offset %= remaining; /* In first pass we try ports of @low parity. From 4dfa9b438ee34caca4e6a4e5e961641807367f6f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 May 2022 10:46:10 +0200 Subject: [PATCH 599/803] tcp: resalt the secret every 10 seconds In order to limit the ability for an observer to recognize the source ports sequence used to contact a set of destinations, we should periodically shuffle the secret. 10 seconds looks effective enough without causing particular issues. Cc: Moshe Kol Cc: Yossi Gilad Cc: Amit Klein Cc: Jason A. Donenfeld Tested-by: Willy Tarreau Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/core/secure_seq.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 55aa5cc258e3..5f85e01d4093 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -22,6 +22,8 @@ static siphash_aligned_key_t net_secret; static siphash_aligned_key_t ts_secret; +#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ) + static __always_inline void net_secret_init(void) { net_get_random_once(&net_secret, sizeof(net_secret)); @@ -100,11 +102,13 @@ u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, const struct { struct in6_addr saddr; struct in6_addr daddr; + unsigned int timeseed; __be16 dport; } __aligned(SIPHASH_ALIGNMENT) combined = { .saddr = *(struct in6_addr *)saddr, .daddr = *(struct in6_addr *)daddr, - .dport = dport + .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, + .dport = dport, }; net_secret_init(); return siphash(&combined, offsetofend(typeof(combined), dport), @@ -145,8 +149,10 @@ EXPORT_SYMBOL_GPL(secure_tcp_seq); u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { net_secret_init(); - return siphash_3u32((__force u32)saddr, (__force u32)daddr, - (__force u16)dport, &net_secret); + return siphash_4u32((__force u32)saddr, (__force u32)daddr, + (__force u16)dport, + jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, + &net_secret); } EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); #endif From ca7af0402550f9a0b3316d5f1c30904e42ed257d Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 2 May 2022 10:46:11 +0200 Subject: [PATCH 600/803] tcp: add small random increments to the source port Here we're randomly adding between 0 and 7 random increments to the selected source port in order to add some noise in the source port selection that will make the next port less predictable. With the default port range of 32768-60999 this means a worst case reuse scenario of 14116/8=1764 connections between two consecutive uses of the same port, with an average of 14116/4.5=3137. This code was stressed at more than 800000 connections per second to a fixed target with all connections closed by the client using RSTs (worst condition) and only 2 connections failed among 13 billion, despite the hash being reseeded every 10 seconds, indicating a perfectly safe situation. Cc: Moshe Kol Cc: Yossi Gilad Cc: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 29c701cd8312..63bb4902f018 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -833,11 +833,12 @@ next_port: return -EADDRNOTAVAIL; ok: - /* If our first attempt found a candidate, skip next candidate - * in 1/16 of cases to add some noise. + /* Here we want to add a little bit of randomness to the next source + * port that will be chosen. We use a max() with a random here so that + * on low contention the randomness is maximal and on high contention + * it may be inexistent. */ - if (!i && !(prandom_u32() % 16)) - i = 2; + i = max_t(int, i, (prandom_u32() & 7) * 2); WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2); /* Head lock still held and bh's disabled */ From e9261476184be1abd486c9434164b2acbe0ed6c2 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 2 May 2022 10:46:12 +0200 Subject: [PATCH 601/803] tcp: dynamically allocate the perturb table used by source ports We'll need to further increase the size of this table and it's likely that at some point its size will not be suitable anymore for a static table. Let's allocate it on boot from inet_hashinfo2_init(), which is called from tcp_init(). Cc: Moshe Kol Cc: Yossi Gilad Cc: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 63bb4902f018..48ca07853068 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -731,7 +731,8 @@ EXPORT_SYMBOL_GPL(inet_unhash); * privacy, this only consumes 1 KB of kernel memory. */ #define INET_TABLE_PERTURB_SHIFT 8 -static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT]; +#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT) +static u32 *table_perturb; int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u64 port_offset, @@ -774,7 +775,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, if (likely(remaining > 1)) remaining &= ~1U; - net_get_random_once(table_perturb, sizeof(table_perturb)); + net_get_random_once(table_perturb, + INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb)); index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32); @@ -912,6 +914,12 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, low_limit, high_limit); init_hashinfo_lhash2(h); + + /* this one is used for source ports of outgoing connections */ + table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE, + sizeof(*table_perturb), GFP_KERNEL); + if (!table_perturb) + panic("TCP: failed to alloc table_perturb"); } int inet_hashinfo2_init_mod(struct inet_hashinfo *h) From 4c2c8f03a5ab7cb04ec64724d7d176d00bcc91e5 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 2 May 2022 10:46:13 +0200 Subject: [PATCH 602/803] tcp: increase source port perturb table to 2^16 Moshe Kol, Amit Klein, and Yossi Gilad reported being able to accurately identify a client by forcing it to emit only 40 times more connections than there are entries in the table_perturb[] table. The previous two improvements consisting in resalting the secret every 10s and adding randomness to each port selection only slightly improved the situation, and the current value of 2^8 was too small as it's not very difficult to make a client emit 10k connections in less than 10 seconds. Thus we're increasing the perturb table from 2^8 to 2^16 so that the same precision now requires 2.6M connections, which is more difficult in this time frame and harder to hide as a background activity. The impact is that the table now uses 256 kB instead of 1 kB, which could mostly affect devices making frequent outgoing connections. However such components usually target a small set of destinations (load balancers, database clients, perf assessment tools), and in practice only a few entries will be visited, like before. A live test at 1 million connections per second showed no performance difference from the previous value. Reported-by: Moshe Kol Reported-by: Yossi Gilad Reported-by: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 48ca07853068..cc5f66328b47 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -726,11 +726,12 @@ EXPORT_SYMBOL_GPL(inet_unhash); * Note that we use 32bit integers (vs RFC 'short integers') * because 2^16 is not a multiple of num_ephemeral and this * property might be used by clever attacker. - * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, - * we use 256 instead to really give more isolation and - * privacy, this only consumes 1 KB of kernel memory. + * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though + * attacks were since demonstrated, thus we use 65536 instead to really + * give more isolation and privacy, at the expense of 256kB of kernel + * memory. */ -#define INET_TABLE_PERTURB_SHIFT 8 +#define INET_TABLE_PERTURB_SHIFT 16 #define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT) static u32 *table_perturb; From e8161345ddbb66e449abde10d2fdce93f867eba9 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 2 May 2022 10:46:14 +0200 Subject: [PATCH 603/803] tcp: drop the hash_32() part from the index calculation In commit 190cc82489f4 ("tcp: change source port randomizarion at connect() time"), the table_perturb[] array was introduced and an index was taken from the port_offset via hash_32(). But it turns out that hash_32() performs a multiplication while the input here comes from the output of SipHash in secure_seq, that is well distributed enough to avoid the need for yet another hash. Suggested-by: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index cc5f66328b47..a5d57fa679ca 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -778,7 +778,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, net_get_random_once(table_perturb, INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb)); - index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); + index = port_offset & (INET_TABLE_PERTURB_SIZE - 1); offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32); offset %= remaining; From 5a7c5f70c743c6cf32b44b05bd6b19d4ad82f49d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 3 May 2022 15:14:28 +0300 Subject: [PATCH 604/803] selftests: ocelot: tc_flower_chains: specify conform-exceed action for policer As discussed here with Ido Schimmel: https://patchwork.kernel.org/project/netdevbpf/patch/20220224102908.5255-2-jianbol@nvidia.com/ the default conform-exceed action is "reclassify", for a reason we don't really understand. The point is that hardware can't offload that police action, so not specifying "conform-exceed" was always wrong, even though the command used to work in hardware (but not in software) until the kernel started adding validation for it. Fix the command used by the selftest by making the policer drop on exceed, and pass the packet to the next action (goto) on conform. Fixes: 8cd6b020b644 ("selftests: ocelot: add some example VCAP IS1, IS2 and ES0 tc offloads") Signed-off-by: Vladimir Oltean Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/20220503121428.842906-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh index eaf8a04a7ca5..10e54bcca7a9 100755 --- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh +++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh @@ -190,7 +190,7 @@ setup_prepare() tc filter add dev $eth0 ingress chain $(IS2 0 0) pref 1 \ protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \ - action police rate 50mbit burst 64k \ + action police rate 50mbit burst 64k conform-exceed drop/pipe \ action goto chain $(IS2 1 0) } From 170f37d6aa6ad4582eefd7459015de79e244536e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 3 May 2022 00:09:31 -0400 Subject: [PATCH 605/803] block: Do not call folio_next() on an unreferenced folio It is unsafe to call folio_next() on a folio unless you hold a reference on it that prevents it from being split or freed. After returning from the iterator, iomap calls folio_end_writeback() which may drop the last reference to the page, or allow the page to be split. If that happens, the iterator will not advance far enough through the bio_vec, leading to assertion failures like the BUG() in folio_end_writeback() that checks we're not trying to end writeback on a page not currently under writeback. Other assertion failures were also seen, but they're all explained by this one bug. Fix the bug by remembering where the next folio starts before returning from the iterator. There are other ways of fixing this bug, but this seems the simplest. Reported-by: Darrick J. Wong Tested-by: Darrick J. Wong Reported-by: Brian Foster Tested-by: Brian Foster Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/bio.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/bio.h b/include/linux/bio.h index 278cc81cc1e7..00450fd86bb4 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -269,6 +269,7 @@ struct folio_iter { size_t offset; size_t length; /* private: for use by the iterator */ + struct folio *_next; size_t _seg_count; int _i; }; @@ -283,6 +284,7 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, PAGE_SIZE * (bvec->bv_page - &fi->folio->page); fi->_seg_count = bvec->bv_len; fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count); + fi->_next = folio_next(fi->folio); fi->_i = i; } @@ -290,9 +292,10 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) { fi->_seg_count -= fi->length; if (fi->_seg_count) { - fi->folio = folio_next(fi->folio); + fi->folio = fi->_next; fi->offset = 0; fi->length = min(folio_size(fi->folio), fi->_seg_count); + fi->_next = folio_next(fi->folio); } else if (fi->_i + 1 < bio->bi_vcnt) { bio_first_folio(fi, bio, fi->_i + 1); } else { From b9ff43dd27434dbd850b908e2e0e1f6e794efd9b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 27 Apr 2022 17:01:28 -0400 Subject: [PATCH 606/803] mm/readahead: Fix readahead with large folios Reading 100KB chunks from a big file (eg dd bs=100K) leads to poor readahead behaviour. Studying the traces in detail, I noticed two problems. The first is that we were setting the readahead flag on the folio which contains the last byte read from the block. This is wrong because we will trigger readahead at the end of the read without waiting to see if a subsequent read is going to use the pages we just read. Instead, we need to set the readahead flag on the first folio _after_ the one which contains the last byte that we're reading. The second is that we were looking for the index of the folio with the readahead flag set to exactly match the start + size - async_size. If we've rounded this, either down (as previously) or up (as now), we'll think we hit a folio marked as readahead by a different read, and try to read the wrong pages. So round the expected index to the order of the folio we hit. Reported-by: Guo Xuenan Signed-off-by: Matthew Wilcox (Oracle) --- mm/readahead.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mm/readahead.c b/mm/readahead.c index 8e3775829513..4a60cdb64262 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -474,7 +474,8 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, if (!folio) return -ENOMEM; - if (mark - index < (1UL << order)) + mark = round_up(mark, 1UL << order); + if (index == mark) folio_set_readahead(folio); err = filemap_add_folio(ractl->mapping, folio, index, gfp); if (err) @@ -555,8 +556,9 @@ static void ondemand_readahead(struct readahead_control *ractl, struct file_ra_state *ra = ractl->ra; unsigned long max_pages = ra->ra_pages; unsigned long add_pages; - unsigned long index = readahead_index(ractl); - pgoff_t prev_index; + pgoff_t index = readahead_index(ractl); + pgoff_t expected, prev_index; + unsigned int order = folio ? folio_order(folio) : 0; /* * If the request exceeds the readahead window, allow the read to @@ -575,8 +577,9 @@ static void ondemand_readahead(struct readahead_control *ractl, * It's the expected callback index, assume sequential access. * Ramp up sizes, and push forward the readahead window. */ - if ((index == (ra->start + ra->size - ra->async_size) || - index == (ra->start + ra->size))) { + expected = round_up(ra->start + ra->size - ra->async_size, + 1UL << order); + if (index == expected || index == (ra->start + ra->size)) { ra->start += ra->size; ra->size = get_next_ra_size(ra, max_pages); ra->async_size = ra->size; @@ -662,7 +665,7 @@ readit: } ractl->_index = ra->start; - page_cache_ra_order(ractl, ra, folio ? folio_order(folio) : 0); + page_cache_ra_order(ractl, ra, order); } void page_cache_sync_ra(struct readahead_control *ractl, From 4071bf121d59944d5cd2238de0642f3d7995a997 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Wed, 4 May 2022 13:58:47 +0800 Subject: [PATCH 607/803] NFC: netlink: fix sleep in atomic bug when firmware download timeout There are sleep in atomic bug that could cause kernel panic during firmware download process. The root cause is that nlmsg_new with GFP_KERNEL parameter is called in fw_dnld_timeout which is a timer handler. The call trace is shown below: BUG: sleeping function called from invalid context at include/linux/sched/mm.h:265 Call Trace: kmem_cache_alloc_node __alloc_skb nfc_genl_fw_download_done call_timer_fn __run_timers.part.0 run_timer_softirq __do_softirq ... The nlmsg_new with GFP_KERNEL parameter may sleep during memory allocation process, and the timer handler is run as the result of a "software interrupt" that should not call any other function that could sleep. This patch changes allocation mode of netlink message from GFP_KERNEL to GFP_ATOMIC in order to prevent sleep in atomic bug. The GFP_ATOMIC flag makes memory allocation operation could be used in atomic context. Fixes: 9674da8759df ("NFC: Add firmware upload netlink command") Fixes: 9ea7187c53f6 ("NFC: netlink: Rename CMD_FW_UPLOAD to CMD_FW_DOWNLOAD") Signed-off-by: Duoming Zhou Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220504055847.38026-1-duoming@zju.edu.cn Signed-off-by: Paolo Abeni --- net/nfc/netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index f184b0db79d4..7c62417ccfd7 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1244,7 +1244,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; @@ -1260,7 +1260,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name, genlmsg_end(msg, hdr); - genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); + genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC); return 0; From 2d3535ed2c73fee356160aed40714b27be07442a Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 2 May 2022 11:34:16 +0200 Subject: [PATCH 608/803] MAINTAINERS: update the GPIO git tree entry My git tree has become the de facto main GPIO tree. Update the MAINTAINERS file to reflect that. Signed-off-by: Bartosz Golaszewski Reported-by: Baruch Siach Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index edc96cdb85e8..9d47c5e7c6ae 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8385,7 +8385,7 @@ M: Linus Walleij M: Bartosz Golaszewski L: linux-gpio@vger.kernel.org S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git F: Documentation/ABI/obsolete/sysfs-gpio F: Documentation/ABI/testing/gpio-cdev F: Documentation/admin-guide/gpio/ From 8707898e22fd665bc1d7b18b809be4b56ce25bdd Mon Sep 17 00:00:00 2001 From: Thomas Pfaff Date: Mon, 2 May 2022 13:28:29 +0200 Subject: [PATCH 609/803] genirq: Synchronize interrupt thread startup A kernel hang can be observed when running setserial in a loop on a kernel with force threaded interrupts. The sequence of events is: setserial open("/dev/ttyXXX") request_irq() do_stuff() -> serial interrupt -> wake(irq_thread) desc->threads_active++; close() free_irq() kthread_stop(irq_thread) synchronize_irq() <- hangs because desc->threads_active != 0 The thread is created in request_irq() and woken up, but does not get on a CPU to reach the actual thread function, which would handle the pending wake-up. kthread_stop() sets the should stop condition which makes the thread immediately exit, which in turn leaves the stale threads_active count around. This problem was introduced with commit 519cc8652b3a, which addressed a interrupt sharing issue in the PCIe code. Before that commit free_irq() invoked synchronize_irq(), which waits for the hard interrupt handler and also for associated threads to complete. To address the PCIe issue synchronize_irq() was replaced with __synchronize_hardirq(), which only waits for the hard interrupt handler to complete, but not for threaded handlers. This was done under the assumption, that the interrupt thread already reached the thread function and waits for a wake-up, which is guaranteed to be handled before acting on the stop condition. The problematic case, that the thread would not reach the thread function, was obviously overlooked. Make sure that the interrupt thread is really started and reaches thread_fn() before returning from __setup_irq(). This utilizes the existing wait queue in the interrupt descriptor. The wait queue is unused for non-shared interrupts. For shared interrupts the usage might cause a spurious wake-up of a waiter in synchronize_irq() or the completion of a threaded handler might cause a spurious wake-up of the waiter for the ready flag. Both are harmless and have no functional impact. [ tglx: Amended changelog ] Fixes: 519cc8652b3a ("genirq: Synchronize only with single thread on free_irq()") Signed-off-by: Thomas Pfaff Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/552fe7b4-9224-b183-bb87-a8f36d335690@pcs.com --- kernel/irq/internals.h | 2 ++ kernel/irq/irqdesc.c | 2 ++ kernel/irq/manage.c | 39 +++++++++++++++++++++++++++++---------- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 99cbdf55a8bd..f09c60393e55 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -29,12 +29,14 @@ extern struct irqaction chained_action; * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed * IRQTF_AFFINITY - irq thread is requested to adjust affinity * IRQTF_FORCED_THREAD - irq action is force threaded + * IRQTF_READY - signals that irq thread is ready */ enum { IRQTF_RUNTHREAD, IRQTF_WARNED, IRQTF_AFFINITY, IRQTF_FORCED_THREAD, + IRQTF_READY, }; /* diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 939d21cd55c3..0099b87dd853 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -407,6 +407,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, lockdep_set_class(&desc->lock, &irq_desc_lock_class); mutex_init(&desc->request_mutex); init_rcu_head(&desc->rcu); + init_waitqueue_head(&desc->wait_for_threads); desc_set_defaults(irq, desc, node, affinity, owner); irqd_set(&desc->irq_data, flags); @@ -575,6 +576,7 @@ int __init early_irq_init(void) raw_spin_lock_init(&desc[i].lock); lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); mutex_init(&desc[i].request_mutex); + init_waitqueue_head(&desc[i].wait_for_threads); desc_set_defaults(i, &desc[i], node, NULL, NULL); } return arch_early_irq_init(); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c03f71d5ec10..e3e245a4fd70 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1248,6 +1248,31 @@ static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action) raw_spin_unlock_irq(&desc->lock); } +/* + * Internal function to notify that a interrupt thread is ready. + */ +static void irq_thread_set_ready(struct irq_desc *desc, + struct irqaction *action) +{ + set_bit(IRQTF_READY, &action->thread_flags); + wake_up(&desc->wait_for_threads); +} + +/* + * Internal function to wake up a interrupt thread and wait until it is + * ready. + */ +static void wake_up_and_wait_for_irq_thread_ready(struct irq_desc *desc, + struct irqaction *action) +{ + if (!action || !action->thread) + return; + + wake_up_process(action->thread); + wait_event(desc->wait_for_threads, + test_bit(IRQTF_READY, &action->thread_flags)); +} + /* * Interrupt handler thread */ @@ -1259,6 +1284,8 @@ static int irq_thread(void *data) irqreturn_t (*handler_fn)(struct irq_desc *desc, struct irqaction *action); + irq_thread_set_ready(desc, action); + sched_set_fifo(current); if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD, @@ -1683,8 +1710,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (!shared) { - init_waitqueue_head(&desc->wait_for_threads); - /* Setup the type (level, edge polarity) if configured: */ if (new->flags & IRQF_TRIGGER_MASK) { ret = __irq_set_trigger(desc, @@ -1780,14 +1805,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) irq_setup_timings(desc, new); - /* - * Strictly no need to wake it up, but hung_task complains - * when no hard interrupt wakes the thread up. - */ - if (new->thread) - wake_up_process(new->thread); - if (new->secondary) - wake_up_process(new->secondary->thread); + wake_up_and_wait_for_irq_thread_ready(desc, new); + wake_up_and_wait_for_irq_thread_ready(desc, new->secondary); register_irq_proc(irq, desc); new->dir = NULL; From 26a08f8bad3e1f98d3153f939fb8cd330da4cb26 Mon Sep 17 00:00:00 2001 From: Scott Chen Date: Mon, 25 Apr 2022 17:00:20 +0800 Subject: [PATCH 610/803] USB: serial: pl2303: add device id for HP LM930 Display Add the device id for the HPLM930Display which is a PL2303GC based device. Signed-off-by: Scott Chen Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/pl2303.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 88b284d61681..1d878d05a658 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -106,6 +106,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LCM960_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LM920_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LM930_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LM940_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_TD620_PRODUCT_ID) }, { USB_DEVICE(CRESSI_VENDOR_ID, CRESSI_EDY_PRODUCT_ID) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index c5406452b774..732f9b13ad5d 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -135,6 +135,7 @@ #define HP_TD620_PRODUCT_ID 0x0956 #define HP_LD960_PRODUCT_ID 0x0b39 #define HP_LD381_PRODUCT_ID 0x0f7f +#define HP_LM930_PRODUCT_ID 0x0f9b #define HP_LCM220_PRODUCT_ID 0x3139 #define HP_LCM960_PRODUCT_ID 0x3239 #define HP_LD220_PRODUCT_ID 0x3524 From 714adff9a6271b5f1664b04c944b598141ebfe73 Mon Sep 17 00:00:00 2001 From: Sven Schwermer Date: Mon, 25 Apr 2022 16:34:49 +0200 Subject: [PATCH 611/803] USB: serial: option: add Fibocom L610 modem The L610 modem has 3 USB configurations that are configurable via the AT command AT+GTUSBMODE={31,32,33} which make the modem enumerate with the following interfaces, respectively: 31: Modem + NV + MOS + Diag + LOG + AT + AT 32: ECM + Modem + NV + MOS + Diag + LOG + AT + AT 33: RNDIS + Modem + NV + MOS + Diag + LOG + AT + AT A detailed description of the USB configuration for each mode follows: +GTUSBMODE: 31 -------------- T: Bus=03 Lev=01 Prnt=01 Port=06 Cnt=04 Dev#=124 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1782 ProdID=4d10 Rev= 0.00 S: Manufacturer=FIBOCOM S: Product=L610 C:* #Ifs= 7 Cfg#= 1 Atr=e0 MxPwr=400mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms +GTUSBMODE: 32 -------------- T: Bus=03 Lev=01 Prnt=01 Port=06 Cnt=04 Dev#=122 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1782 ProdID=4d11 Rev= 0.00 S: Manufacturer=FIBOCOM S: Product=L610 C:* #Ifs= 9 Cfg#= 1 Atr=e0 MxPwr=400mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=06 Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 7 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 8 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=08(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms +GTUSBMODE: 33 -------------- T: Bus=03 Lev=01 Prnt=01 Port=06 Cnt=04 Dev#=126 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1782 ProdID=4d11 Rev= 0.00 S: Manufacturer=FIBOCOM S: Product=L610 C:* #Ifs= 9 Cfg#= 1 Atr=e0 MxPwr=400mA A: FirstIf#= 0 IfCount= 2 Cls=e0(wlcon) Sub=01 Prot=03 I:* If#= 0 Alt= 0 #EPs= 1 Cls=e0(wlcon) Sub=01 Prot=03 Driver=rndis_host E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=4096ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 7 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 8 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=08(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Sven Schwermer Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 1364ce7f0abf..cd82dd44f72d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2123,6 +2123,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3) }, { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 (IOT version) */ .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, + { USB_DEVICE(0x1782, 0x4d10) }, /* Fibocom L610 (AT mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x1782, 0x4d11, 0xff) }, /* Fibocom L610 (ECM/RNDIS mode) */ { USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */ .driver_info = RSVD(4) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ From 07989eb981d862f7f2be68d233d753f2e7ccc119 Mon Sep 17 00:00:00 2001 From: Sven Schwermer Date: Mon, 25 Apr 2022 16:34:50 +0200 Subject: [PATCH 612/803] USB: serial: option: add Fibocom MA510 modem The MA510 modem has 3 USB configurations that are configurable via the AT command AT+GTUSBMODE={30,31,32} which make the modem enumerate with the following interfaces, respectively: 30: Diag + QDSS + Modem + RMNET 31: Diag + Modem + AT + ECM 32: Modem + AT + ECM The first configuration (30) reuses u-blox R410M's VID/PID with identical interface configuration. A detailed description of the USB configuration for each mode follows: +GTUSBMODE: 30 -------------- T: Bus=03 Lev=01 Prnt=01 Port=06 Cnt=04 Dev#= 19 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=05c6 ProdID=90b2 Rev= 0.00 S: Manufacturer=Fibocom MA510 Modem S: Product=Fibocom MA510 Modem S: SerialNumber=55e2695b C:* #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms +GTUSBMODE: 31 -------------- T: Bus=03 Lev=01 Prnt=01 Port=06 Cnt=04 Dev#= 99 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=0106 Rev= 0.00 S: Manufacturer=Fibocom MA510 Modem S: Product=Fibocom MA510 Modem S: SerialNumber=55e2695b C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA A: FirstIf#= 3 IfCount= 2 Cls=02(comm.) Sub=00 Prot=00 I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=86(I) Atr=03(Int.) MxPS= 64 Ivl=2ms I: If#= 4 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 4 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms +GTUSBMODE: 32 -------------- T: Bus=03 Lev=01 Prnt=01 Port=06 Cnt=04 Dev#=100 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=010a Rev= 0.00 S: Manufacturer=Fibocom MA510 Modem S: Product=Fibocom MA510 Modem S: SerialNumber=55e2695b C:* #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA A: FirstIf#= 2 IfCount= 2 Cls=02(comm.) Sub=00 Prot=00 I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms I: If#= 3 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 3 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Sven Schwermer Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index cd82dd44f72d..152ad882657d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2129,6 +2129,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ .driver_info = RSVD(6) }, + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0106, 0xff) }, /* Fibocom MA510 (ECM mode w/ diag intf.) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x010a, 0xff) }, /* Fibocom MA510 (ECM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ From 171865dab096da1ab980a32eeea5d1b88cd7bc50 Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Thu, 21 Apr 2022 18:42:28 +0900 Subject: [PATCH 613/803] gpio: visconti: Fix fwnode of GPIO IRQ The fwnode of GPIO IRQ must be set to its own fwnode, not the fwnode of the parent IRQ. Therefore, this sets own fwnode instead of the parent IRQ fwnode to GPIO IRQ's. Fixes: 2ad74f40dacc ("gpio: visconti: Add Toshiba Visconti GPIO support") Signed-off-by: Nobuhiro Iwamatsu Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-visconti.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-visconti.c b/drivers/gpio/gpio-visconti.c index 47455810bdb9..e6534ea1eaa7 100644 --- a/drivers/gpio/gpio-visconti.c +++ b/drivers/gpio/gpio-visconti.c @@ -130,7 +130,6 @@ static int visconti_gpio_probe(struct platform_device *pdev) struct gpio_irq_chip *girq; struct irq_domain *parent; struct device_node *irq_parent; - struct fwnode_handle *fwnode; int ret; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -150,14 +149,12 @@ static int visconti_gpio_probe(struct platform_device *pdev) } parent = irq_find_host(irq_parent); + of_node_put(irq_parent); if (!parent) { dev_err(dev, "No IRQ parent domain\n"); return -ENODEV; } - fwnode = of_node_to_fwnode(irq_parent); - of_node_put(irq_parent); - ret = bgpio_init(&priv->gpio_chip, dev, 4, priv->base + GPIO_IDATA, priv->base + GPIO_OSET, @@ -180,7 +177,7 @@ static int visconti_gpio_probe(struct platform_device *pdev) girq = &priv->gpio_chip.irq; girq->chip = irq_chip; - girq->fwnode = fwnode; + girq->fwnode = of_node_to_fwnode(dev->of_node); girq->parent_domain = parent; girq->child_to_parent_hwirq = visconti_gpio_child_to_parent_hwirq; girq->populate_parent_alloc_arg = visconti_gpio_populate_parent_fwspec; From 870b1eee2d844727b06e238c121d260bc5645580 Mon Sep 17 00:00:00 2001 From: Ethan Yang Date: Mon, 25 Apr 2022 13:58:40 +0800 Subject: [PATCH 614/803] USB: serial: qcserial: add support for Sierra Wireless EM7590 Add support for Sierra Wireless EM7590 0xc080/0xc081 compositions. Signed-off-by: Ethan Yang Link: https://lore.kernel.org/r/20220425055840.5693-1-etyang@sierrawireless.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/qcserial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index c18bf8164bc2..586ef5551e76 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -166,6 +166,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9090)}, /* Sierra Wireless EM7565 QDL */ {DEVICE_SWI(0x1199, 0x9091)}, /* Sierra Wireless EM7565 */ {DEVICE_SWI(0x1199, 0x90d2)}, /* Sierra Wireless EM9191 QDL */ + {DEVICE_SWI(0x1199, 0xc080)}, /* Sierra Wireless EM7590 QDL */ + {DEVICE_SWI(0x1199, 0xc081)}, /* Sierra Wireless EM7590 */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ From 2685027fca387b602ae565bff17895188b803988 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 27 Apr 2022 10:54:28 -0400 Subject: [PATCH 615/803] cgroup/cpuset: Remove cpus_allowed/mems_allowed setup in cpuset_init_smp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are 3 places where the cpu and node masks of the top cpuset can be initialized in the order they are executed: 1) start_kernel -> cpuset_init() 2) start_kernel -> cgroup_init() -> cpuset_bind() 3) kernel_init_freeable() -> do_basic_setup() -> cpuset_init_smp() The first cpuset_init() call just sets all the bits in the masks. The second cpuset_bind() call sets cpus_allowed and mems_allowed to the default v2 values. The third cpuset_init_smp() call sets them back to v1 values. For systems with cgroup v2 setup, cpuset_bind() is called once. As a result, cpu and memory node hot add may fail to update the cpu and node masks of the top cpuset to include the newly added cpu or node in a cgroup v2 environment. For systems with cgroup v1 setup, cpuset_bind() is called again by rebind_subsystem() when the v1 cpuset filesystem is mounted as shown in the dmesg log below with an instrumented kernel. [ 2.609781] cpuset_bind() called - v2 = 1 [ 3.079473] cpuset_init_smp() called [ 7.103710] cpuset_bind() called - v2 = 0 smp_init() is called after the first two init functions. So we don't have a complete list of active cpus and memory nodes until later in cpuset_init_smp() which is the right time to set up effective_cpus and effective_mems. To fix this cgroup v2 mask setup problem, the potentially incorrect cpus_allowed & mems_allowed setting in cpuset_init_smp() are removed. For cgroup v2 systems, the initial cpuset_bind() call will set the masks correctly. For cgroup v1 systems, the second call to cpuset_bind() will do the right setup. cc: stable@vger.kernel.org Signed-off-by: Waiman Long Tested-by: Feng Tang Reviewed-by: Michal Koutný Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 9390bfd9f1cd..71a418858a5e 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3390,8 +3390,11 @@ static struct notifier_block cpuset_track_online_nodes_nb = { */ void __init cpuset_init_smp(void) { - cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); - top_cpuset.mems_allowed = node_states[N_MEMORY]; + /* + * cpus_allowd/mems_allowed set to v2 values in the initial + * cpuset_bind() call will be reset to v1 values in another + * cpuset_bind() call when v1 cpuset is mounted. + */ top_cpuset.old_mems_allowed = top_cpuset.mems_allowed; cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask); From 9f73f1aef98b2fa7252c0a89be64840271ce8ea0 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 1 Apr 2022 15:29:37 +0800 Subject: [PATCH 616/803] btrfs: force v2 space cache usage for subpage mount [BUG] For a 4K sector sized btrfs with v1 cache enabled and only mounted on systems with 4K page size, if it's mounted on subpage (64K page size) systems, it can cause the following warning on v1 space cache: BTRFS error (device dm-1): csum mismatch on free space cache BTRFS warning (device dm-1): failed to load free space cache for block group 84082688, rebuilding it now Although not a big deal, as kernel can rebuild it without problem, such warning will bother end users, especially if they want to switch the same btrfs seamlessly between different page sized systems. [CAUSE] V1 free space cache is still using fixed PAGE_SIZE for various bitmap, like BITS_PER_BITMAP. Such hard-coded PAGE_SIZE usage will cause various mismatch, from v1 cache size to checksum. Thus kernel will always reject v1 cache with a different PAGE_SIZE with csum mismatch. [FIX] Although we should fix v1 cache, it's already going to be marked deprecated soon. And we have v2 cache based on metadata (which is already fully subpage compatible), and it has almost everything superior than v1 cache. So just force subpage mount to use v2 cache on mount. Reported-by: Matt Corallo CC: stable@vger.kernel.org # 5.15+ Link: https://lore.kernel.org/linux-btrfs/61aa27d1-30fc-c1a9-f0f4-9df544395ec3@bluematt.me/ Reviewed-by: Josef Bacik Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 20e70eb88465..3e0acc362233 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3657,6 +3657,17 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device if (sectorsize < PAGE_SIZE) { struct btrfs_subpage_info *subpage_info; + /* + * V1 space cache has some hardcoded PAGE_SIZE usage, and is + * going to be deprecated. + * + * Force to use v2 cache for subpage case. + */ + btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE); + btrfs_set_and_info(fs_info, FREE_SPACE_TREE, + "forcing free space tree for sector size %u with page size %lu", + sectorsize, PAGE_SIZE); + btrfs_warn(fs_info, "read-write for sector size %u with page size %lu is experimental", sectorsize, PAGE_SIZE); From 549577127afeb759c29cdeeff1bdccd86e6c0dbf Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 3 May 2022 14:10:04 -0700 Subject: [PATCH 617/803] btrfs: zoned: move non-changing condition check out of the loop btrfs_zone_activate() checks if block_group->alloc_offset == block_group->zone_capacity every time it iterates the loop. But, it is not depending on the index. Move out the check and do it only once. Fixes: f9a912a3c45f ("btrfs: zoned: make zone activation multi stripe capable") Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 1b1b310c3c51..b5eb794c1e23 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1835,6 +1835,12 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) goto out_unlock; } + /* No space left */ + if (block_group->alloc_offset == block_group->zone_capacity) { + ret = false; + goto out_unlock; + } + for (i = 0; i < map->num_stripes; i++) { device = map->stripes[i].dev; physical = map->stripes[i].physical; @@ -1842,12 +1848,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) if (device->zone_info->max_active_zones == 0) continue; - /* No space left */ - if (block_group->alloc_offset == block_group->zone_capacity) { - ret = false; - goto out_unlock; - } - if (!btrfs_dev_set_active_zone(device, physical)) { /* Cannot activate the zone */ ret = false; From ceb4f60830a7cd38ff47b7dd54e9e06ddbaf413c Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 3 May 2022 14:10:05 -0700 Subject: [PATCH 618/803] btrfs: zoned: activate block group properly on unlimited active zone device btrfs_zone_activate() checks if it activated all the underlying zones in the loop. However, that check never hit on an unlimited activate zone device (max_active_zones == 0). Fortunately, it still works without ENOSPC because btrfs_zone_activate() returns true in the end, even if block_group->zone_is_active == 0. But, it is confusing to have non zone_is_active block group still usable for allocation. Also, we are wasting CPU time to iterate the loop every time btrfs_zone_activate() is called for the blog groups. Since error case in the loop is handled by out_unlock, we can just set zone_is_active and do the list stuff after the loop. Fixes: f9a912a3c45f ("btrfs: zoned: make zone activation multi stripe capable") Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index b5eb794c1e23..d31b0eda210f 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1853,24 +1853,18 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) ret = false; goto out_unlock; } - - /* Successfully activated all the zones */ - if (i == map->num_stripes - 1) - block_group->zone_is_active = 1; - - } + + /* Successfully activated all the zones */ + block_group->zone_is_active = 1; spin_unlock(&block_group->lock); - if (block_group->zone_is_active) { - /* For the active block group list */ - btrfs_get_block_group(block_group); + /* For the active block group list */ + btrfs_get_block_group(block_group); - spin_lock(&fs_info->zone_active_bgs_lock); - list_add_tail(&block_group->active_bg_list, - &fs_info->zone_active_bgs); - spin_unlock(&fs_info->zone_active_bgs_lock); - } + spin_lock(&fs_info->zone_active_bgs_lock); + list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs); + spin_unlock(&fs_info->zone_active_bgs_lock); return true; From 750ee454908e90a8792b1e2b157c2948da86e926 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 3 May 2022 11:57:02 +0100 Subject: [PATCH 619/803] btrfs: fix assertion failure when logging directory key range item When inserting a key range item (BTRFS_DIR_LOG_INDEX_KEY) while logging a directory, we don't expect the insertion to fail with -EEXIST, because we are holding the directory's log_mutex and we have dropped all existing BTRFS_DIR_LOG_INDEX_KEY keys from the log tree before we started to log the directory. However it's possible that during the logging we attempt to insert the same BTRFS_DIR_LOG_INDEX_KEY key twice, but for this to happen we need to race with insertions of items from other inodes in the subvolume's tree while we are logging a directory. Here's how this can happen: 1) We are logging a directory with inode number 1000 that has its items spread across 3 leaves in the subvolume's tree: leaf A - has index keys from the range 2 to 20 for example. The last item in the leaf corresponds to a dir item for index number 20. All these dir items were created in a past transaction. leaf B - has index keys from the range 22 to 100 for example. It has no keys from other inodes, all its keys are dir index keys for our directory inode number 1000. Its first key is for the dir item with a sequence number of 22. All these dir items were also created in a past transaction. leaf C - has index keys for our directory for the range 101 to 120 for example. This leaf also has items from other inodes, and its first item corresponds to the dir item for index number 101 for our directory with inode number 1000; 2) When we finish processing the items from leaf A at log_dir_items(), we log a BTRFS_DIR_LOG_INDEX_KEY key with an offset of 21 and a last offset of 21, meaning the log is authoritative for the index range from 21 to 21 (a single sequence number). At this point leaf B was not yet modified in the current transaction; 3) When we return from log_dir_items() we have released our read lock on leaf B, and have set *last_offset_ret to 21 (index number of the first item on leaf B minus 1); 4) Some other task inserts an item for other inode (inode number 1001 for example) into leaf C. That resulted in pushing some items from leaf C into leaf B, in order to make room for the new item, so now leaf B has dir index keys for the sequence number range from 22 to 102 and leaf C has the dir items for the sequence number range 103 to 120; 5) At log_directory_changes() we call log_dir_items() again, passing it a 'min_offset' / 'min_key' value of 22 (*last_offset_ret from step 3 plus 1, so 21 + 1). Then btrfs_search_forward() leaves us at slot 0 of leaf B, since leaf B was modified in the current transaction. We have also initialized 'last_old_dentry_offset' to 20 after calling btrfs_previous_item() at log_dir_items(), as it left us at the last item of leaf A, which refers to the dir item with sequence number 20; 6) We then call process_dir_items_leaf() to process the dir items of leaf B, and when we process the first item, corresponding to slot 0, sequence number 22, we notice the dir item was created in a past transaction and its sequence number is greater than the value of *last_old_dentry_offset + 1 (20 + 1), so we decide to log again a BTRFS_DIR_LOG_INDEX_KEY key with an offset of 21 and an end range of 21 (key.offset - 1 == 22 - 1 == 21), which results in an -EEXIST error from insert_dir_log_key(), as we have already inserted that key at step 2, triggering the assertion at process_dir_items_leaf(). The trace produced in dmesg is like the following: assertion failed: ret != -EEXIST, in fs/btrfs/tree-log.c:3857 [198255.980839][ T7460] ------------[ cut here ]------------ [198255.981666][ T7460] kernel BUG at fs/btrfs/ctree.h:3617! [198255.983141][ T7460] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI [198255.984080][ T7460] CPU: 0 PID: 7460 Comm: repro-ghost-dir Not tainted 5.18.0-5314c78ac373-misc-next+ [198255.986027][ T7460] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 [198255.988600][ T7460] RIP: 0010:assertfail.constprop.0+0x1c/0x1e [198255.989465][ T7460] Code: 8b 4c 89 (...) [198255.992599][ T7460] RSP: 0018:ffffc90007387188 EFLAGS: 00010282 [198255.993414][ T7460] RAX: 000000000000003d RBX: 0000000000000065 RCX: 0000000000000000 [198255.996056][ T7460] RDX: 0000000000000001 RSI: ffffffff8b62b180 RDI: fffff52000e70e24 [198255.997668][ T7460] RBP: ffffc90007387188 R08: 000000000000003d R09: ffff8881f0e16507 [198255.999199][ T7460] R10: ffffed103e1c2ca0 R11: 0000000000000001 R12: 00000000ffffffef [198256.000683][ T7460] R13: ffff88813befc630 R14: ffff888116c16e70 R15: ffffc90007387358 [198256.007082][ T7460] FS: 00007fc7f7c24640(0000) GS:ffff8881f0c00000(0000) knlGS:0000000000000000 [198256.009939][ T7460] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [198256.014133][ T7460] CR2: 0000560bb16d0b78 CR3: 0000000140b34005 CR4: 0000000000170ef0 [198256.015239][ T7460] Call Trace: [198256.015674][ T7460] [198256.016313][ T7460] log_dir_items.cold+0x16/0x2c [198256.018858][ T7460] ? replay_one_extent+0xbf0/0xbf0 [198256.025932][ T7460] ? release_extent_buffer+0x1d2/0x270 [198256.029658][ T7460] ? rcu_read_lock_sched_held+0x16/0x80 [198256.031114][ T7460] ? lock_acquired+0xbe/0x660 [198256.032633][ T7460] ? rcu_read_lock_sched_held+0x16/0x80 [198256.034386][ T7460] ? lock_release+0xcf/0x8a0 [198256.036152][ T7460] log_directory_changes+0xf9/0x170 [198256.036993][ T7460] ? log_dir_items+0xba0/0xba0 [198256.037661][ T7460] ? do_raw_write_unlock+0x7d/0xe0 [198256.038680][ T7460] btrfs_log_inode+0x233b/0x26d0 [198256.041294][ T7460] ? log_directory_changes+0x170/0x170 [198256.042864][ T7460] ? btrfs_attach_transaction_barrier+0x60/0x60 [198256.045130][ T7460] ? rcu_read_lock_sched_held+0x16/0x80 [198256.046568][ T7460] ? lock_release+0xcf/0x8a0 [198256.047504][ T7460] ? lock_downgrade+0x420/0x420 [198256.048712][ T7460] ? ilookup5_nowait+0x81/0xa0 [198256.049747][ T7460] ? lock_downgrade+0x420/0x420 [198256.050652][ T7460] ? do_raw_spin_unlock+0xa9/0x100 [198256.051618][ T7460] ? __might_resched+0x128/0x1c0 [198256.052511][ T7460] ? __might_sleep+0x66/0xc0 [198256.053442][ T7460] ? __kasan_check_read+0x11/0x20 [198256.054251][ T7460] ? iget5_locked+0xbd/0x150 [198256.054986][ T7460] ? run_delayed_iput_locked+0x110/0x110 [198256.055929][ T7460] ? btrfs_iget+0xc7/0x150 [198256.056630][ T7460] ? btrfs_orphan_cleanup+0x4a0/0x4a0 [198256.057502][ T7460] ? free_extent_buffer+0x13/0x20 [198256.058322][ T7460] btrfs_log_inode+0x2654/0x26d0 [198256.059137][ T7460] ? log_directory_changes+0x170/0x170 [198256.060020][ T7460] ? rcu_read_lock_sched_held+0x16/0x80 [198256.060930][ T7460] ? rcu_read_lock_sched_held+0x16/0x80 [198256.061905][ T7460] ? lock_contended+0x770/0x770 [198256.062682][ T7460] ? btrfs_log_inode_parent+0xd04/0x1750 [198256.063582][ T7460] ? lock_downgrade+0x420/0x420 [198256.064432][ T7460] ? preempt_count_sub+0x18/0xc0 [198256.065550][ T7460] ? __mutex_lock+0x580/0xdc0 [198256.066654][ T7460] ? stack_trace_save+0x94/0xc0 [198256.068008][ T7460] ? __kasan_check_write+0x14/0x20 [198256.072149][ T7460] ? __mutex_unlock_slowpath+0x12a/0x430 [198256.073145][ T7460] ? mutex_lock_io_nested+0xcd0/0xcd0 [198256.074341][ T7460] ? wait_for_completion_io_timeout+0x20/0x20 [198256.075345][ T7460] ? lock_downgrade+0x420/0x420 [198256.076142][ T7460] ? lock_contended+0x770/0x770 [198256.076939][ T7460] ? do_raw_spin_lock+0x1c0/0x1c0 [198256.078401][ T7460] ? btrfs_sync_file+0x5e6/0xa40 [198256.080598][ T7460] btrfs_log_inode_parent+0x523/0x1750 [198256.081991][ T7460] ? wait_current_trans+0xc8/0x240 [198256.083320][ T7460] ? lock_downgrade+0x420/0x420 [198256.085450][ T7460] ? btrfs_end_log_trans+0x70/0x70 [198256.086362][ T7460] ? rcu_read_lock_sched_held+0x16/0x80 [198256.087544][ T7460] ? lock_release+0xcf/0x8a0 [198256.088305][ T7460] ? lock_downgrade+0x420/0x420 [198256.090375][ T7460] ? dget_parent+0x8e/0x300 [198256.093538][ T7460] ? do_raw_spin_lock+0x1c0/0x1c0 [198256.094918][ T7460] ? lock_downgrade+0x420/0x420 [198256.097815][ T7460] ? do_raw_spin_unlock+0xa9/0x100 [198256.101822][ T7460] ? dget_parent+0xb7/0x300 [198256.103345][ T7460] btrfs_log_dentry_safe+0x48/0x60 [198256.105052][ T7460] btrfs_sync_file+0x629/0xa40 [198256.106829][ T7460] ? start_ordered_ops.constprop.0+0x120/0x120 [198256.109655][ T7460] ? __fget_files+0x161/0x230 [198256.110760][ T7460] vfs_fsync_range+0x6d/0x110 [198256.111923][ T7460] ? start_ordered_ops.constprop.0+0x120/0x120 [198256.113556][ T7460] __x64_sys_fsync+0x45/0x70 [198256.114323][ T7460] do_syscall_64+0x5c/0xc0 [198256.115084][ T7460] ? syscall_exit_to_user_mode+0x3b/0x50 [198256.116030][ T7460] ? do_syscall_64+0x69/0xc0 [198256.116768][ T7460] ? do_syscall_64+0x69/0xc0 [198256.117555][ T7460] ? do_syscall_64+0x69/0xc0 [198256.118324][ T7460] ? sysvec_call_function_single+0x57/0xc0 [198256.119308][ T7460] ? asm_sysvec_call_function_single+0xa/0x20 [198256.120363][ T7460] entry_SYSCALL_64_after_hwframe+0x44/0xae [198256.121334][ T7460] RIP: 0033:0x7fc7fe97b6ab [198256.122067][ T7460] Code: 0f 05 48 (...) [198256.125198][ T7460] RSP: 002b:00007fc7f7c23950 EFLAGS: 00000293 ORIG_RAX: 000000000000004a [198256.126568][ T7460] RAX: ffffffffffffffda RBX: 00007fc7f7c239f0 RCX: 00007fc7fe97b6ab [198256.127942][ T7460] RDX: 0000000000000002 RSI: 000056167536bcf0 RDI: 0000000000000004 [198256.129302][ T7460] RBP: 0000000000000004 R08: 0000000000000000 R09: 000000007ffffeb8 [198256.130670][ T7460] R10: 00000000000001ff R11: 0000000000000293 R12: 0000000000000001 [198256.132046][ T7460] R13: 0000561674ca8140 R14: 00007fc7f7c239d0 R15: 000056167536dab8 [198256.133403][ T7460] Fix this by treating -EEXIST as expected at insert_dir_log_key() and have it update the item with an end offset corresponding to the maximum between the previously logged end offset and the new requested end offset. The end offsets may be different due to dir index key deletions that happened as part of unlink operations while we are logging a directory (triggered when fsyncing some other inode parented by the directory) or during renames which always attempt to log a single dir index deletion. Reported-by: Zygo Blaxell Link: https://lore.kernel.org/linux-btrfs/YmyefE9mc2xl5ZMz@hungrycats.org/ Fixes: 732d591a5d6c12 ("btrfs: stop copying old dir items when logging a directory") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 11399c8eed87..e65633686378 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3721,11 +3721,29 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, key.offset = first_offset; key.type = BTRFS_DIR_LOG_INDEX_KEY; ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); - if (ret) + /* + * -EEXIST is fine and can happen sporadically when we are logging a + * directory and have concurrent insertions in the subvolume's tree for + * items from other inodes and that result in pushing off some dir items + * from one leaf to another in order to accommodate for the new items. + * This results in logging the same dir index range key. + */ + if (ret && ret != -EEXIST) return ret; item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_dir_log_item); + if (ret == -EEXIST) { + const u64 curr_end = btrfs_dir_log_end(path->nodes[0], item); + + /* + * btrfs_del_dir_entries_in_log() might have been called during + * an unlink between the initial insertion of this key and the + * current update, or we might be logging a single entry deletion + * during a rename, so set the new last_offset to the max value. + */ + last_offset = max(last_offset, curr_end); + } btrfs_set_dir_log_end(path->nodes[0], item, last_offset); btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(path); @@ -3849,13 +3867,6 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, ret = insert_dir_log_key(trans, log, dst_path, ino, *last_old_dentry_offset + 1, key.offset - 1); - /* - * -EEXIST should never happen because when we - * log a directory in full mode (LOG_INODE_ALL) - * we drop all BTRFS_DIR_LOG_INDEX_KEY keys from - * the log tree. - */ - ASSERT(ret != -EEXIST); if (ret < 0) return ret; } @@ -7031,12 +7042,12 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, /* * Other concurrent task might be logging the old directory, * as it can be triggered when logging other inode that had or - * still has a dentry in the old directory. So take the old - * directory's log_mutex to prevent getting an -EEXIST when - * logging a key to record the deletion, or having that other - * task logging the old directory get an -EEXIST if it attempts - * to log the same key after we just did it. In both cases that - * would result in falling back to a transaction commit. + * still has a dentry in the old directory. We lock the old + * directory's log_mutex to ensure the deletion of the old + * name is persisted, because during directory logging we + * delete all BTRFS_DIR_LOG_INDEX_KEY keys and the deletion of + * the old name's dir index item is in the delayed items, so + * it could be missed by an in progress directory logging. */ mutex_lock(&old_dir->log_mutex); ret = del_logged_dentry(trans, log, path, btrfs_ino(old_dir), From 3e1ad196385c65c1454aceab1226d9a4baca27d5 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 3 May 2022 17:35:25 +0200 Subject: [PATCH 620/803] btrfs: sysfs: export the balance paused state of exclusive operation The new state allowing device addition with paused balance is not exported to user space so it can't recognize it and actually start the operation. Fixes: efc0e69c2fea ("btrfs: introduce exclusive operation BALANCE_PAUSED state") CC: stable@vger.kernel.org # 5.17 Signed-off-by: David Sterba --- fs/btrfs/sysfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 17389a42a3ab..ba78ca5aabbb 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -922,6 +922,9 @@ static ssize_t btrfs_exclusive_operation_show(struct kobject *kobj, case BTRFS_EXCLOP_BALANCE: str = "balance\n"; break; + case BTRFS_EXCLOP_BALANCE_PAUSED: + str = "balance paused\n"; + break; case BTRFS_EXCLOP_DEV_ADD: str = "device add\n"; break; From 01e01f5c89773c600a9f0b32c888de0146066c3a Mon Sep 17 00:00:00 2001 From: Sergey Ryazanov Date: Sun, 1 May 2022 20:58:28 +0300 Subject: [PATCH 621/803] usb: cdc-wdm: fix reading stuck on device close cdc-wdm tracks whether a response reading request is in-progress and blocks the next request from being sent until the previous request is completed. As soon as last user closes the cdc-wdm device file, the driver cancels any ongoing requests, resets the pending response counter, but leaves the response reading in-progress flag (WDM_RESPONDING) untouched. So if the user closes the device file during the response receive request is being performed, no more data will be obtained from the modem. The request will be cancelled, effectively preventing the WDM_RESPONDING flag from being reseted. Keeping the flag set will prevent a new response receive request from being sent, permanently blocking the read path. The read path will staying blocked until the module will be reloaded or till the modem will be re-attached. This stuck has been observed with a Huawei E3372 modem attached to an OpenWrt router and using the comgt utility to set up a network connection. Fix this issue by clearing the WDM_RESPONDING flag on the device file close. Without this fix, the device reading stuck can be easily reproduced in a few connection establishing attempts. With this fix, a load test for modem connection re-establishing worked for several hours without any issues. Fixes: 922a5eadd5a3 ("usb: cdc-wdm: Fix race between autosuspend and reading from the device") Signed-off-by: Sergey Ryazanov Cc: stable Acked-by: Oliver Neukum Link: https://lore.kernel.org/r/20220501175828.8185-1-ryazanov.s.a@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 7f2c83f299d3..eebe782380fb 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -774,6 +774,7 @@ static int wdm_release(struct inode *inode, struct file *file) poison_urbs(desc); spin_lock_irq(&desc->iuspin); desc->resp_count = 0; + clear_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); desc->manage_power(desc->intf, 0); unpoison_urbs(desc); From bbc126ae381cf0a27822c1f822d0aeed74cc40d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 2 May 2022 10:04:56 +0200 Subject: [PATCH 622/803] usb: typec: tcpci: Don't skip cleanup in .remove() on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Returning an error value in an i2c remove callback results in an error message being emitted by the i2c core, but otherwise it doesn't make a difference. The device goes away anyhow and the devm cleanups are called. In this case the remove callback even returns early without stopping the tcpm worker thread and various timers. A work scheduled on the work queue, or a firing timer after tcpci_remove() returned probably results in a use-after-free situation because the regmap and driver data were freed. So better make sure that tcpci_unregister_port() is called even if disabling the irq failed. Also emit a more specific error message instead of the i2c core's "remove failed (EIO), will be ignored" and return 0 to suppress the core's warning. This patch is (also) a preparation for making i2c remove callbacks return void. Fixes: 3ba76256fc4e ("usb: typec: tcpci: mask event interrupts when remove driver") Signed-off-by: Uwe Kleine-König Cc: stable Acked-by: Heikki Krogerus Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20220502080456.21568-1-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index e07d26a3cd8e..f33e08eb7670 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -877,7 +877,7 @@ static int tcpci_remove(struct i2c_client *client) /* Disable chip interrupts before unregistering port */ err = tcpci_write16(chip->tcpci, TCPC_ALERT_MASK, 0); if (err < 0) - return err; + dev_warn(&client->dev, "Failed to disable irqs (%pe)\n", ERR_PTR(err)); tcpci_unregister_port(chip->tcpci); From b81ac4395bbeaf36e078dea1a48c02dd97b76235 Mon Sep 17 00:00:00 2001 From: Dan Vacura Date: Tue, 3 May 2022 15:10:38 -0500 Subject: [PATCH 623/803] usb: gadget: uvc: allow for application to cleanly shutdown Several types of kernel panics can occur due to timing during the uvc gadget removal. This appears to be a problem with gadget resources being managed by both the client application's v4l2 open/close and the UDC gadget bind/unbind. Since the concept of USB_GADGET_DELAYED_STATUS doesn't exist for unbind, add a wait to allow for the application to close out. Some examples of the panics that can occur are: <1>[ 1147.652313] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000028 <4>[ 1147.652510] Call trace: <4>[ 1147.652514] usb_gadget_disconnect+0x74/0x1f0 <4>[ 1147.652516] usb_gadget_deactivate+0x38/0x168 <4>[ 1147.652520] usb_function_deactivate+0x54/0x90 <4>[ 1147.652524] uvc_function_disconnect+0x14/0x38 <4>[ 1147.652527] uvc_v4l2_release+0x34/0xa0 <4>[ 1147.652537] __fput+0xdc/0x2c0 <4>[ 1147.652540] ____fput+0x10/0x1c <4>[ 1147.652545] task_work_run+0xe4/0x12c <4>[ 1147.652549] do_notify_resume+0x108/0x168 <1>[ 282.950561][ T1472] Unable to handle kernel NULL pointer dereference at virtual address 00000000000005b8 <6>[ 282.953111][ T1472] Call trace: <6>[ 282.953121][ T1472] usb_function_deactivate+0x54/0xd4 <6>[ 282.953134][ T1472] uvc_v4l2_release+0xac/0x1e4 <6>[ 282.953145][ T1472] v4l2_release+0x134/0x1f0 <6>[ 282.953167][ T1472] __fput+0xf4/0x428 <6>[ 282.953178][ T1472] ____fput+0x14/0x24 <6>[ 282.953193][ T1472] task_work_run+0xac/0x130 <3>[ 213.410077][ T29] configfs-gadget gadget: uvc: Failed to queue request (-108). <1>[ 213.410116][ T29] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000003 <6>[ 213.413460][ T29] Call trace: <6>[ 213.413474][ T29] uvcg_video_pump+0x1f0/0x384 <6>[ 213.413489][ T29] process_one_work+0x2a4/0x544 <6>[ 213.413502][ T29] worker_thread+0x350/0x784 <6>[ 213.413515][ T29] kthread+0x2ac/0x320 <6>[ 213.413528][ T29] ret_from_fork+0x10/0x30 Signed-off-by: Dan Vacura Cc: stable Link: https://lore.kernel.org/r/20220503201039.71720-1-w36195@motorola.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uvc.c | 25 +++++++++++++++++++++++++ drivers/usb/gadget/function/uvc.h | 2 ++ drivers/usb/gadget/function/uvc_v4l2.c | 3 ++- 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 71bb5e477dba..d37965867b23 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -890,13 +890,37 @@ static void uvc_function_unbind(struct usb_configuration *c, { struct usb_composite_dev *cdev = c->cdev; struct uvc_device *uvc = to_uvc(f); + long wait_ret = 1; uvcg_info(f, "%s()\n", __func__); + /* If we know we're connected via v4l2, then there should be a cleanup + * of the device from userspace either via UVC_EVENT_DISCONNECT or + * though the video device removal uevent. Allow some time for the + * application to close out before things get deleted. + */ + if (uvc->func_connected) { + uvcg_dbg(f, "waiting for clean disconnect\n"); + wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue, + uvc->func_connected == false, msecs_to_jiffies(500)); + uvcg_dbg(f, "done waiting with ret: %ld\n", wait_ret); + } + device_remove_file(&uvc->vdev.dev, &dev_attr_function_name); video_unregister_device(&uvc->vdev); v4l2_device_unregister(&uvc->v4l2_dev); + if (uvc->func_connected) { + /* Wait for the release to occur to ensure there are no longer any + * pending operations that may cause panics when resources are cleaned + * up. + */ + uvcg_warn(f, "%s no clean disconnect, wait for release\n", __func__); + wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue, + uvc->func_connected == false, msecs_to_jiffies(1000)); + uvcg_dbg(f, "done waiting for release with ret: %ld\n", wait_ret); + } + usb_ep_free_request(cdev->gadget->ep0, uvc->control_req); kfree(uvc->control_buf); @@ -915,6 +939,7 @@ static struct usb_function *uvc_alloc(struct usb_function_instance *fi) mutex_init(&uvc->video.mutex); uvc->state = UVC_STATE_DISCONNECTED; + init_waitqueue_head(&uvc->func_connected_queue); opts = fi_to_f_uvc_opts(fi); mutex_lock(&opts->lock); diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h index c3607a32b986..886103a1fe9b 100644 --- a/drivers/usb/gadget/function/uvc.h +++ b/drivers/usb/gadget/function/uvc.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -129,6 +130,7 @@ struct uvc_device { struct usb_function func; struct uvc_video video; bool func_connected; + wait_queue_head_t func_connected_queue; /* Descriptors */ struct { diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c index a2c78690c5c2..fd8f73bb726d 100644 --- a/drivers/usb/gadget/function/uvc_v4l2.c +++ b/drivers/usb/gadget/function/uvc_v4l2.c @@ -253,10 +253,11 @@ uvc_v4l2_subscribe_event(struct v4l2_fh *fh, static void uvc_v4l2_disable(struct uvc_device *uvc) { - uvc->func_connected = false; uvc_function_disconnect(uvc); uvcg_video_enable(&uvc->video, 0); uvcg_free_buffers(&uvc->video.queue); + uvc->func_connected = false; + wake_up_interruptible(&uvc->func_connected_queue); } static int From 447ee1516f19f534a228dda237eddb202f23e163 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 5 May 2022 20:46:21 +0800 Subject: [PATCH 624/803] tty/serial: digicolor: fix possible null-ptr-deref in digicolor_uart_probe() It will cause null-ptr-deref when using 'res', if platform_get_resource() returns NULL, so move using 'res' after devm_ioremap_resource() that will check it to avoid null-ptr-deref. And use devm_platform_get_and_ioremap_resource() to simplify code. Fixes: 5930cb3511df ("serial: driver for Conexant Digicolor USART") Signed-off-by: Yang Yingliang Reviewed-by: Baruch Siach Cc: stable Link: https://lore.kernel.org/r/20220505124621.1592697-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/digicolor-usart.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/digicolor-usart.c b/drivers/tty/serial/digicolor-usart.c index 6d70fea76bb3..e37a917b9dbb 100644 --- a/drivers/tty/serial/digicolor-usart.c +++ b/drivers/tty/serial/digicolor-usart.c @@ -471,11 +471,10 @@ static int digicolor_uart_probe(struct platform_device *pdev) if (IS_ERR(uart_clk)) return PTR_ERR(uart_clk); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - dp->port.mapbase = res->start; - dp->port.membase = devm_ioremap_resource(&pdev->dev, res); + dp->port.membase = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(dp->port.membase)) return PTR_ERR(dp->port.membase); + dp->port.mapbase = res->start; irq = platform_get_irq(pdev, 0); if (irq < 0) From bb0b197aadd928f52ce6f01f0ee977f0a08cf1be Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 27 Apr 2022 15:23:26 +0200 Subject: [PATCH 625/803] serial: 8250_mtk: Fix UART_EFR register address On MediaTek SoCs, the UART IP is 16550A compatible, but there are some specific quirks: we are declaring a register shift of 2, but this is only valid for the majority of the registers, as there are some that are out of the standard layout. Specifically, this driver is using definitions from serial_reg.h, where we have a UART_EFR register defined as 2: this results in a 0x8 offset, but there we have the FCR register instead. The right offset for the EFR register on MediaTek UART is at 0x98, so, following the decimal definition convention in serial_reg.h and accounting for the register left shift of two, add and use the correct register address for this IP, defined as decimal 38, so that the final calculation results in (0x26 << 2) = 0x98. Fixes: bdbd0a7f8f03 ("serial: 8250-mtk: modify baudrate setting") Signed-off-by: AngeloGioacchino Del Regno Cc: stable Link: https://lore.kernel.org/r/20220427132328.228297-2-angelogioacchino.delregno@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index f4a0caa56f84..cd62a5f34014 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -37,6 +37,7 @@ #define MTK_UART_IER_RTSI 0x40 /* Enable RTS Modem status interrupt */ #define MTK_UART_IER_CTSI 0x80 /* Enable CTS Modem status interrupt */ +#define MTK_UART_EFR 38 /* I/O: Extended Features Register */ #define MTK_UART_EFR_EN 0x10 /* Enable enhancement feature */ #define MTK_UART_EFR_RTS 0x40 /* Enable hardware rx flow control */ #define MTK_UART_EFR_CTS 0x80 /* Enable hardware tx flow control */ @@ -169,7 +170,7 @@ static void mtk8250_dma_enable(struct uart_8250_port *up) MTK_UART_DMA_EN_RX | MTK_UART_DMA_EN_TX); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(up, UART_EFR, UART_EFR_ECB); + serial_out(up, MTK_UART_EFR, UART_EFR_ECB); serial_out(up, UART_LCR, lcr); if (dmaengine_slave_config(dma->rxchan, &dma->rxconf) != 0) @@ -232,7 +233,7 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) int lcr = serial_in(up, UART_LCR); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(up, UART_EFR, UART_EFR_ECB); + serial_out(up, MTK_UART_EFR, UART_EFR_ECB); serial_out(up, UART_LCR, lcr); lcr = serial_in(up, UART_LCR); @@ -241,7 +242,7 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) serial_out(up, MTK_UART_ESCAPE_DAT, MTK_UART_ESCAPE_CHAR); serial_out(up, MTK_UART_ESCAPE_EN, 0x00); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(up, UART_EFR, serial_in(up, UART_EFR) & + serial_out(up, MTK_UART_EFR, serial_in(up, MTK_UART_EFR) & (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK))); serial_out(up, UART_LCR, lcr); mtk8250_disable_intrs(up, MTK_UART_IER_XOFFI | @@ -255,8 +256,8 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); /*enable hw flow control*/ - serial_out(up, UART_EFR, MTK_UART_EFR_HW_FC | - (serial_in(up, UART_EFR) & + serial_out(up, MTK_UART_EFR, MTK_UART_EFR_HW_FC | + (serial_in(up, MTK_UART_EFR) & (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK)))); serial_out(up, UART_LCR, lcr); @@ -270,8 +271,8 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); /*enable sw flow control */ - serial_out(up, UART_EFR, MTK_UART_EFR_XON1_XOFF1 | - (serial_in(up, UART_EFR) & + serial_out(up, MTK_UART_EFR, MTK_UART_EFR_XON1_XOFF1 | + (serial_in(up, MTK_UART_EFR) & (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK)))); serial_out(up, UART_XON1, START_CHAR(port->state->port.tty)); From 6f81fdded0d024c7d4084d434764f30bca1cd6b1 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 27 Apr 2022 15:23:27 +0200 Subject: [PATCH 626/803] serial: 8250_mtk: Make sure to select the right FEATURE_SEL Set the FEATURE_SEL at probe time to make sure that BIT(0) is enabled: this guarantees that when the port is configured as AP UART, the right register layout is interpreted by the UART IP. Signed-off-by: AngeloGioacchino Del Regno Cc: stable Link: https://lore.kernel.org/r/20220427132328.228297-3-angelogioacchino.delregno@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index cd62a5f34014..28e36459642c 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -54,6 +54,9 @@ #define MTK_UART_TX_TRIGGER 1 #define MTK_UART_RX_TRIGGER MTK_UART_RX_SIZE +#define MTK_UART_FEATURE_SEL 39 /* Feature Selection register */ +#define MTK_UART_FEAT_NEWRMAP BIT(0) /* Use new register map */ + #ifdef CONFIG_SERIAL_8250_DMA enum dma_rx_status { DMA_RX_START = 0, @@ -569,6 +572,10 @@ static int mtk8250_probe(struct platform_device *pdev) uart.dma = data->dma; #endif + /* Set AP UART new register map */ + writel(MTK_UART_FEAT_NEWRMAP, uart.port.membase + + (MTK_UART_FEATURE_SEL << uart.port.regshift)); + /* Disable Rate Fix function */ writel(0x0, uart.port.membase + (MTK_UART_RATE_FIX << uart.port.regshift)); From e1bfdbc7daca171c74a577b3dd0b36d76bb0ffcc Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 27 Apr 2022 15:23:28 +0200 Subject: [PATCH 627/803] serial: 8250_mtk: Fix register address for XON/XOFF character The XON1/XOFF1 character registers are at offset 0xa0 and 0xa8 respectively, so we cannot use the definition in serial_port.h. Fixes: bdbd0a7f8f03 ("serial: 8250-mtk: modify baudrate setting") Signed-off-by: AngeloGioacchino Del Regno Cc: stable Link: https://lore.kernel.org/r/20220427132328.228297-4-angelogioacchino.delregno@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index 28e36459642c..21053db93ff1 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -57,6 +57,9 @@ #define MTK_UART_FEATURE_SEL 39 /* Feature Selection register */ #define MTK_UART_FEAT_NEWRMAP BIT(0) /* Use new register map */ +#define MTK_UART_XON1 40 /* I/O: Xon character 1 */ +#define MTK_UART_XOFF1 42 /* I/O: Xoff character 1 */ + #ifdef CONFIG_SERIAL_8250_DMA enum dma_rx_status { DMA_RX_START = 0, @@ -278,8 +281,8 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) (serial_in(up, MTK_UART_EFR) & (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK)))); - serial_out(up, UART_XON1, START_CHAR(port->state->port.tty)); - serial_out(up, UART_XOFF1, STOP_CHAR(port->state->port.tty)); + serial_out(up, MTK_UART_XON1, START_CHAR(port->state->port.tty)); + serial_out(up, MTK_UART_XOFF1, STOP_CHAR(port->state->port.tty)); serial_out(up, UART_LCR, lcr); mtk8250_disable_intrs(up, MTK_UART_IER_CTSI|MTK_UART_IER_RTSI); mtk8250_enable_intrs(up, MTK_UART_IER_XOFFI); From fd442e5ba30aaa75ea47b32149e7a3110dc20a46 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Wed, 4 May 2022 10:17:31 +0200 Subject: [PATCH 628/803] tty: n_gsm: fix buffer over-read in gsm_dlci_data() 'len' is decreased after each octet that has its EA bit set to 0, which means that the value is encoded with additional octets. However, the final octet does not decreases 'len' which results in 'len' being one byte too long. A buffer over-read may occur in tty_insert_flip_string() as it tries to read one byte more than the passed content size of 'data'. Decrease 'len' also for the final octet which has the EA bit set to 1 to write the correct number of bytes from the internal receive buffer to the virtual tty. Fixes: 2e124b4a390c ("TTY: switch tty_flip_buffer_push") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220504081733.3494-1-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index a38b922bcbc1..9b0b435cf26e 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -1658,6 +1658,7 @@ static void gsm_dlci_data(struct gsm_dlci *dlci, const u8 *data, int clen) if (len == 0) return; } + len--; slen++; tty = tty_port_tty_get(port); if (tty) { From edd5f60c340086891fab094ad61270d6c80f9ca4 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Wed, 4 May 2022 10:17:32 +0200 Subject: [PATCH 629/803] tty: n_gsm: fix mux activation issues in gsm_config() The current implementation activates the mux if it was restarted and opens the control channel if the mux was previously closed and we are now acting as initiator instead of responder, which is the default setting. This has two issues. 1) No mux is activated if we keep all default values and only switch to initiator. The control channel is not allocated but will be opened next which results in a NULL pointer dereference. 2) Switching the configuration after it was once configured while keeping the initiator value the same will not reopen the control channel if it was closed due to parameter incompatibilities. The mux remains dead. Fix 1) by always activating the mux if it is dead after configuration. Fix 2) by always opening the control channel after mux activation. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220504081733.3494-2-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 9b0b435cf26e..bcb714031d69 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2352,6 +2352,7 @@ static void gsm_copy_config_values(struct gsm_mux *gsm, static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) { + int ret = 0; int need_close = 0; int need_restart = 0; @@ -2419,10 +2420,13 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) * FIXME: We need to separate activation/deactivation from adding * and removing from the mux array */ - if (need_restart) - gsm_activate_mux(gsm); - if (gsm->initiator && need_close) - gsm_dlci_begin_open(gsm->dlci[0]); + if (gsm->dead) { + ret = gsm_activate_mux(gsm); + if (ret) + return ret; + if (gsm->initiator) + gsm_dlci_begin_open(gsm->dlci[0]); + } return 0; } From 9361ebfbb79fd1bc8594a487c01ad52cdaa391ea Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Wed, 4 May 2022 10:17:33 +0200 Subject: [PATCH 630/803] tty: n_gsm: fix invalid gsmtty_write_room() result gsmtty_write() does not prevent the user to use the full fifo size of 4096 bytes as allocated in gsm_dlci_alloc(). However, gsmtty_write_room() tries to limit the return value by 'TX_SIZE' and returns a negative value if the fifo has more than 'TX_SIZE' bytes stored. This is obviously wrong as 'TX_SIZE' is defined as 512. Define 'TX_SIZE' to the fifo size and use it accordingly for allocation to keep the current behavior. Return the correct remaining size of the fifo in gsmtty_write_room() via kfifo_avail(). Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220504081733.3494-3-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index bcb714031d69..fd8b86dde525 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -137,6 +137,7 @@ struct gsm_dlci { int retries; /* Uplink tty if active */ struct tty_port port; /* The tty bound to this DLCI if there is one */ +#define TX_SIZE 4096 /* Must be power of 2. */ struct kfifo fifo; /* Queue fifo for the DLCI */ int adaption; /* Adaption layer in use */ int prev_adaption; @@ -1731,7 +1732,7 @@ static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr) return NULL; spin_lock_init(&dlci->lock); mutex_init(&dlci->mutex); - if (kfifo_alloc(&dlci->fifo, 4096, GFP_KERNEL) < 0) { + if (kfifo_alloc(&dlci->fifo, TX_SIZE, GFP_KERNEL) < 0) { kfree(dlci); return NULL; } @@ -2976,8 +2977,6 @@ static struct tty_ldisc_ops tty_ldisc_packet = { * Virtual tty side */ -#define TX_SIZE 512 - /** * gsm_modem_upd_via_data - send modem bits via convergence layer * @dlci: channel @@ -3217,7 +3216,7 @@ static unsigned int gsmtty_write_room(struct tty_struct *tty) struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return 0; - return TX_SIZE - kfifo_len(&dlci->fifo); + return kfifo_avail(&dlci->fifo); } static unsigned int gsmtty_chars_in_buffer(struct tty_struct *tty) From 401fb66a355eb0f22096cf26864324f8e63c7d78 Mon Sep 17 00:00:00 2001 From: Indan Zupancic Date: Thu, 5 May 2022 13:47:50 +0200 Subject: [PATCH 631/803] fsl_lpuart: Don't enable interrupts too early If an irq is pending when devm_request_irq() is called, the irq handler will cause a NULL pointer access because initialisation is not done yet. Fixes: 9d7ee0e28da59 ("tty: serial: lpuart: avoid report NULL interrupt") Cc: stable Signed-off-by: Indan Zupancic Link: https://lore.kernel.org/r/20220505114750.45423-1-Indan.Zupancic@mep-info.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 87789872f400..be12fee94db5 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2664,6 +2664,7 @@ static int lpuart_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; struct lpuart_port *sport; struct resource *res; + irq_handler_t handler; int ret; sport = devm_kzalloc(&pdev->dev, sizeof(*sport), GFP_KERNEL); @@ -2741,17 +2742,11 @@ static int lpuart_probe(struct platform_device *pdev) if (lpuart_is_32(sport)) { lpuart_reg.cons = LPUART32_CONSOLE; - ret = devm_request_irq(&pdev->dev, sport->port.irq, lpuart32_int, 0, - DRIVER_NAME, sport); + handler = lpuart32_int; } else { lpuart_reg.cons = LPUART_CONSOLE; - ret = devm_request_irq(&pdev->dev, sport->port.irq, lpuart_int, 0, - DRIVER_NAME, sport); + handler = lpuart_int; } - - if (ret) - goto failed_irq_request; - ret = uart_add_one_port(&lpuart_reg, &sport->port); if (ret) goto failed_attach_port; @@ -2773,13 +2768,18 @@ static int lpuart_probe(struct platform_device *pdev) sport->port.rs485_config(&sport->port, &sport->port.rs485); + ret = devm_request_irq(&pdev->dev, sport->port.irq, handler, 0, + DRIVER_NAME, sport); + if (ret) + goto failed_irq_request; + return 0; +failed_irq_request: failed_get_rs485: failed_reset: uart_remove_one_port(&lpuart_reg, &sport->port); failed_attach_port: -failed_irq_request: lpuart_disable_clks(sport); failed_clock_enable: failed_out_of_range: From 6997fbd7a3dafa754f81d541498ace35b43246d8 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 5 May 2022 10:53:53 +0900 Subject: [PATCH 632/803] net: rds: use maybe_get_net() when acquiring refcount on TCP sockets Eric Dumazet is reporting addition on 0 problem at rds_tcp_tune(), for delayed works queued in rds_wq might be invoked after a net namespace's refcount already reached 0. Since rds_tcp_exit_net() from cleanup_net() calls flush_workqueue(rds_wq), it is guaranteed that we can instead use maybe_get_net() from delayed work functions until rds_tcp_exit_net() returns. Note that I'm not convinced that all works which might access a net namespace are already queued in rds_wq by the moment rds_tcp_exit_net() calls flush_workqueue(rds_wq). If some race is there, rds_tcp_exit_net() will fail to wait for work functions, and kmem_cache_free() could be called from net_free() before maybe_get_net() is called from rds_tcp_tune(). Reported-by: Eric Dumazet Fixes: 3a58f13a881ed351 ("net: rds: acquire refcount on TCP sockets") Signed-off-by: Tetsuo Handa Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/41d09faf-bc78-1a87-dfd1-c6d1b5984b61@I-love.SAKURA.ne.jp Signed-off-by: Jakub Kicinski --- net/rds/tcp.c | 12 +++++++++--- net/rds/tcp.h | 2 +- net/rds/tcp_connect.c | 5 ++++- net/rds/tcp_listen.c | 5 ++++- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 2f638f8b7b1e..73ee2771093d 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -487,11 +487,11 @@ struct rds_tcp_net { /* All module specific customizations to the RDS-TCP socket should be done in * rds_tcp_tune() and applied after socket creation. */ -void rds_tcp_tune(struct socket *sock) +bool rds_tcp_tune(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); - struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct rds_tcp_net *rtn; tcp_sock_set_nodelay(sock->sk); lock_sock(sk); @@ -499,10 +499,15 @@ void rds_tcp_tune(struct socket *sock) * a process which created this net namespace terminated. */ if (!sk->sk_net_refcnt) { + if (!maybe_get_net(net)) { + release_sock(sk); + return false; + } sk->sk_net_refcnt = 1; - get_net_track(net, &sk->ns_tracker, GFP_KERNEL); + netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL); sock_inuse_add(net, 1); } + rtn = net_generic(net, rds_tcp_netid); if (rtn->sndbuf_size > 0) { sk->sk_sndbuf = rtn->sndbuf_size; sk->sk_userlocks |= SOCK_SNDBUF_LOCK; @@ -512,6 +517,7 @@ void rds_tcp_tune(struct socket *sock) sk->sk_userlocks |= SOCK_RCVBUF_LOCK; } release_sock(sk); + return true; } static void rds_tcp_accept_worker(struct work_struct *work) diff --git a/net/rds/tcp.h b/net/rds/tcp.h index dc8d745d6857..f8b5930d7b34 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -49,7 +49,7 @@ struct rds_tcp_statistics { }; /* tcp.c */ -void rds_tcp_tune(struct socket *sock); +bool rds_tcp_tune(struct socket *sock); void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_restore_callbacks(struct socket *sock, diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 5461d77fff4f..f0c477c5d1db 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -124,7 +124,10 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) if (ret < 0) goto out; - rds_tcp_tune(sock); + if (!rds_tcp_tune(sock)) { + ret = -EINVAL; + goto out; + } if (isv6) { sin6.sin6_family = AF_INET6; diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 09cadd556d1e..7edf2e69d3fe 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -133,7 +133,10 @@ int rds_tcp_accept_one(struct socket *sock) __module_get(new_sock->ops->owner); rds_tcp_keepalive(new_sock); - rds_tcp_tune(new_sock); + if (!rds_tcp_tune(new_sock)) { + ret = -EINVAL; + goto out; + } inet = inet_sk(new_sock->sk); From e333eed63a091a09bd0db191b7710c594c6e995b Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 4 May 2022 11:31:03 -0300 Subject: [PATCH 633/803] net: phy: micrel: Do not use kszphy_suspend/resume for KSZ8061 Since commit f1131b9c23fb ("net: phy: micrel: use kszphy_suspend()/kszphy_resume for irq aware devices") the following NULL pointer dereference is observed on a board with KSZ8061: # udhcpc -i eth0 udhcpc: started, v1.35.0 8<--- cut here --- Unable to handle kernel NULL pointer dereference at virtual address 00000008 pgd = f73cef4e [00000008] *pgd=00000000 Internal error: Oops: 5 [#1] SMP ARM Modules linked in: CPU: 0 PID: 196 Comm: ifconfig Not tainted 5.15.37-dirty #94 Hardware name: Freescale i.MX6 SoloX (Device Tree) PC is at kszphy_config_reset+0x10/0x114 LR is at kszphy_resume+0x24/0x64 ... The KSZ8061 phy_driver structure does not have the .probe/..driver_data fields, which means that priv is not allocated. This causes the NULL pointer dereference inside kszphy_config_reset(). Fix the problem by using the generic suspend/resume functions as before. Another alternative would be to provide the .probe and .driver_data information into the structure, but to be on the safe side, let's just restore Ethernet functionality by using the generic suspend/resume. Cc: stable@vger.kernel.org Fixes: f1131b9c23fb ("net: phy: micrel: use kszphy_suspend()/kszphy_resume for irq aware devices") Signed-off-by: Fabio Estevam Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220504143104.1286960-1-festevam@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index fc53b71dc872..7c243cedde9f 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2782,8 +2782,8 @@ static struct phy_driver ksphy_driver[] = { .config_init = ksz8061_config_init, .config_intr = kszphy_config_intr, .handle_interrupt = kszphy_handle_interrupt, - .suspend = kszphy_suspend, - .resume = kszphy_resume, + .suspend = genphy_suspend, + .resume = genphy_resume, }, { .phy_id = PHY_ID_KSZ9021, .phy_id_mask = 0x000ffffe, From 15f03ffe4bb951e982457f44b6cf6b06ef4cbb93 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 4 May 2022 11:31:04 -0300 Subject: [PATCH 634/803] net: phy: micrel: Pass .probe for KS8737 Since commit f1131b9c23fb ("net: phy: micrel: use kszphy_suspend()/kszphy_resume for irq aware devices") the kszphy_suspend/ resume hooks are used. These functions require the probe function to be called so that priv can be allocated. Otherwise, a NULL pointer dereference happens inside kszphy_config_reset(). Cc: stable@vger.kernel.org Fixes: f1131b9c23fb ("net: phy: micrel: use kszphy_suspend()/kszphy_resume for irq aware devices") Reported-by: Andrew Lunn Signed-off-by: Fabio Estevam Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220504143104.1286960-2-festevam@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 7c243cedde9f..9d7dafed3931 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2657,6 +2657,7 @@ static struct phy_driver ksphy_driver[] = { .name = "Micrel KS8737", /* PHY_BASIC_FEATURES */ .driver_data = &ks8737_type, + .probe = kszphy_probe, .config_init = kszphy_config_init, .config_intr = kszphy_config_intr, .handle_interrupt = kszphy_handle_interrupt, From e1a7ac6f3ba6e157adcd0ca94d92a401f1943f56 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 4 May 2022 11:07:38 +0200 Subject: [PATCH 635/803] ping: fix address binding wrt vrf When ping_group_range is updated, 'ping' uses the DGRAM ICMP socket, instead of an IP raw socket. In this case, 'ping' is unable to bind its socket to a local address owned by a vrflite. Before the patch: $ sysctl -w net.ipv4.ping_group_range='0 2147483647' $ ip link add blue type vrf table 10 $ ip link add foo type dummy $ ip link set foo master blue $ ip link set foo up $ ip addr add 192.168.1.1/24 dev foo $ ip addr add 2001::1/64 dev foo $ ip vrf exec blue ping -c1 -I 192.168.1.1 192.168.1.2 ping: bind: Cannot assign requested address $ ip vrf exec blue ping6 -c1 -I 2001::1 2001::2 ping6: bind icmp socket: Cannot assign requested address CC: stable@vger.kernel.org Fixes: 1b69c6d0ae90 ("net: Introduce L3 Master device abstraction") Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/ping.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 3ee947557b88..aa9a11b20d18 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -305,6 +305,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, struct net *net = sock_net(sk); if (sk->sk_family == AF_INET) { struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; + u32 tb_id = RT_TABLE_LOCAL; int chk_addr_ret; if (addr_len < sizeof(*addr)) @@ -318,7 +319,8 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); - chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); + tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id; + chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id); if (!inet_addr_valid_or_nonlocal(net, inet_sk(sk), addr->sin_addr.s_addr, @@ -355,6 +357,14 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, return -ENODEV; } } + + if (!dev && sk->sk_bound_dev_if) { + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + } has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev, scoped); rcu_read_unlock(); From e71b7f1f44d3d88c677769c85ef0171caf9fc89f Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 4 May 2022 11:07:39 +0200 Subject: [PATCH 636/803] selftests: add ping test with ping_group_range tuned The 'ping' utility is able to manage two kind of sockets (raw or icmp), depending on the sysctl ping_group_range. By default, ping_group_range is set to '1 0', which forces ping to use an ip raw socket. Let's replay the ping tests by allowing 'ping' to use the ip icmp socket. After the previous patch, ipv4 tests results are the same with both kinds of socket. For ipv6, there are a lot a new failures (the previous patch fixes only two cases). Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 47c4d4b4a44a..54701c8b0cd7 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -810,10 +810,16 @@ ipv4_ping() setup set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null ipv4_ping_novrf + setup + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv4_ping_novrf log_subsection "With VRF" setup "yes" ipv4_ping_vrf + setup "yes" + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv4_ping_vrf } ################################################################################ @@ -2348,10 +2354,16 @@ ipv6_ping() log_subsection "No VRF" setup ipv6_ping_novrf + setup + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv6_ping_novrf log_subsection "With VRF" setup "yes" ipv6_ping_vrf + setup "yes" + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv6_ping_vrf } ################################################################################ From 85db6352fc8a158a893151baa1716463d34a20d0 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 4 May 2022 11:09:14 +0300 Subject: [PATCH 637/803] net: Fix features skip in for_each_netdev_feature() The find_next_netdev_feature() macro gets the "remaining length", not bit index. Passing "bit - 1" for the following iteration is wrong as it skips the adjacent bit. Pass "bit" instead. Fixes: 3b89ea9c5902 ("net: Fix for_each_netdev_feature on Big endian") Signed-off-by: Tariq Toukan Reviewed-by: Gal Pressman Link: https://lore.kernel.org/r/20220504080914.1918-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/netdev_features.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 2c6b9e416225..7c2d77d75a88 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -169,7 +169,7 @@ enum { #define NETIF_F_HW_HSR_FWD __NETIF_F(HW_HSR_FWD) #define NETIF_F_HW_HSR_DUP __NETIF_F(HW_HSR_DUP) -/* Finds the next feature with the highest number of the range of start till 0. +/* Finds the next feature with the highest number of the range of start-1 till 0. */ static inline int find_next_netdev_feature(u64 feature, unsigned long start) { @@ -188,7 +188,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) for ((bit) = find_next_netdev_feature((mask_addr), \ NETDEV_FEATURE_COUNT); \ (bit) >= 0; \ - (bit) = find_next_netdev_feature((mask_addr), (bit) - 1)) + (bit) = find_next_netdev_feature((mask_addr), (bit))) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ From 4e707344e18525b4edf5c2bc2e3eb60692e8c92e Mon Sep 17 00:00:00 2001 From: Jonathan Toppins Date: Wed, 4 May 2022 14:59:08 -0400 Subject: [PATCH 638/803] MAINTAINERS: add missing files for bonding definition The bonding entry did not include additional include files that have been added nor did it reference the documentation. Add these references for completeness. Signed-off-by: Jonathan Toppins Link: https://lore.kernel.org/r/903ed2906b93628b38a2015664a20d2802042863.1651690748.git.jtoppins@redhat.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index edc96cdb85e8..15037bd1ce48 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3571,8 +3571,9 @@ M: Andy Gospodarek L: netdev@vger.kernel.org S: Supported W: http://sourceforge.net/projects/bonding/ +F: Documentation/networking/bonding.rst F: drivers/net/bonding/ -F: include/net/bonding.h +F: include/net/bond* F: include/uapi/linux/if_bonding.h BOSCH SENSORTEC BMA400 ACCELEROMETER IIO DRIVER From 5b53a405e4658580e1faf7c217db3f55a21ba849 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 5 May 2022 16:17:29 +0200 Subject: [PATCH 639/803] s390/dasd: fix data corruption for ESE devices For ESE devices we get an error when accessing an unformatted track. The handling of this error will return zero data for read requests and format the track on demand before writing to it. To do this the code needs to distinguish between read and write requests. This is done with data from the blocklayer request. A pointer to the blocklayer request is stored in the CQR. If there is an error on the device an ERP request is built to do error recovery. While the ERP request is mostly a copy of the original CQR the pointer to the blocklayer request is not copied to not accidentally pass it back to the blocklayer without cleanup. This leads to the error that during ESE handling after an ERP request was built it is not possible to determine the IO direction. This leads to the formatting of a track for read requests which might in turn lead to data corruption. Fixes: 5e2b17e712cf ("s390/dasd: Add dynamic formatting support for ESE volumes") Cc: stable@vger.kernel.org # 5.3+ Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Link: https://lore.kernel.org/r/20220505141733.1989450-2-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 8 +++++++- drivers/s390/block/dasd_eckd.c | 2 +- drivers/s390/block/dasd_int.h | 12 ++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 8e87a31e329d..76d13c5ff205 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1639,6 +1639,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, unsigned long now; int nrf_suppressed = 0; int fp_suppressed = 0; + struct request *req; u8 *sense = NULL; int expires; @@ -1739,7 +1740,12 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, } if (dasd_ese_needs_format(cqr->block, irb)) { - if (rq_data_dir((struct request *)cqr->callback_data) == READ) { + req = dasd_get_callback_data(cqr); + if (!req) { + cqr->status = DASD_CQR_ERROR; + return; + } + if (rq_data_dir(req) == READ) { device->discipline->ese_read(cqr, irb); cqr->status = DASD_CQR_SUCCESS; cqr->stopclk = now; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 8410a25a65c1..e3583502aca2 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -3145,7 +3145,7 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr, sector_t curr_trk; int rc; - req = cqr->callback_data; + req = dasd_get_callback_data(cqr); block = cqr->block; base = block->base; private = base->private; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 3b7af00a7825..07f9670ea61e 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -756,6 +756,18 @@ dasd_check_blocksize(int bsize) return 0; } +/* + * return the callback data of the original request in case there are + * ERP requests build on top of it + */ +static inline void *dasd_get_callback_data(struct dasd_ccw_req *cqr) +{ + while (cqr->refers) + cqr = cqr->refers; + + return cqr->callback_data; +} + /* externals in dasd.c */ #define DASD_PROFILE_OFF 0 #define DASD_PROFILE_ON 1 From 71f3871657370dbbaf942a1c758f64e49a36c70f Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 5 May 2022 16:17:30 +0200 Subject: [PATCH 640/803] s390/dasd: prevent double format of tracks for ESE devices For ESE devices we get an error for write operations on an unformatted track. Afterwards the track will be formatted and the IO operation restarted. When using alias devices a track might be accessed by multiple requests simultaneously and there is a race window that a track gets formatted twice resulting in data loss. Prevent this by remembering the amount of formatted tracks when starting a request and comparing this number before actually formatting a track on the fly. If the number has changed there is a chance that the current track was finally formatted in between. As a result do not format the track and restart the current IO to check. The number of formatted tracks does not match the overall number of formatted tracks on the device and it might wrap around but this is no problem. It is only needed to recognize that a track has been formatted at all in between. Fixes: 5e2b17e712cf ("s390/dasd: Add dynamic formatting support for ESE volumes") Cc: stable@vger.kernel.org # 5.3+ Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Link: https://lore.kernel.org/r/20220505141733.1989450-3-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 7 +++++++ drivers/s390/block/dasd_eckd.c | 19 +++++++++++++++++-- drivers/s390/block/dasd_int.h | 2 ++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 76d13c5ff205..d62a4c673962 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1422,6 +1422,13 @@ int dasd_start_IO(struct dasd_ccw_req *cqr) if (!cqr->lpm) cqr->lpm = dasd_path_get_opm(device); } + /* + * remember the amount of formatted tracks to prevent double format on + * ESE devices + */ + if (cqr->block) + cqr->trkcount = atomic_read(&cqr->block->trkcount); + if (cqr->cpmode == 1) { rc = ccw_device_tm_start(device->cdev, cqr->cpaddr, (long) cqr, cqr->lpm); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index e3583502aca2..649eba51e048 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -3083,13 +3083,24 @@ static int dasd_eckd_format_device(struct dasd_device *base, } static bool test_and_set_format_track(struct dasd_format_entry *to_format, - struct dasd_block *block) + struct dasd_ccw_req *cqr) { + struct dasd_block *block = cqr->block; struct dasd_format_entry *format; unsigned long flags; bool rc = false; spin_lock_irqsave(&block->format_lock, flags); + if (cqr->trkcount != atomic_read(&block->trkcount)) { + /* + * The number of formatted tracks has changed after request + * start and we can not tell if the current track was involved. + * To avoid data corruption treat it as if the current track is + * involved + */ + rc = true; + goto out; + } list_for_each_entry(format, &block->format_list, list) { if (format->track == to_format->track) { rc = true; @@ -3109,6 +3120,7 @@ static void clear_format_track(struct dasd_format_entry *format, unsigned long flags; spin_lock_irqsave(&block->format_lock, flags); + atomic_inc(&block->trkcount); list_del_init(&format->list); spin_unlock_irqrestore(&block->format_lock, flags); } @@ -3170,8 +3182,11 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr, } format->track = curr_trk; /* test if track is already in formatting by another thread */ - if (test_and_set_format_track(format, block)) + if (test_and_set_format_track(format, cqr)) { + /* this is no real error so do not count down retries */ + cqr->retries++; return ERR_PTR(-EEXIST); + } fdata.start_unit = curr_trk; fdata.stop_unit = curr_trk; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 07f9670ea61e..83b918b84b4a 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -187,6 +187,7 @@ struct dasd_ccw_req { void (*callback)(struct dasd_ccw_req *, void *data); void *callback_data; unsigned int proc_bytes; /* bytes for partial completion */ + unsigned int trkcount; /* count formatted tracks */ }; /* @@ -610,6 +611,7 @@ struct dasd_block { struct list_head format_list; spinlock_t format_lock; + atomic_t trkcount; }; struct dasd_attention_data { From cd68c48ea15c85f1577a442dc4c285e112ff1b37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Thu, 5 May 2022 16:17:31 +0200 Subject: [PATCH 641/803] s390/dasd: Fix read for ESE with blksize < 4k MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When reading unformatted tracks on ESE devices, the corresponding memory areas are simply set to zero for each segment. This is done incorrectly for blocksizes < 4096. There are two problems. First, the increment of dst is done using the counter of the loop (off), which is increased by blksize every iteration. This leads to a much bigger increment for dst as actually intended. Second, the increment of dst is done before the memory area is set to 0, skipping a significant amount of bytes of memory. This leads to illegal overwriting of memory and ultimately to a kernel panic. This is not a problem with 4k blocksize because blk_queue_max_segment_size is set to PAGE_SIZE, always resulting in a single iteration for the inner segment loop (bv.bv_len == blksize). The incorrectly used 'off' value to increment dst is 0 and the correct memory area is used. In order to fix this for blksize < 4k, increment dst correctly using the blksize and only do it at the end of the loop. Fixes: 5e2b17e712cf ("s390/dasd: Add dynamic formatting support for ESE volumes") Cc: stable@vger.kernel.org # v5.3+ Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Link: https://lore.kernel.org/r/20220505141733.1989450-4-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_eckd.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 649eba51e048..e46461b4d8a7 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -3285,12 +3285,11 @@ static int dasd_eckd_ese_read(struct dasd_ccw_req *cqr, struct irb *irb) cqr->proc_bytes = blk_count * blksize; return 0; } - if (dst && !skip_block) { - dst += off; + if (dst && !skip_block) memset(dst, 0, blksize); - } else { + else skip_block--; - } + dst += blksize; blk_count++; } } From b9c10f68e23c13f56685559a0d6fdaca9f838324 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Thu, 5 May 2022 16:17:32 +0200 Subject: [PATCH 642/803] s390/dasd: Fix read inconsistency for ESE DASD devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Read requests that return with NRF error are partially completed in dasd_eckd_ese_read(). The function keeps track of the amount of processed bytes and the driver will eventually return this information back to the block layer for further processing via __dasd_cleanup_cqr() when the request is in the final stage of processing (from the driver's perspective). For this, blk_update_request() is used which requires the number of bytes to complete the request. As per documentation the nr_bytes parameter is described as follows: "number of bytes to complete for @req". This was mistakenly interpreted as "number of bytes _left_ for @req" leading to new requests with incorrect data length. The consequence are inconsistent and completely wrong read requests as data from random memory areas are read back. Fix this by correctly specifying the amount of bytes that should be used to complete the request. Fixes: 5e6bdd37c552 ("s390/dasd: fix data corruption for thin provisioned devices") Cc: stable@vger.kernel.org # 5.3+ Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Link: https://lore.kernel.org/r/20220505141733.1989450-5-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index d62a4c673962..ba6d78789660 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2778,8 +2778,7 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) * complete a request partially. */ if (proc_bytes) { - blk_update_request(req, BLK_STS_OK, - blk_rq_bytes(req) - proc_bytes); + blk_update_request(req, BLK_STS_OK, proc_bytes); blk_mq_requeue_request(req, true); } else if (likely(!blk_should_fake_timeout(req->q))) { blk_mq_complete_request(req); From f1c8781ac9d87650ccf45a354c0bbfa3f9230371 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 5 May 2022 16:17:33 +0200 Subject: [PATCH 643/803] s390/dasd: Use kzalloc instead of kmalloc/memset Use kzalloc rather than duplicating its implementation, which makes code simple and easy to understand. Signed-off-by: Haowen Bai Reviewed-by: Sven Schnelle Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20220505141733.1989450-6-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_eckd.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index e46461b4d8a7..836838f7d686 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1480,7 +1480,7 @@ static int dasd_eckd_pe_handler(struct dasd_device *device, { struct pe_handler_work_data *data; - data = kmalloc(sizeof(*data), GFP_ATOMIC | GFP_DMA); + data = kzalloc(sizeof(*data), GFP_ATOMIC | GFP_DMA); if (!data) { if (mutex_trylock(&dasd_pe_handler_mutex)) { data = pe_handler_worker; @@ -1488,9 +1488,6 @@ static int dasd_eckd_pe_handler(struct dasd_device *device, } else { return -ENOMEM; } - } else { - memset(data, 0, sizeof(*data)); - data->isglobal = 0; } INIT_WORK(&data->worker, do_pe_handler_work); dasd_get_device(device); From e1846cff2fe614d93a2f89461b5935678fd34bd9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 May 2022 02:54:59 +0300 Subject: [PATCH 644/803] net: mscc: ocelot: mark traps with a bool instead of keeping them in a list Since the blamed commit, VCAP filters can appear on more than one list. If their action is "trap", they are chained on ocelot->traps via filter->trap_list. This is in addition to their normal placement on the VCAP block->rules list head. Therefore, when we free a VCAP filter, we must remove it from all lists it is a member of, including ocelot->traps. There are at least 2 bugs which are direct consequences of this design decision. First is the incorrect usage of list_empty(), meant to denote whether "filter" is chained into ocelot->traps via filter->trap_list. This does not do the correct thing, because list_empty() checks whether "head->next == head", but in our case, head->next == head->prev == NULL. So we dereference NULL pointers and die when we call list_del(). Second is the fact that not all places that should remove the filter from ocelot->traps do so. One example is ocelot_vcap_block_remove_filter(), which is where we have the main kfree(filter). By keeping freed filters in ocelot->traps we end up in a use-after-free in felix_update_trapping_destinations(). Attempting to fix all the buggy patterns is a whack-a-mole game which makes the driver unmaintainable. Actually this is what the previous patch version attempted to do: https://patchwork.kernel.org/project/netdevbpf/patch/20220503115728.834457-3-vladimir.oltean@nxp.com/ but it introduced another set of bugs, because there are other places in which create VCAP filters, not just ocelot_vcap_filter_create(): - ocelot_trap_add() - felix_tag_8021q_vlan_add_rx() - felix_tag_8021q_vlan_add_tx() Relying on the convention that all those code paths must call INIT_LIST_HEAD(&filter->trap_list) is not going to scale. So let's do what should have been done in the first place and keep a bool in struct ocelot_vcap_filter which denotes whether we are looking at a trapping rule or not. Iterating now happens over the main VCAP IS2 block->rules. The advantage is that we no longer risk having stale references to a freed filter, since it is only present in that list. Fixes: e42bd4ed09aa ("net: mscc: ocelot: keep traps in a list") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 7 ++++++- drivers/net/ethernet/mscc/ocelot.c | 11 +++-------- drivers/net/ethernet/mscc/ocelot_flower.c | 4 +--- include/soc/mscc/ocelot_vcap.h | 2 +- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 9e28219b223d..faccfb3f0158 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -403,6 +403,7 @@ static int felix_update_trapping_destinations(struct dsa_switch *ds, { struct ocelot *ocelot = ds->priv; struct felix *felix = ocelot_to_felix(ocelot); + struct ocelot_vcap_block *block_vcap_is2; struct ocelot_vcap_filter *trap; enum ocelot_mask_mode mask_mode; unsigned long port_mask; @@ -422,9 +423,13 @@ static int felix_update_trapping_destinations(struct dsa_switch *ds, /* We are sure that "cpu" was found, otherwise * dsa_tree_setup_default_cpu() would have failed earlier. */ + block_vcap_is2 = &ocelot->block[VCAP_IS2]; /* Make sure all traps are set up for that destination */ - list_for_each_entry(trap, &ocelot->traps, trap_list) { + list_for_each_entry(trap, &block_vcap_is2->rules, list) { + if (!trap->is_trap) + continue; + /* Figure out the current trapping destination */ if (using_tag_8021q) { /* Redirect to the tag_8021q CPU port. If timestamps diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index ca71b62a44dc..20ceac81a2c2 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1622,7 +1622,7 @@ int ocelot_trap_add(struct ocelot *ocelot, int port, trap->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY; trap->action.port_mask = 0; trap->take_ts = take_ts; - list_add_tail(&trap->trap_list, &ocelot->traps); + trap->is_trap = true; new = true; } @@ -1634,10 +1634,8 @@ int ocelot_trap_add(struct ocelot *ocelot, int port, err = ocelot_vcap_filter_replace(ocelot, trap); if (err) { trap->ingress_port_mask &= ~BIT(port); - if (!trap->ingress_port_mask) { - list_del(&trap->trap_list); + if (!trap->ingress_port_mask) kfree(trap); - } return err; } @@ -1657,11 +1655,8 @@ int ocelot_trap_del(struct ocelot *ocelot, int port, unsigned long cookie) return 0; trap->ingress_port_mask &= ~BIT(port); - if (!trap->ingress_port_mask) { - list_del(&trap->trap_list); - + if (!trap->ingress_port_mask) return ocelot_vcap_filter_del(ocelot, trap); - } return ocelot_vcap_filter_replace(ocelot, trap); } diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 03b5e59d033e..a9b26b3002be 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -295,7 +295,7 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port, filter->action.cpu_copy_ena = true; filter->action.cpu_qu_num = 0; filter->type = OCELOT_VCAP_FILTER_OFFLOAD; - list_add_tail(&filter->trap_list, &ocelot->traps); + filter->is_trap = true; break; case FLOW_ACTION_POLICE: if (filter->block_id == PSFP_BLOCK_ID) { @@ -878,8 +878,6 @@ int ocelot_cls_flower_replace(struct ocelot *ocelot, int port, ret = ocelot_flower_parse(ocelot, port, ingress, f, filter); if (ret) { - if (!list_empty(&filter->trap_list)) - list_del(&filter->trap_list); kfree(filter); return ret; } diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index 7b2bf9b1fe69..de26c992f821 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -681,7 +681,6 @@ struct ocelot_vcap_id { struct ocelot_vcap_filter { struct list_head list; - struct list_head trap_list; enum ocelot_vcap_filter_type type; int block_id; @@ -695,6 +694,7 @@ struct ocelot_vcap_filter { struct ocelot_vcap_stats stats; /* For VCAP IS1 and IS2 */ bool take_ts; + bool is_trap; unsigned long ingress_port_mask; /* For VCAP ES0 */ struct ocelot_vcap_port ingress_port; From 16bbebd35629c93a8c68c6d8d28557e100bcee73 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 May 2022 02:55:00 +0300 Subject: [PATCH 645/803] net: mscc: ocelot: fix last VCAP IS1/IS2 filter persisting in hardware when deleted ocelot_vcap_filter_del() works by moving the next filters over the current one, and then deleting the last filter by calling vcap_entry_set() with a del_filter which was specially created by memsetting its memory to zeroes. vcap_entry_set() then programs this to the TCAM and action RAM via the cache registers. The problem is that vcap_entry_set() is a dispatch function which looks at del_filter->block_id. But since del_filter is zeroized memory, the block_id is 0, or otherwise said, VCAP_ES0. So practically, what we do is delete the entry at the same TCAM index from VCAP ES0 instead of IS1 or IS2. The code was not always like this. vcap_entry_set() used to simply be is2_entry_set(), and then, the logic used to work. Restore the functionality by populating the block_id of the del_filter based on the VCAP block of the filter that we're deleting. This makes vcap_entry_set() know what to do. Fixes: 1397a2eb52e2 ("net: mscc: ocelot: create TCAM skeleton from tc filter chains") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_vcap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index c8701ac955a8..2749df593ebc 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -1250,7 +1250,11 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot, struct ocelot_vcap_filter del_filter; int i, index; + /* Need to inherit the block_id so that vcap_entry_set() + * does not get confused and knows where to install it. + */ memset(&del_filter, 0, sizeof(del_filter)); + del_filter.block_id = filter->block_id; /* Gets index of the filter */ index = ocelot_vcap_block_get_filter_index(block, filter); From 6741e11880003e35802d78cc58035057934f4dab Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 May 2022 02:55:01 +0300 Subject: [PATCH 646/803] net: mscc: ocelot: fix VCAP IS2 filters matching on both lookups The VCAP IS2 TCAM is looked up twice per packet, and each filter can be configured to only match during the first, second lookup, or both, or none. The blamed commit wrote the code for making VCAP IS2 filters match only on the given lookup. But right below that code, there was another line that explicitly made the lookup a "don't care", and this is overwriting the lookup we've selected. So the code had no effect. Some of the more noticeable effects of having filters match on both lookups: - in "tc -s filter show dev swp0 ingress", we see each packet matching a VCAP IS2 filter counted twice. This throws off scripts such as tools/testing/selftests/net/forwarding/tc_actions.sh and makes them fail. - a "tc-drop" action offloaded to VCAP IS2 needs a policer as well, because once the CPU port becomes a member of the destination port mask of a packet, nothing removes it, not even a PERMIT/DENY mask mode with a port mask of 0. But VCAP IS2 rules with the POLICE_ENA bit in the action vector can only appear in the first lookup. What happens when a filter matches both lookups is that the action vector is combined, and this makes the POLICE_ENA bit ineffective, since the last lookup in which it has appeared is the second one. In other words, "tc-drop" actions do not drop packets for the CPU port, dropped packets are still seen by software unless there was an FDB entry that directed those packets to some other place different from the CPU. The last bit used to work, because in the initial commit b596229448dd ("net: mscc: ocelot: Add support for tcam"), we were writing the FIRST field of the VCAP IS2 half key with a 1, not with a "don't care". The change to "don't care" was made inadvertently by me in commit c1c3993edb7c ("net: mscc: ocelot: generalize existing code for VCAP"), which I just realized, and which needs a separate fix from this one, for "stable" kernels that lack the commit blamed below. Fixes: 226e9cd82a96 ("net: mscc: ocelot: only install TCAM entries into a specific lookup and PAG") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_vcap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 2749df593ebc..774cec377703 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -374,7 +374,6 @@ static void is2_entry_set(struct ocelot *ocelot, int ix, OCELOT_VCAP_BIT_0); vcap_key_set(vcap, &data, VCAP_IS2_HK_IGR_PORT_MASK, 0, ~filter->ingress_port_mask); - vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_FIRST, OCELOT_VCAP_BIT_ANY); vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_HOST_MATCH, OCELOT_VCAP_BIT_ANY); vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L2_MC, filter->dmac_mc); From 477d2b91623e682e9a8126ea92acb8f684969cc7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 May 2022 02:55:02 +0300 Subject: [PATCH 647/803] net: mscc: ocelot: restrict tc-trap actions to VCAP IS2 lookup 0 Once the CPU port was added to the destination port mask of a packet, it can never be cleared, so even packets marked as dropped by the MASK_MODE of a VCAP IS2 filter will still reach it. This is why we need the OCELOT_POLICER_DISCARD to "kill dropped packets dead" and make software stop seeing them. We disallow policer rules from being put on any other chain than the one for the first lookup, but we don't do this for "drop" rules, although we should. This change is merely ascertaining that the rules dont't (completely) work and letting the user know. The blamed commit is the one that introduced the multi-chain architecture in ocelot. Prior to that, we should have always offloaded the filters to VCAP IS2 lookup 0, where they did work. Fixes: 1397a2eb52e2 ("net: mscc: ocelot: create TCAM skeleton from tc filter chains") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_flower.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index a9b26b3002be..51cf241ff7d0 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -280,9 +280,10 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port, filter->type = OCELOT_VCAP_FILTER_OFFLOAD; break; case FLOW_ACTION_TRAP: - if (filter->block_id != VCAP_IS2) { + if (filter->block_id != VCAP_IS2 || + filter->lookup != 0) { NL_SET_ERR_MSG_MOD(extack, - "Trap action can only be offloaded to VCAP IS2"); + "Trap action can only be offloaded to VCAP IS2 lookup 0"); return -EOPNOTSUPP; } if (filter->goto_target != -1) { From 93a8417088ea570b5721d2b526337a2d3aed9fa3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 May 2022 02:55:03 +0300 Subject: [PATCH 648/803] net: mscc: ocelot: avoid corrupting hardware counters when moving VCAP filters Given the following order of operations: (1) we add filter A using tc-flower (2) we send a packet that matches it (3) we read the filter's statistics to find a hit count of 1 (4) we add a second filter B with a higher preference than A, and A moves one position to the right to make room in the TCAM for it (5) we send another packet, and this matches the second filter B (6) we read the filter statistics again. When this happens, the hit count of filter A is 2 and of filter B is 1, despite a single packet having matched each filter. Furthermore, in an alternate history, reading the filter stats a second time between steps (3) and (4) makes the hit count of filter A remain at 1 after step (6), as expected. The reason why this happens has to do with the filter->stats.pkts field, which is written to hardware through the call path below: vcap_entry_set / | \ / | \ / | \ / | \ es0_entry_set is1_entry_set is2_entry_set \ | / \ | / \ | / vcap_data_set(data.counter, ...) The primary role of filter->stats.pkts is to transport the filter hit counters from the last readout all the way from vcap_entry_get() -> ocelot_vcap_filter_stats_update() -> ocelot_cls_flower_stats(). The reason why vcap_entry_set() writes it to hardware is so that the counters (saturating and having a limited bit width) are cleared after each user space readout. The writing of filter->stats.pkts to hardware during the TCAM entry movement procedure is an unintentional consequence of the code design, because the hit count isn't up to date at this point. So at step (4), when filter A is moved by ocelot_vcap_filter_add() to make room for filter B, the hardware hit count is 0 (no packet matched on it in the meantime), but filter->stats.pkts is 1, because the last readout saw the earlier packet. The movement procedure programs the old hit count back to hardware, so this creates the impression to user space that more packets have been matched than they really were. The bug can be seen when running the gact_drop_and_ok_test() from the tc_actions.sh selftest. Fix the issue by reading back the hit count to tmp->stats.pkts before migrating the VCAP filter. Sure, this is a best-effort technique, since the packets that hit the rule between vcap_entry_get() and vcap_entry_set() won't be counted, but at least it allows the counters to be reliably used for selftests where the traffic is under control. The vcap_entry_get() name is a bit unintuitive, but it only reads back the counter portion of the TCAM entry, not the entire entry. The index from which we retrieve the counter is also a bit unintuitive (i - 1 during add, i + 1 during del), but this is the way in which TCAM entry movement works. The "entry index" isn't a stored integer for a TCAM filter, instead it is dynamically computed by ocelot_vcap_block_get_filter_index() based on the entry's position in the &block->rules list. That position (as well as block->count) is automatically updated by ocelot_vcap_filter_add_to_block() on add, and by ocelot_vcap_block_remove_filter() on del. So "i" is the new filter index, and "i - 1" or "i + 1" respectively are the old addresses of that TCAM entry (we only support installing/deleting one filter at a time). Fixes: b596229448dd ("net: mscc: ocelot: Add support for tcam") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_vcap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 774cec377703..eeb4cc07dd16 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -1216,6 +1216,8 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot, struct ocelot_vcap_filter *tmp; tmp = ocelot_vcap_block_find_filter_by_index(block, i); + /* Read back the filter's counters before moving it */ + vcap_entry_get(ocelot, i - 1, tmp); vcap_entry_set(ocelot, i, tmp); } @@ -1268,6 +1270,8 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot, struct ocelot_vcap_filter *tmp; tmp = ocelot_vcap_block_find_filter_by_index(block, i); + /* Read back the filter's counters before moving it */ + vcap_entry_get(ocelot, i + 1, tmp); vcap_entry_set(ocelot, i, tmp); } From 348c71344111d7a48892e3e52264ff11956fc196 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Thu, 5 May 2022 21:04:51 +0530 Subject: [PATCH 649/803] powerpc/papr_scm: Fix buffer overflow issue with CONFIG_FORTIFY_SOURCE With CONFIG_FORTIFY_SOURCE enabled, string functions will also perform dynamic checks for string size which can panic the kernel, like incase of overflow detection. In papr_scm, papr_scm_pmu_check_events function uses stat->stat_id with string operations, to populate the nvdimm_events_map array. Since stat_id variable is not NULL terminated, the kernel panics with CONFIG_FORTIFY_SOURCE enabled at boot time. Below are the logs of kernel panic: detected buffer overflow in __fortify_strlen ------------[ cut here ]------------ kernel BUG at lib/string_helpers.c:980! Oops: Exception in kernel mode, sig: 5 [#1] NIP [c00000000077dad0] fortify_panic+0x28/0x38 LR [c00000000077dacc] fortify_panic+0x24/0x38 Call Trace: [c0000022d77836e0] [c00000000077dacc] fortify_panic+0x24/0x38 (unreliable) [c00800000deb2660] papr_scm_pmu_check_events.constprop.0+0x118/0x220 [papr_scm] [c00800000deb2cb0] papr_scm_probe+0x288/0x62c [papr_scm] [c0000000009b46a8] platform_probe+0x98/0x150 Fix this issue by using kmemdup_nul() to copy the content of stat->stat_id directly to the nvdimm_events_map array. mpe: stat->stat_id comes from the hypervisor, not userspace, so there is no security exposure. Fixes: 4c08d4bbc089 ("powerpc/papr_scm: Add perf interface support") Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220505153451.35503-1-kjain@linux.ibm.com --- arch/powerpc/platforms/pseries/papr_scm.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index f58728d5f10d..39962c905542 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -462,7 +462,6 @@ static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu { struct papr_scm_perf_stat *stat; struct papr_scm_perf_stats *stats; - char *statid; int index, rc, count; u32 available_events; @@ -493,14 +492,12 @@ static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu for (index = 0, stat = stats->scm_statistic, count = 0; index < available_events; index++, ++stat) { - statid = kzalloc(strlen(stat->stat_id) + 1, GFP_KERNEL); - if (!statid) { + p->nvdimm_events_map[count] = kmemdup_nul(stat->stat_id, 8, GFP_KERNEL); + if (!p->nvdimm_events_map[count]) { rc = -ENOMEM; goto out_nvdimm_events_map; } - strcpy(statid, stat->stat_id); - p->nvdimm_events_map[count] = statid; count++; } p->nvdimm_events_map[count] = NULL; From 135332f34ba2662bc1e32b5c612e06a8cc41a053 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 4 May 2022 13:59:17 +0200 Subject: [PATCH 650/803] Revert "fbdev: Make fb_release() return -ENODEV if fbdev was unregistered" This reverts commit aafa025c76dcc7d1a8c8f0bdefcbe4eb480b2f6a. That commit attempted to fix a NULL pointer dereference, caused by the struct fb_info associated with a framebuffer device to not longer be valid when the file descriptor was closed. The issue was exposed by commit 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal"), which added a new path that goes through the struct device removal instead of directly unregistering the fb. Most fbdev drivers have issues with the fb_info lifetime, because call to framebuffer_release() from their driver's .remove callback, rather than doing from fbops.fb_destroy callback. This meant that due to this switch, the fb_info was now destroyed too early, while references still existed, while before it was simply leaked. The patch we're reverting here reinstated that leak, hence "fixed" the regression. But the proper solution is to fix the drivers to not release the fb_info too soon. Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220504115917.758787-1-javierm@redhat.com --- drivers/video/fbdev/core/fbmem.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 97eb0dee411c..a6bb0e438216 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1434,10 +1434,7 @@ fb_release(struct inode *inode, struct file *file) __acquires(&info->lock) __releases(&info->lock) { - struct fb_info * const info = file_fb_info(file); - - if (!info) - return -ENODEV; + struct fb_info * const info = file->private_data; lock_fb_info(info); if (info->fbops->fb_release) From 89bfd4017e58faaf70411555e7f508495114e90b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 6 May 2022 00:04:13 +0200 Subject: [PATCH 651/803] fbdev: Prevent possible use-after-free in fb_release() Most fbdev drivers have issues with the fb_info lifetime, because call to framebuffer_release() from their driver's .remove callback, rather than doing from fbops.fb_destroy callback. Doing that will destroy the fb_info too early, while references to it may still exist, leading to a use-after-free error. To prevent this, check the fb_info reference counter when attempting to kfree the data structure in framebuffer_release(). That will leak it but at least will prevent the mentioned error. Signed-off-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220505220413.365977-1-javierm@redhat.com --- drivers/video/fbdev/core/fbsysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/video/fbdev/core/fbsysfs.c b/drivers/video/fbdev/core/fbsysfs.c index 26892940c213..82e31a2d845e 100644 --- a/drivers/video/fbdev/core/fbsysfs.c +++ b/drivers/video/fbdev/core/fbsysfs.c @@ -80,6 +80,10 @@ void framebuffer_release(struct fb_info *info) { if (!info) return; + + if (WARN_ON(refcount_read(&info->count))) + return; + kfree(info->apertures); kfree(info); } From 666b90b3ce9e4aac1e1deba266c3a230fb3913b0 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 6 May 2022 00:04:56 +0200 Subject: [PATCH 652/803] fbdev: simplefb: Cleanup fb_info in .fb_destroy rather than .remove The driver is calling framebuffer_release() in its .remove callback, but this will cause the struct fb_info to be freed too early. Since it could be that a reference is still hold to it if user-space opened the fbdev. This would lead to a use-after-free error if the framebuffer device was unregistered but later a user-space process tries to close the fbdev fd. To prevent this, move the framebuffer_release() call to fb_ops.fb_destroy instead of doing it in the driver's .remove callback. Strictly speaking, the code flow in the driver is still wrong because all the hardware cleanupd (i.e: iounmap) should be done in .remove while the software cleanup (i.e: releasing the framebuffer) should be done in the .fb_destroy handler. But this at least makes to match the behavior before commit 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal"). Fixes: 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal") Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220505220456.366090-1-javierm@redhat.com --- drivers/video/fbdev/simplefb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c index 94fc9c6d0411..2c198561c338 100644 --- a/drivers/video/fbdev/simplefb.c +++ b/drivers/video/fbdev/simplefb.c @@ -84,6 +84,10 @@ struct simplefb_par { static void simplefb_clocks_destroy(struct simplefb_par *par); static void simplefb_regulators_destroy(struct simplefb_par *par); +/* + * fb_ops.fb_destroy is called by the last put_fb_info() call at the end + * of unregister_framebuffer() or fb_release(). Do any cleanup here. + */ static void simplefb_destroy(struct fb_info *info) { struct simplefb_par *par = info->par; @@ -94,6 +98,8 @@ static void simplefb_destroy(struct fb_info *info) if (info->screen_base) iounmap(info->screen_base); + framebuffer_release(info); + if (mem) release_mem_region(mem->start, resource_size(mem)); } @@ -545,8 +551,8 @@ static int simplefb_remove(struct platform_device *pdev) { struct fb_info *info = platform_get_drvdata(pdev); + /* simplefb_destroy takes care of info cleanup */ unregister_framebuffer(info); - framebuffer_release(info); return 0; } From d258d00fb9c7c0cdf9d10c1ded84f10339d2d349 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 6 May 2022 00:05:40 +0200 Subject: [PATCH 653/803] fbdev: efifb: Cleanup fb_info in .fb_destroy rather than .remove The driver is calling framebuffer_release() in its .remove callback, but this will cause the struct fb_info to be freed too early. Since it could be that a reference is still hold to it if user-space opened the fbdev. This would lead to a use-after-free error if the framebuffer device was unregistered but later a user-space process tries to close the fbdev fd. To prevent this, move the framebuffer_release() call to fb_ops.fb_destroy instead of doing it in the driver's .remove callback. Strictly speaking, the code flow in the driver is still wrong because all the hardware cleanupd (i.e: iounmap) should be done in .remove while the software cleanup (i.e: releasing the framebuffer) should be done in the .fb_destroy handler. But this at least makes to match the behavior before commit 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal"). Fixes: 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal") Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220505220540.366218-1-javierm@redhat.com --- drivers/video/fbdev/efifb.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c index ea42ba6445b2..cfa3dc0b4eee 100644 --- a/drivers/video/fbdev/efifb.c +++ b/drivers/video/fbdev/efifb.c @@ -243,6 +243,10 @@ error: static inline void efifb_show_boot_graphics(struct fb_info *info) {} #endif +/* + * fb_ops.fb_destroy is called by the last put_fb_info() call at the end + * of unregister_framebuffer() or fb_release(). Do any cleanup here. + */ static void efifb_destroy(struct fb_info *info) { if (efifb_pci_dev) @@ -254,6 +258,9 @@ static void efifb_destroy(struct fb_info *info) else memunmap(info->screen_base); } + + framebuffer_release(info); + if (request_mem_succeeded) release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size); @@ -620,9 +627,9 @@ static int efifb_remove(struct platform_device *pdev) { struct fb_info *info = platform_get_drvdata(pdev); + /* efifb_destroy takes care of info cleanup */ unregister_framebuffer(info); sysfs_remove_groups(&pdev->dev.kobj, efifb_groups); - framebuffer_release(info); return 0; } From b3c9a924aab61adbc29df110006aa03afe1a78ba Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 6 May 2022 00:06:31 +0200 Subject: [PATCH 654/803] fbdev: vesafb: Cleanup fb_info in .fb_destroy rather than .remove The driver is calling framebuffer_release() in its .remove callback, but this will cause the struct fb_info to be freed too early. Since it could be that a reference is still hold to it if user-space opened the fbdev. This would lead to a use-after-free error if the framebuffer device was unregistered but later a user-space process tries to close the fbdev fd. To prevent this, move the framebuffer_release() call to fb_ops.fb_destroy instead of doing it in the driver's .remove callback. Strictly speaking, the code flow in the driver is still wrong because all the hardware cleanupd (i.e: iounmap) should be done in .remove while the software cleanup (i.e: releasing the framebuffer) should be done in the .fb_destroy handler. But this at least makes to match the behavior before commit 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal"). Fixes: 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal") Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220505220631.366371-1-javierm@redhat.com --- drivers/video/fbdev/vesafb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/vesafb.c b/drivers/video/fbdev/vesafb.c index df6de5a9dd4c..e25e8de5ff67 100644 --- a/drivers/video/fbdev/vesafb.c +++ b/drivers/video/fbdev/vesafb.c @@ -179,6 +179,10 @@ static int vesafb_setcolreg(unsigned regno, unsigned red, unsigned green, return err; } +/* + * fb_ops.fb_destroy is called by the last put_fb_info() call at the end + * of unregister_framebuffer() or fb_release(). Do any cleanup here. + */ static void vesafb_destroy(struct fb_info *info) { struct vesafb_par *par = info->par; @@ -188,6 +192,8 @@ static void vesafb_destroy(struct fb_info *info) if (info->screen_base) iounmap(info->screen_base); release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size); + + framebuffer_release(info); } static struct fb_ops vesafb_ops = { @@ -484,10 +490,10 @@ static int vesafb_remove(struct platform_device *pdev) { struct fb_info *info = platform_get_drvdata(pdev); + /* vesafb_destroy takes care of info cleanup */ unregister_framebuffer(info); if (((struct vesafb_par *)(info->par))->region) release_region(0x3c0, 32); - framebuffer_release(info); return 0; } From 581dd69830341d299b0c097fc366097ab497d679 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thi=C3=A9baud=20Weksteen?= Date: Mon, 2 May 2022 10:49:52 +1000 Subject: [PATCH 655/803] firmware_loader: use kernel credentials when reading firmware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Device drivers may decide to not load firmware when probed to avoid slowing down the boot process should the firmware filesystem not be available yet. In this case, the firmware loading request may be done when a device file associated with the driver is first accessed. The credentials of the userspace process accessing the device file may be used to validate access to the firmware files requested by the driver. Ensure that the kernel assumes the responsibility of reading the firmware. This was observed on Android for a graphic driver loading their firmware when the device file (e.g. /dev/mali0) was first opened by userspace (i.e. surfaceflinger). The security context of surfaceflinger was used to validate the access to the firmware file (e.g. /vendor/firmware/mali.bin). Previously, Android configurations were not setting up the firmware_class.path command line argument and were relying on the userspace fallback mechanism. In this case, the security context of the userspace daemon (i.e. ueventd) was consistently used to read firmware files. More Android devices are now found to set firmware_class.path which gives the kernel the opportunity to read the firmware directly (via kernel_read_file_from_path_initns). In this scenario, the current process credentials were used, even if unrelated to the loading of the firmware file. Signed-off-by: Thiébaud Weksteen Cc: # 5.10 Reviewed-by: Paul Moore Acked-by: Luis Chamberlain Link: https://lore.kernel.org/r/20220502004952.3970800-1-tweek@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_loader/main.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 94d1789a233e..406a907a4cae 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -735,6 +735,8 @@ _request_firmware(const struct firmware **firmware_p, const char *name, size_t offset, u32 opt_flags) { struct firmware *fw = NULL; + struct cred *kern_cred = NULL; + const struct cred *old_cred; bool nondirect = false; int ret; @@ -751,6 +753,18 @@ _request_firmware(const struct firmware **firmware_p, const char *name, if (ret <= 0) /* error or already assigned */ goto out; + /* + * We are about to try to access the firmware file. Because we may have been + * called by a driver when serving an unrelated request from userland, we use + * the kernel credentials to read the file. + */ + kern_cred = prepare_kernel_cred(NULL); + if (!kern_cred) { + ret = -ENOMEM; + goto out; + } + old_cred = override_creds(kern_cred); + ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL); /* Only full reads can support decompression, platform, and sysfs. */ @@ -776,6 +790,9 @@ _request_firmware(const struct firmware **firmware_p, const char *name, } else ret = assign_fw(fw, device); + revert_creds(old_cred); + put_cred(kern_cred); + out: if (ret < 0) { fw_abort_batch_reqs(fw); From 8bf6e0e3c7de857b91a75aa57cecd56c10035bb2 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Wed, 27 Apr 2022 10:29:57 -0700 Subject: [PATCH 656/803] Documentation/process: Make groups alphabetical and use tabs consistently The list appears to be grouped by type (silicon, software, cloud) and mostly alphabetical within each group, with a few exceptions. Before adding to it, cleanup the list to be alphabetical within the groups, and use tabs consistently throughout the list. Signed-off-by: Darren Hart Link: https://lore.kernel.org/r/ec574b5d55584a3adda9bd31b7695193636ff136.1650995848.git.darren@os.amperecomputing.com Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 6f8f36e10e8b..4f9efae02b6b 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -244,10 +244,10 @@ disclosure of a particular issue, unless requested by a response team or by an involved disclosed party. The current ambassadors list: ============= ======================================================== - ARM Grant Likely AMD Tom Lendacky - IBM Z Christian Borntraeger - IBM Power Anton Blanchard + ARM Grant Likely + IBM Power Anton Blanchard + IBM Z Christian Borntraeger Intel Tony Luck Qualcomm Trilok Soni From 29ad05fd6760c55fa7ab0e1f96e5be1b66daa4fc Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Wed, 27 Apr 2022 10:29:58 -0700 Subject: [PATCH 657/803] Documentation/process: Add embargoed HW contact for Ampere Computing Add Darren Hart as Ampere Computing's ambassador for the embargoed hardware issues process. Signed-off-by: Darren Hart Link: https://lore.kernel.org/r/2e36a8e925bc958928b4afa189b2f876c392831b.1650995848.git.darren@os.amperecomputing.com Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 4f9efae02b6b..98d7bc868f2a 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -245,6 +245,7 @@ an involved disclosed party. The current ambassadors list: ============= ======================================================== AMD Tom Lendacky + Ampere Darren Hart ARM Grant Likely IBM Power Anton Blanchard IBM Z Christian Borntraeger From c25d7f32e3e209462cd82e6e93e66b72dbb2308f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 28 Apr 2022 22:05:00 -0500 Subject: [PATCH 658/803] platform/x86: thinkpad_acpi: Convert btusb DMI list to quirks DMI matching in thinkpad_acpi happens local to a function meaning quirks can only match that function. Future changes to thinkpad_acpi may need to quirk other code, so change this to use a quirk infrastructure. Signed-off-by: Mario Limonciello Tested-by: Mark Pearson Link: https://lore.kernel.org/r/20220429030501.1909-2-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index c568fae56db2..2820205c01fd 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -309,6 +309,15 @@ struct ibm_init_struct { struct ibm_struct *data; }; +/* DMI Quirks */ +struct quirk_entry { + bool btusb_bug; +}; + +static struct quirk_entry quirk_btusb_bug = { + .btusb_bug = true, +}; + static struct { u32 bluetooth:1; u32 hotkey:1; @@ -338,6 +347,7 @@ static struct { u32 hotkey_poll_active:1; u32 has_adaptive_kbd:1; u32 kbd_lang:1; + struct quirk_entry *quirks; } tp_features; static struct { @@ -4359,9 +4369,10 @@ static void bluetooth_exit(void) bluetooth_shutdown(); } -static const struct dmi_system_id bt_fwbug_list[] __initconst = { +static const struct dmi_system_id fwbug_list[] __initconst = { { .ident = "ThinkPad E485", + .driver_data = &quirk_btusb_bug, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_BOARD_NAME, "20KU"), @@ -4369,6 +4380,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = { }, { .ident = "ThinkPad E585", + .driver_data = &quirk_btusb_bug, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_BOARD_NAME, "20KV"), @@ -4376,6 +4388,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = { }, { .ident = "ThinkPad A285 - 20MW", + .driver_data = &quirk_btusb_bug, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_BOARD_NAME, "20MW"), @@ -4383,6 +4396,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = { }, { .ident = "ThinkPad A285 - 20MX", + .driver_data = &quirk_btusb_bug, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_BOARD_NAME, "20MX"), @@ -4390,6 +4404,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = { }, { .ident = "ThinkPad A485 - 20MU", + .driver_data = &quirk_btusb_bug, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_BOARD_NAME, "20MU"), @@ -4397,6 +4412,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = { }, { .ident = "ThinkPad A485 - 20MV", + .driver_data = &quirk_btusb_bug, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_BOARD_NAME, "20MV"), @@ -4419,7 +4435,8 @@ static int __init have_bt_fwbug(void) * Some AMD based ThinkPads have a firmware bug that calling * "GBDC" will cause bluetooth on Intel wireless cards blocked */ - if (dmi_check_system(bt_fwbug_list) && pci_dev_present(fwbug_cards_ids)) { + if (tp_features.quirks && tp_features.quirks->btusb_bug && + pci_dev_present(fwbug_cards_ids)) { vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_RFKILL, FW_BUG "disable bluetooth subdriver for Intel cards\n"); return 1; @@ -11496,6 +11513,7 @@ static void thinkpad_acpi_module_exit(void) static int __init thinkpad_acpi_module_init(void) { + const struct dmi_system_id *dmi_id; int ret, i; tpacpi_lifecycle = TPACPI_LIFE_INIT; @@ -11535,6 +11553,10 @@ static int __init thinkpad_acpi_module_init(void) return -ENODEV; } + dmi_id = dmi_first_match(fwbug_list); + if (dmi_id) + tp_features.quirks = dmi_id->driver_data; + /* Device initialization */ tpacpi_pdev = platform_device_register_simple(TPACPI_DRVR_NAME, -1, NULL, 0); From 455cd867b85b53fd3602345f9b8a8facc551adc9 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 28 Apr 2022 22:05:01 -0500 Subject: [PATCH 659/803] platform/x86: thinkpad_acpi: Add a s2idle resume quirk for a number of laptops Lenovo laptops that contain NVME SSDs across a variety of generations have trouble resuming from suspend to idle when the IOMMU translation layer is active for the NVME storage device. This generally manifests as a large resume delay or page faults. These delays and page faults occur as a result of a Lenovo BIOS specific SMI that runs during the D3->D0 transition on NVME devices. This SMI occurs because of a flag that is set during resume by Lenovo firmware: ``` OperationRegion (PM80, SystemMemory, 0xFED80380, 0x10) Field (PM80, AnyAcc, NoLock, Preserve) { SI3R, 1 } Method (_ON, 0, NotSerialized) // _ON_: Power On { TPST (0x60D0) If ((DAS3 == 0x00)) { If (SI3R) { TPST (0x60E0) M020 (NBRI, 0x00, 0x00, 0x04, (NCMD | 0x06)) M020 (NBRI, 0x00, 0x00, 0x10, NBAR) APMC = HDSI /* \HDSI */ SLPS = 0x01 SI3R = 0x00 TPST (0x60E1) } D0NV = 0x01 } } ``` Create a quirk that will run early in the resume process to prevent this SMI from running. As any of these machines are fixed, they can be peeled back from this quirk or narrowed down to individual firmware versions. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1910 Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1689 Signed-off-by: Mario Limonciello Tested-by: Mark Pearson Link: https://lore.kernel.org/r/20220429030501.1909-3-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 126 +++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 2820205c01fd..8180d7789f56 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -312,12 +312,17 @@ struct ibm_init_struct { /* DMI Quirks */ struct quirk_entry { bool btusb_bug; + u32 s2idle_bug_mmio; }; static struct quirk_entry quirk_btusb_bug = { .btusb_bug = true, }; +static struct quirk_entry quirk_s2idle_bug = { + .s2idle_bug_mmio = 0xfed80380, +}; + static struct { u32 bluetooth:1; u32 hotkey:1; @@ -4418,9 +4423,119 @@ static const struct dmi_system_id fwbug_list[] __initconst = { DMI_MATCH(DMI_BOARD_NAME, "20MV"), }, }, + { + .ident = "L14 Gen2 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20X5"), + } + }, + { + .ident = "T14s Gen2 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20XF"), + } + }, + { + .ident = "X13 Gen2 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20XH"), + } + }, + { + .ident = "T14 Gen2 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20XK"), + } + }, + { + .ident = "T14 Gen1 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20UD"), + } + }, + { + .ident = "T14 Gen1 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20UE"), + } + }, + { + .ident = "T14s Gen1 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20UH"), + } + }, + { + .ident = "P14s Gen1 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20Y1"), + } + }, + { + .ident = "P14s Gen2 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21A0"), + } + }, {} }; +#ifdef CONFIG_SUSPEND +/* + * Lenovo laptops from a variety of generations run a SMI handler during the D3->D0 + * transition that occurs specifically when exiting suspend to idle which can cause + * large delays during resume when the IOMMU translation layer is enabled (the default + * behavior) for NVME devices: + * + * To avoid this firmware problem, skip the SMI handler on these machines before the + * D0 transition occurs. + */ +static void thinkpad_acpi_amd_s2idle_restore(void) +{ + struct resource *res; + void __iomem *addr; + u8 val; + + res = request_mem_region_muxed(tp_features.quirks->s2idle_bug_mmio, 1, + "thinkpad_acpi_pm80"); + if (!res) + return; + + addr = ioremap(tp_features.quirks->s2idle_bug_mmio, 1); + if (!addr) + goto cleanup_resource; + + val = ioread8(addr); + iowrite8(val & ~BIT(0), addr); + + iounmap(addr); +cleanup_resource: + release_resource(res); +} + +static struct acpi_s2idle_dev_ops thinkpad_acpi_s2idle_dev_ops = { + .restore = thinkpad_acpi_amd_s2idle_restore, +}; +#endif + static const struct pci_device_id fwbug_cards_ids[] __initconst = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x24F3) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x24FD) }, @@ -11472,6 +11587,10 @@ static void thinkpad_acpi_module_exit(void) tpacpi_lifecycle = TPACPI_LIFE_EXITING; +#ifdef CONFIG_SUSPEND + if (tp_features.quirks && tp_features.quirks->s2idle_bug_mmio) + acpi_unregister_lps0_dev(&thinkpad_acpi_s2idle_dev_ops); +#endif if (tpacpi_hwmon) hwmon_device_unregister(tpacpi_hwmon); if (tp_features.sensors_pdrv_registered) @@ -11645,6 +11764,13 @@ static int __init thinkpad_acpi_module_init(void) tp_features.input_device_registered = 1; } +#ifdef CONFIG_SUSPEND + if (tp_features.quirks && tp_features.quirks->s2idle_bug_mmio) { + if (!acpi_register_lps0_dev(&thinkpad_acpi_s2idle_dev_ops)) + pr_info("Using s2idle quirk to avoid %s platform firmware bug\n", + (dmi_id && dmi_id->ident) ? dmi_id->ident : ""); + } +#endif return 0; } From aa2fef6f40e6ccc22e932b36898f260f0e5a021a Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Mon, 2 May 2022 15:12:00 -0400 Subject: [PATCH 660/803] platform/x86: thinkpad_acpi: Correct dual fan probe There was an issue with the dual fan probe whereby the probe was failing as it assuming that second_fan support was not available. Corrected the logic so the probe works correctly. Cleaned up so quirks only used if 2nd fan not detected. Tested on X1 Carbon 10 (2 fans), X1 Carbon 9 (2 fans) and T490 (1 fan) Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20220502191200.63470-1-markpearson@lenovo.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 8180d7789f56..e6cb4a14cdd4 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -8880,24 +8880,27 @@ static int __init fan_init(struct ibm_init_struct *iibm) fan_status_access_mode = TPACPI_FAN_RD_TPEC; if (quirks & TPACPI_FAN_Q1) fan_quirk1_setup(); - if (quirks & TPACPI_FAN_2FAN) { - tp_features.second_fan = 1; - pr_info("secondary fan support enabled\n"); - } - if (quirks & TPACPI_FAN_2CTL) { - tp_features.second_fan = 1; - tp_features.second_fan_ctl = 1; - pr_info("secondary fan control enabled\n"); - } /* Try and probe the 2nd fan */ + tp_features.second_fan = 1; /* needed for get_speed to work */ res = fan2_get_speed(&speed); if (res >= 0) { /* It responded - so let's assume it's there */ tp_features.second_fan = 1; tp_features.second_fan_ctl = 1; pr_info("secondary fan control detected & enabled\n"); + } else { + /* Fan not auto-detected */ + tp_features.second_fan = 0; + if (quirks & TPACPI_FAN_2FAN) { + tp_features.second_fan = 1; + pr_info("secondary fan support enabled\n"); + } + if (quirks & TPACPI_FAN_2CTL) { + tp_features.second_fan = 1; + tp_features.second_fan_ctl = 1; + pr_info("secondary fan control enabled\n"); + } } - } else { pr_err("ThinkPad ACPI EC access misbehaving, fan status and control unavailable\n"); return -ENODEV; From 2cdfa0c20d58da3757054797c2974c967035926a Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Fri, 29 Apr 2022 08:23:22 -0400 Subject: [PATCH 661/803] platform/x86/intel: Fix 'rmmod pmt_telemetry' panic 'rmmod pmt_telemetry' panics with: BUG: kernel NULL pointer dereference, address: 0000000000000040 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 4 PID: 1697 Comm: rmmod Tainted: G S W -------- --- 5.18.0-rc4 #3 Hardware name: Intel Corporation Alder Lake Client Platform/AlderLake-P DDR5 RVP, BIOS ADLPFWI1.R00.3056.B00.2201310233 01/31/2022 RIP: 0010:device_del+0x1b/0x3d0 Code: e8 1a d9 e9 ff e9 58 ff ff ff 48 8b 08 eb dc 0f 1f 44 00 00 41 56 41 55 41 54 55 48 8d af 80 00 00 00 53 48 89 fb 48 83 ec 18 <4c> 8b 67 40 48 89 ef 65 48 8b 04 25 28 00 00 00 48 89 44 24 10 31 RSP: 0018:ffffb520415cfd60 EFLAGS: 00010286 RAX: 0000000000000070 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000080 R08: ffffffffffffffff R09: ffffb520415cfd78 R10: 0000000000000002 R11: ffffb520415cfd78 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f7e198e5740(0000) GS:ffff905c9f700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000040 CR3: 000000010782a005 CR4: 0000000000770ee0 PKRU: 55555554 Call Trace: ? __xa_erase+0x53/0xb0 device_unregister+0x13/0x50 intel_pmt_dev_destroy+0x34/0x60 [pmt_class] pmt_telem_remove+0x40/0x50 [pmt_telemetry] auxiliary_bus_remove+0x18/0x30 device_release_driver_internal+0xc1/0x150 driver_detach+0x44/0x90 bus_remove_driver+0x74/0xd0 auxiliary_driver_unregister+0x12/0x20 pmt_telem_exit+0xc/0xe4a [pmt_telemetry] __x64_sys_delete_module+0x13a/0x250 ? syscall_trace_enter.isra.19+0x11e/0x1a0 do_syscall_64+0x58/0x80 ? syscall_exit_to_user_mode+0x12/0x30 ? do_syscall_64+0x67/0x80 ? syscall_exit_to_user_mode+0x12/0x30 ? do_syscall_64+0x67/0x80 ? syscall_exit_to_user_mode+0x12/0x30 ? do_syscall_64+0x67/0x80 ? exc_page_fault+0x64/0x140 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f7e1803a05b Code: 73 01 c3 48 8b 0d 2d 4e 38 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d fd 4d 38 00 f7 d8 64 89 01 48 The probe function, pmt_telem_probe(), adds an entry for devices even if they have not been initialized. This results in the array of initialized devices containing both initialized and uninitialized entries. This causes a panic in the remove function, pmt_telem_remove() which expects the array to only contain initialized entries. Only use an entry when a device is initialized. Cc: "David E. Box" Cc: Hans de Goede Cc: Mark Gross Cc: platform-driver-x86@vger.kernel.org Signed-off-by: David Arcari Signed-off-by: Prarit Bhargava Reviewed-by: David E. Box Link: https://lore.kernel.org/r/20220429122322.2550003-1-prarit@redhat.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/pmt/telemetry.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/pmt/telemetry.c b/drivers/platform/x86/intel/pmt/telemetry.c index 6b6f3e2a617a..f73ecfd4a309 100644 --- a/drivers/platform/x86/intel/pmt/telemetry.c +++ b/drivers/platform/x86/intel/pmt/telemetry.c @@ -103,7 +103,7 @@ static int pmt_telem_probe(struct auxiliary_device *auxdev, const struct auxilia auxiliary_set_drvdata(auxdev, priv); for (i = 0; i < intel_vsec_dev->num_resources; i++) { - struct intel_pmt_entry *entry = &priv->entry[i]; + struct intel_pmt_entry *entry = &priv->entry[priv->num_entries]; ret = intel_pmt_dev_create(entry, &pmt_telem_ns, intel_vsec_dev, i); if (ret < 0) From ed13d4ac57474d959c40fd05d8860e2b1607becb Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 29 Apr 2022 20:00:49 +0200 Subject: [PATCH 662/803] platform/surface: gpe: Add support for Surface Pro 8 The new Surface Pro 8 uses GPEs for lid events as well. Add an entry for that so that the lid can be used to wake the device. Note that this is a device with a keyboard type-cover, where this acts as the "lid". Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20220429180049.1282447-1-luzmaximilian@gmail.com Signed-off-by: Hans de Goede --- drivers/platform/surface/surface_gpe.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/platform/surface/surface_gpe.c b/drivers/platform/surface/surface_gpe.c index c1775db29efb..ec66fde28e75 100644 --- a/drivers/platform/surface/surface_gpe.c +++ b/drivers/platform/surface/surface_gpe.c @@ -99,6 +99,14 @@ static const struct dmi_system_id dmi_lid_device_table[] = { }, .driver_data = (void *)lid_device_props_l4D, }, + { + .ident = "Surface Pro 8", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 8"), + }, + .driver_data = (void *)lid_device_props_l4B, + }, { .ident = "Surface Book 1", .matches = { From 44acfc22c7d055d9c4f8f0974ee28422405b971a Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 29 Apr 2022 21:57:38 +0200 Subject: [PATCH 663/803] platform/surface: aggregator: Fix initialization order when compiling as builtin module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building the Surface Aggregator Module (SAM) core, registry, and other SAM client drivers as builtin modules (=y), proper initialization order is not guaranteed. Due to this, client driver registration (triggered by device registration in the registry) races against bus initialization in the core. If any attempt is made at registering the device driver before the bus has been initialized (i.e. if bus initialization fails this race) driver registration will fail with a message similar to: Driver surface_battery was unable to register with bus_type surface_aggregator because the bus was not initialized Switch from module_init() to subsys_initcall() to resolve this issue. Note that the serdev subsystem uses postcore_initcall() so we are still able to safely register the serdev device driver for the core. Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem") Reported-by: Blaž Hrastnik Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20220429195738.535751-1-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/surface/aggregator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/surface/aggregator/core.c b/drivers/platform/surface/aggregator/core.c index d384d36098c2..a62c5dfe42d6 100644 --- a/drivers/platform/surface/aggregator/core.c +++ b/drivers/platform/surface/aggregator/core.c @@ -817,7 +817,7 @@ err_cpkg: err_bus: return status; } -module_init(ssam_core_init); +subsys_initcall(ssam_core_init); static void __exit ssam_core_exit(void) { From aa482ddca85a3485be0e7b83a0789dc4d987670b Mon Sep 17 00:00:00 2001 From: Richard Gong Date: Fri, 8 Apr 2022 12:08:38 -0500 Subject: [PATCH 664/803] drm/amdgpu: vi: disable ASPM on Intel Alder Lake based systems Active State Power Management (ASPM) feature is enabled since kernel 5.14. There are some AMD Volcanic Islands (VI) GFX cards, such as the WX3200 and RX640, that do not work with ASPM-enabled Intel Alder Lake based systems. Using these GFX cards as video/display output, Intel Alder Lake based systems will freeze after suspend/resume. The issue was originally reported on one system (Dell Precision 3660 with BIOS version 0.14.81), but was later confirmed to affect at least 4 pre-production Alder Lake based systems. Add an extra check to disable ASPM on Intel Alder Lake based systems with the problematic AMD Volcanic Islands GFX cards. Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default") Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885 Signed-off-by: Richard Gong Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 039b90cdc3bc..45f0188c4273 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -81,6 +81,10 @@ #include "mxgpu_vi.h" #include "amdgpu_dm.h" +#if IS_ENABLED(CONFIG_X86) +#include +#endif + #define ixPCIE_LC_L1_PM_SUBSTATE 0x100100C6 #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK 0x00000001L #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK 0x00000002L @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev) WREG32_PCIE(ixPCIE_LC_CNTL, data); } +static bool aspm_support_quirk_check(void) +{ +#if IS_ENABLED(CONFIG_X86) + struct cpuinfo_x86 *c = &cpu_data(0); + + return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE); +#else + return true; +#endif +} + static void vi_program_aspm(struct amdgpu_device *adev) { u32 data, data1, orig; bool bL1SS = false; bool bClkReqSupport = true; - if (!amdgpu_device_should_use_aspm(adev)) + if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check()) return; if (adev->flags & AMD_IS_APU || From 9b9bd3f640640f94272a461b2dfe558f91b322c5 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Sat, 19 Mar 2022 16:34:24 -0400 Subject: [PATCH 665/803] drm/amd/display: undo clearing of z10 related function pointers [Why] Z10 and S0i3 have some shared path. Previous code clean up , incorrectly removed these pointers, which breaks s0i3 restore [How] Do not clear the function pointers based on Z10 disable. Reviewed-by: Nicholas Kazlauskas Acked-by: Pavle Kotarac Signed-off-by: Eric Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index d7559e5a99ce..e708f07fe75a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -153,9 +153,4 @@ void dcn31_hw_sequencer_construct(struct dc *dc) dc->hwss.init_hw = dcn20_fpga_init_hw; dc->hwseq->funcs.init_pipes = NULL; } - if (dc->debug.disable_z10) { - /*hw not support z10 or sw disable it*/ - dc->hwss.z10_restore = NULL; - dc->hwss.z10_save_init = NULL; - } } From dba785798526a3282cc4d0f0ea751883715dbbb4 Mon Sep 17 00:00:00 2001 From: Puyou Lu Date: Fri, 6 May 2022 16:06:30 +0800 Subject: [PATCH 666/803] gpio: pca953x: fix irq_stat not updated when irq is disabled (irq_mask not set) When one port's input state get inverted (eg. from low to hight) after pca953x_irq_setup but before setting irq_mask (by some other driver such as "gpio-keys"), the next inversion of this port (eg. from hight to low) will not be triggered any more (because irq_stat is not updated at the first time). Issue should be fixed after this commit. Fixes: 89ea8bbe9c3e ("gpio: pca953x.c: add interrupt handling capability") Signed-off-by: Puyou Lu Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index d2fe76f3f34f..8726921a1129 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -762,11 +762,11 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pendin bitmap_xor(cur_stat, new_stat, old_stat, gc->ngpio); bitmap_and(trigger, cur_stat, chip->irq_mask, gc->ngpio); + bitmap_copy(chip->irq_stat, new_stat, gc->ngpio); + if (bitmap_empty(trigger, gc->ngpio)) return false; - bitmap_copy(chip->irq_stat, new_stat, gc->ngpio); - bitmap_and(cur_stat, chip->irq_trig_fall, old_stat, gc->ngpio); bitmap_and(old_stat, chip->irq_trig_raise, new_stat, gc->ngpio); bitmap_or(new_stat, old_stat, cur_stat, gc->ngpio); From 5a4eb91634711aa6278d0f5b3d33c3ea63397639 Mon Sep 17 00:00:00 2001 From: Rohit Agarwal Date: Mon, 11 Apr 2022 15:20:12 +0530 Subject: [PATCH 667/803] dt-bindings: arm-smmu: Add binding for SDX65 SMMU Add devicetree binding for Qualcomm SDX65 SMMU. Signed-off-by: Rohit Agarwal Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/1649670615-21268-5-git-send-email-quic_rohiagar@quicinc.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index da5381c8ee11..1f99bffea537 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -39,6 +39,7 @@ properties: - qcom,sc8180x-smmu-500 - qcom,sdm845-smmu-500 - qcom,sdx55-smmu-500 + - qcom,sdx65-smmu-500 - qcom,sm6350-smmu-500 - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 From d9ed8af1dee37f181096631fb03729ece98ba816 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 25 Apr 2022 19:41:36 +0800 Subject: [PATCH 668/803] iommu/arm-smmu: fix possible null-ptr-deref in arm_smmu_device_probe() It will cause null-ptr-deref when using 'res', if platform_get_resource() returns NULL, so move using 'res' after devm_ioremap_resource() that will check it to avoid null-ptr-deref. And use devm_platform_get_and_ioremap_resource() to simplify code. Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220425114136.2649310-1-yangyingliang@huawei.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 568cce590ccc..52b71f6aee3f 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -2092,11 +2092,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (err) return err; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ioaddr = res->start; - smmu->base = devm_ioremap_resource(dev, res); + smmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(smmu->base)) return PTR_ERR(smmu->base); + ioaddr = res->start; /* * The resource size should effectively match the value of SMMU_TOP; * stash that temporarily until we know PAGESIZE to validate it with. From b131fa8c1d2afd05d0b7598621114674289c2fbb Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 25 Apr 2022 19:45:25 +0800 Subject: [PATCH 669/803] iommu/arm-smmu-v3: check return value after calling platform_get_resource() It will cause null-ptr-deref if platform_get_resource() returns NULL, we need check the return value. Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220425114525.2651143-1-yangyingliang@huawei.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 627a3ed5ee8f..88817a3376ef 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3770,6 +3770,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Base address */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; if (resource_size(res) < arm_smmu_resource_size(smmu)) { dev_err(dev, "MMIO region too small (%pr)\n", res); return -EINVAL; From cbd23144f7662b00bcde32a938c4a4057e476d68 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 26 Apr 2022 14:04:45 +0100 Subject: [PATCH 670/803] iommu/arm-smmu-v3-sva: Fix mm use-after-free We currently call arm64_mm_context_put() without holding a reference to the mm, which can result in use-after-free. Call mmgrab()/mmdrop() to ensure the mm only gets freed after we unpinned the ASID. Fixes: 32784a9562fb ("iommu/arm-smmu-v3: Implement iommu_sva_bind/unbind()") Signed-off-by: Jean-Philippe Brucker Tested-by: Zhangfei Gao Link: https://lore.kernel.org/r/20220426130444.300556-1-jean-philippe@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index c623dae1e115..1ef7bbb4acf3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "arm-smmu-v3.h" @@ -96,9 +97,14 @@ static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm) struct arm_smmu_ctx_desc *cd; struct arm_smmu_ctx_desc *ret = NULL; + /* Don't free the mm until we release the ASID */ + mmgrab(mm); + asid = arm64_mm_context_get(mm); - if (!asid) - return ERR_PTR(-ESRCH); + if (!asid) { + err = -ESRCH; + goto out_drop_mm; + } cd = kzalloc(sizeof(*cd), GFP_KERNEL); if (!cd) { @@ -165,6 +171,8 @@ out_free_cd: kfree(cd); out_put_context: arm64_mm_context_put(mm); +out_drop_mm: + mmdrop(mm); return err < 0 ? ERR_PTR(err) : ret; } @@ -173,6 +181,7 @@ static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd) if (arm_smmu_free_asid(cd)) { /* Unpin ASID */ arm64_mm_context_put(cd->mm); + mmdrop(cd->mm); kfree(cd); } } From 38db6b41b2f499c3a6e068f7796a2dd050b24166 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 3 May 2022 09:34:28 -0700 Subject: [PATCH 671/803] dt-bindings: arm-smmu: Add compatible for Qualcomm SC8280XP Add compatible for the Qualcomm SC8280XP platform to the ARM SMMU DeviceTree binding. Signed-off-by: Bjorn Andersson Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220503163429.960998-2-bjorn.andersson@linaro.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 1f99bffea537..96891576a575 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -37,6 +37,7 @@ properties: - qcom,sc7180-smmu-500 - qcom,sc7280-smmu-500 - qcom,sc8180x-smmu-500 + - qcom,sc8280xp-smmu-500 - qcom,sdm845-smmu-500 - qcom,sdx55-smmu-500 - qcom,sdx65-smmu-500 From d044023e219d415d2af87e5e48e9e03488ad145c Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 3 May 2022 09:34:29 -0700 Subject: [PATCH 672/803] iommu/arm-smmu-qcom: Add SC8280XP support Add the Qualcomm SC8280XP platform to the list of compatible for which the Qualcomm-impl of the ARM SMMU should apply. Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220503163429.960998-3-bjorn.andersson@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index ba6298c7140e..7820711c4560 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -408,6 +408,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sc7180-smmu-500" }, { .compatible = "qcom,sc7280-smmu-500" }, { .compatible = "qcom,sc8180x-smmu-500" }, + { .compatible = "qcom,sc8280xp-smmu-500" }, { .compatible = "qcom,sdm630-smmu-v2" }, { .compatible = "qcom,sdm845-smmu-500" }, { .compatible = "qcom,sm6125-smmu-500" }, From c02bda09f91a340bc9fae4e236ce3e3ec6b1c9ff Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 29 Apr 2022 10:22:41 +0200 Subject: [PATCH 673/803] dt-bindings: arm-smmu: Document nvidia,memory-controller property On NVIDIA SoC's the ARM SMMU needs to interact with the memory controller in order to map memory clients to the corresponding stream IDs. Document how the nvidia,memory-controller property can be used to achieve this. Note that this is a backwards-incompatible change that is, however, necessary to ensure correctness. Without the new property, most of the devices would still work but it is not guaranteed that all will. Reviewed-by: Rob Herring Acked-by: Will Deacon Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20220429082243.496000-2-thierry.reding@gmail.com Signed-off-by: Will Deacon --- .../devicetree/bindings/iommu/arm,smmu.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 96891576a575..ffd537a94a15 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -159,6 +159,17 @@ properties: power-domains: maxItems: 1 + nvidia,memory-controller: + description: | + A phandle to the memory controller on NVIDIA Tegra186 and later SoCs. + The memory controller needs to be programmed with a mapping of memory + client IDs to ARM SMMU stream IDs. + + If this property is absent, the mapping programmed by early firmware + will be used and it is not guaranteed that IOMMU translations will be + enabled for any given device. + $ref: /schemas/types.yaml#/definitions/phandle + required: - compatible - reg @@ -181,6 +192,12 @@ allOf: reg: minItems: 1 maxItems: 2 + + # The reference to the memory controller is required to ensure that the + # memory client to stream ID mapping can be done synchronously with the + # IOMMU attachment. + required: + - nvidia,memory-controller else: properties: reg: From 95d5aeabda00baaccb2be262511adac74255dd7c Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 29 Apr 2022 10:22:42 +0200 Subject: [PATCH 674/803] dt-bindings: arm-smmu: Add compatible for Tegra234 SOC The NVIDIA Tegra234 SoC comes with one single-instance ARM SMMU used by isochronous memory clients and two dual-instance ARM SMMUs used by non- isochronous memory clients. Reviewed-by: Rob Herring Acked-by: Will Deacon Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20220429082243.496000-3-thierry.reding@gmail.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index ffd537a94a15..76fc2c0f4d54 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -64,8 +64,9 @@ properties: for improved performance. items: - enum: - - nvidia,tegra194-smmu - nvidia,tegra186-smmu + - nvidia,tegra194-smmu + - nvidia,tegra234-smmu - const: nvidia,smmu-500 - items: - const: arm,mmu-500 @@ -185,8 +186,9 @@ allOf: compatible: contains: enum: - - nvidia,tegra194-smmu - nvidia,tegra186-smmu + - nvidia,tegra194-smmu + - nvidia,tegra234-smmu then: properties: reg: From 5ca216155b5ead6e2d64738644afed9ac17f1fe3 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 29 Apr 2022 10:22:43 +0200 Subject: [PATCH 675/803] iommu/arm-smmu: Support Tegra234 SMMU Allow the NVIDIA-specific ARM SMMU implementation to bind to the SMMU instances found on Tegra234. Acked-by: Robin Murphy Acked-by: Will Deacon Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20220429082243.496000-4-thierry.reding@gmail.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-impl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c index 2c25cce38060..658f3cc83278 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c @@ -211,7 +211,8 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) if (of_property_read_bool(np, "calxeda,smmu-secure-config-access")) smmu->impl = &calxeda_impl; - if (of_device_is_compatible(np, "nvidia,tegra194-smmu") || + if (of_device_is_compatible(np, "nvidia,tegra234-smmu") || + of_device_is_compatible(np, "nvidia,tegra194-smmu") || of_device_is_compatible(np, "nvidia,tegra186-smmu")) return nvidia_smmu_impl_init(smmu); From 0c2c7c069285374fc8feacddc0498f8ab7627117 Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Mon, 2 May 2022 09:58:07 -0700 Subject: [PATCH 676/803] KVM: SEV: Mark nested locking of vcpu->lock svm_vm_migrate_from() uses sev_lock_vcpus_for_migration() to lock all source and target vcpu->locks. Unfortunately there is an 8 subclass limit, so a new subclass cannot be used for each vCPU. Instead maintain ownership of the first vcpu's mutex.dep_map using a role specific subclass: source vs target. Release the other vcpu's mutex.dep_maps. Fixes: b56639318bb2b ("KVM: SEV: Add support for SEV intra host migration") Reported-by: John Sperbeck Suggested-by: David Rientjes Suggested-by: Sean Christopherson Suggested-by: Paolo Bonzini Cc: Hillf Danton Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Peter Gonda Message-Id: <20220502165807.529624-1-pgonda@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0ad70c12c7c3..7c392873626f 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1594,24 +1594,51 @@ static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm) atomic_set_release(&src_sev->migration_in_progress, 0); } +/* vCPU mutex subclasses. */ +enum sev_migration_role { + SEV_MIGRATION_SOURCE = 0, + SEV_MIGRATION_TARGET, + SEV_NR_MIGRATION_ROLES, +}; -static int sev_lock_vcpus_for_migration(struct kvm *kvm) +static int sev_lock_vcpus_for_migration(struct kvm *kvm, + enum sev_migration_role role) { struct kvm_vcpu *vcpu; unsigned long i, j; + bool first = true; kvm_for_each_vcpu(i, vcpu, kvm) { - if (mutex_lock_killable(&vcpu->mutex)) + if (mutex_lock_killable_nested(&vcpu->mutex, role)) goto out_unlock; + + if (first) { + /* + * Reset the role to one that avoids colliding with + * the role used for the first vcpu mutex. + */ + role = SEV_NR_MIGRATION_ROLES; + first = false; + } else { + mutex_release(&vcpu->mutex.dep_map, _THIS_IP_); + } } return 0; out_unlock: + + first = true; kvm_for_each_vcpu(j, vcpu, kvm) { if (i == j) break; + if (first) + first = false; + else + mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_); + + mutex_unlock(&vcpu->mutex); } return -EINTR; @@ -1621,8 +1648,15 @@ static void sev_unlock_vcpus_for_migration(struct kvm *kvm) { struct kvm_vcpu *vcpu; unsigned long i; + bool first = true; kvm_for_each_vcpu(i, vcpu, kvm) { + if (first) + first = false; + else + mutex_acquire(&vcpu->mutex.dep_map, + SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_); + mutex_unlock(&vcpu->mutex); } } @@ -1748,10 +1782,10 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) charged = true; } - ret = sev_lock_vcpus_for_migration(kvm); + ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE); if (ret) goto out_dst_cgroup; - ret = sev_lock_vcpus_for_migration(source_kvm); + ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET); if (ret) goto out_dst_vcpu; From 053d2290c0307e3642e75e0185ddadf084dc36c1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 2 May 2022 22:18:50 +0000 Subject: [PATCH 677/803] KVM: VMX: Exit to userspace if vCPU has injected exception and invalid state Exit to userspace with an emulation error if KVM encounters an injected exception with invalid guest state, in addition to the existing check of bailing if there's a pending exception (KVM doesn't support emulating exceptions except when emulating real mode via vm86). In theory, KVM should never get to such a situation as KVM is supposed to exit to userspace before injecting an exception with invalid guest state. But in practice, userspace can intervene and manually inject an exception and/or stuff registers to force invalid guest state while a previously injected exception is awaiting reinjection. Fixes: fc4fad79fc3d ("KVM: VMX: Reject KVM_RUN if emulation is required with pending exception") Reported-by: syzbot+cfafed3bb76d3e37581b@syzkaller.appspotmail.com Signed-off-by: Sean Christopherson Message-Id: <20220502221850.131873-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d58b763df855..610355b9ccce 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5472,7 +5472,7 @@ static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); return vmx->emulation_required && !vmx->rmode.vm86_active && - vcpu->arch.exception.pending; + (vcpu->arch.exception.pending || vcpu->arch.exception.injected); } static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) From 486b9eee57ddca5c9a2d59fc41153f36002e0a00 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sat, 23 Apr 2022 12:20:21 +0200 Subject: [PATCH 678/803] ice: Fix race during aux device (un)plugging Function ice_plug_aux_dev() assigns pf->adev field too early prior aux device initialization and on other side ice_unplug_aux_dev() starts aux device deinit and at the end assigns NULL to pf->adev. This is wrong because pf->adev should always be non-NULL only when aux device is fully initialized and ready. This wrong order causes a crash when ice_send_event_to_aux() call occurs because that function depends on non-NULL value of pf->adev and does not assume that aux device is half-initialized or half-destroyed. After order correction the race window is tiny but it is still there, as Leon mentioned and manipulation with pf->adev needs to be protected by mutex. Fix (un-)plugging functions so pf->adev field is set after aux device init and prior aux device destroy and protect pf->adev assignment by new mutex. This mutex is also held during ice_send_event_to_aux() call to ensure that aux device is valid during that call. Note that device lock used ice_send_event_to_aux() needs to be kept to avoid race with aux drv unload. Reproducer: cycle=1 while :;do echo "#### Cycle: $cycle" ip link set ens7f0 mtu 9000 ip link add bond0 type bond mode 1 miimon 100 ip link set bond0 up ifenslave bond0 ens7f0 ip link set bond0 mtu 9000 ethtool -L ens7f0 combined 1 ip link del bond0 ip link set ens7f0 mtu 1500 sleep 1 let cycle++ done In short when the device is added/removed to/from bond the aux device is unplugged/plugged. When MTU of the device is changed an event is sent to aux device asynchronously. This can race with (un)plugging operation and because pf->adev is set too early (plug) or too late (unplug) the function ice_send_event_to_aux() can touch uninitialized or destroyed fields. In the case of crash below pf->adev->dev.mutex. Crash: [ 53.372066] bond0: (slave ens7f0): making interface the new active one [ 53.378622] bond0: (slave ens7f0): Enslaving as an active interface with an u p link [ 53.386294] IPv6: ADDRCONF(NETDEV_CHANGE): bond0: link becomes ready [ 53.549104] bond0: (slave ens7f1): Enslaving as a backup interface with an up link [ 54.118906] ice 0000:ca:00.0 ens7f0: Number of in use tx queues changed inval idating tc mappings. Priority traffic classification disabled! [ 54.233374] ice 0000:ca:00.1 ens7f1: Number of in use tx queues changed inval idating tc mappings. Priority traffic classification disabled! [ 54.248204] bond0: (slave ens7f0): Releasing backup interface [ 54.253955] bond0: (slave ens7f1): making interface the new active one [ 54.274875] bond0: (slave ens7f1): Releasing backup interface [ 54.289153] bond0 (unregistering): Released all slaves [ 55.383179] MII link monitoring set to 100 ms [ 55.398696] bond0: (slave ens7f0): making interface the new active one [ 55.405241] BUG: kernel NULL pointer dereference, address: 0000000000000080 [ 55.405289] bond0: (slave ens7f0): Enslaving as an active interface with an u p link [ 55.412198] #PF: supervisor write access in kernel mode [ 55.412200] #PF: error_code(0x0002) - not-present page [ 55.412201] PGD 25d2ad067 P4D 0 [ 55.412204] Oops: 0002 [#1] PREEMPT SMP NOPTI [ 55.412207] CPU: 0 PID: 403 Comm: kworker/0:2 Kdump: loaded Tainted: G S 5.17.0-13579-g57f2d6540f03 #1 [ 55.429094] bond0: (slave ens7f1): Enslaving as a backup interface with an up link [ 55.430224] Hardware name: Dell Inc. PowerEdge R750/06V45N, BIOS 1.4.4 10/07/ 2021 [ 55.430226] Workqueue: ice ice_service_task [ice] [ 55.468169] RIP: 0010:mutex_unlock+0x10/0x20 [ 55.472439] Code: 0f b1 13 74 96 eb e0 4c 89 ee eb d8 e8 79 54 ff ff 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 65 48 8b 04 25 40 ef 01 00 31 d2 48 0f b1 17 75 01 c3 e9 e3 fe ff ff 0f 1f 00 0f 1f 44 00 00 48 [ 55.491186] RSP: 0018:ff4454230d7d7e28 EFLAGS: 00010246 [ 55.496413] RAX: ff1a79b208b08000 RBX: ff1a79b2182e8880 RCX: 0000000000000001 [ 55.503545] RDX: 0000000000000000 RSI: ff4454230d7d7db0 RDI: 0000000000000080 [ 55.510678] RBP: ff1a79d1c7e48b68 R08: ff4454230d7d7db0 R09: 0000000000000041 [ 55.517812] R10: 00000000000000a5 R11: 00000000000006e6 R12: ff1a79d1c7e48bc0 [ 55.524945] R13: 0000000000000000 R14: ff1a79d0ffc305c0 R15: 0000000000000000 [ 55.532076] FS: 0000000000000000(0000) GS:ff1a79d0ffc00000(0000) knlGS:0000000000000000 [ 55.540163] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 55.545908] CR2: 0000000000000080 CR3: 00000003487ae003 CR4: 0000000000771ef0 [ 55.553041] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 55.560173] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 55.567305] PKRU: 55555554 [ 55.570018] Call Trace: [ 55.572474] [ 55.574579] ice_service_task+0xaab/0xef0 [ice] [ 55.579130] process_one_work+0x1c5/0x390 [ 55.583141] ? process_one_work+0x390/0x390 [ 55.587326] worker_thread+0x30/0x360 [ 55.590994] ? process_one_work+0x390/0x390 [ 55.595180] kthread+0xe6/0x110 [ 55.598325] ? kthread_complete_and_exit+0x20/0x20 [ 55.603116] ret_from_fork+0x1f/0x30 [ 55.606698] Fixes: f9f5301e7e2d ("ice: Register auxiliary device to provide RDMA") Reviewed-by: Leon Romanovsky Signed-off-by: Ivan Vecera Reviewed-by: Dave Ertman Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_idc.c | 25 +++++++++++++++-------- drivers/net/ethernet/intel/ice/ice_main.c | 2 ++ 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 8ed3c9ab7ff7..a895e3a8e988 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -540,6 +540,7 @@ struct ice_pf { struct mutex avail_q_mutex; /* protects access to avail_[rx|tx]qs */ struct mutex sw_mutex; /* lock for protecting VSI alloc flow */ struct mutex tc_mutex; /* lock to protect TC changes */ + struct mutex adev_mutex; /* lock to protect aux device access */ u32 msg_enable; struct ice_ptp ptp; struct tty_driver *ice_gnss_tty_driver; diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index 25a436d342c2..3e3b2ed4cd5d 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -37,14 +37,17 @@ void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event) if (WARN_ON_ONCE(!in_task())) return; + mutex_lock(&pf->adev_mutex); if (!pf->adev) - return; + goto finish; device_lock(&pf->adev->dev); iadrv = ice_get_auxiliary_drv(pf); if (iadrv && iadrv->event_handler) iadrv->event_handler(pf, event); device_unlock(&pf->adev->dev); +finish: + mutex_unlock(&pf->adev_mutex); } /** @@ -290,7 +293,6 @@ int ice_plug_aux_dev(struct ice_pf *pf) return -ENOMEM; adev = &iadev->adev; - pf->adev = adev; iadev->pf = pf; adev->id = pf->aux_idx; @@ -300,18 +302,20 @@ int ice_plug_aux_dev(struct ice_pf *pf) ret = auxiliary_device_init(adev); if (ret) { - pf->adev = NULL; kfree(iadev); return ret; } ret = auxiliary_device_add(adev); if (ret) { - pf->adev = NULL; auxiliary_device_uninit(adev); return ret; } + mutex_lock(&pf->adev_mutex); + pf->adev = adev; + mutex_unlock(&pf->adev_mutex); + return 0; } @@ -320,12 +324,17 @@ int ice_plug_aux_dev(struct ice_pf *pf) */ void ice_unplug_aux_dev(struct ice_pf *pf) { - if (!pf->adev) - return; + struct auxiliary_device *adev; - auxiliary_device_delete(pf->adev); - auxiliary_device_uninit(pf->adev); + mutex_lock(&pf->adev_mutex); + adev = pf->adev; pf->adev = NULL; + mutex_unlock(&pf->adev_mutex); + + if (adev) { + auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); + } } /** diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 9a0a358a15c2..949669fed7d6 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3769,6 +3769,7 @@ u16 ice_get_avail_rxq_count(struct ice_pf *pf) static void ice_deinit_pf(struct ice_pf *pf) { ice_service_task_stop(pf); + mutex_destroy(&pf->adev_mutex); mutex_destroy(&pf->sw_mutex); mutex_destroy(&pf->tc_mutex); mutex_destroy(&pf->avail_q_mutex); @@ -3847,6 +3848,7 @@ static int ice_init_pf(struct ice_pf *pf) mutex_init(&pf->sw_mutex); mutex_init(&pf->tc_mutex); + mutex_init(&pf->adev_mutex); INIT_HLIST_HEAD(&pf->aq_wait_list); spin_lock_init(&pf->aq_wait_lock); From 6096dae926a22e2892ef9169f582589c16d39639 Mon Sep 17 00:00:00 2001 From: Anatolii Gerasymenko Date: Thu, 28 Apr 2022 12:01:00 +0000 Subject: [PATCH 679/803] ice: clear stale Tx queue settings before configuring The iAVF driver uses 3 virtchnl op codes to communicate with the PF regarding the VF Tx queues: * VIRTCHNL_OP_CONFIG_VSI_QUEUES configures the hardware and firmware logic for the Tx queues * VIRTCHNL_OP_ENABLE_QUEUES configures the queue interrupts * VIRTCHNL_OP_DISABLE_QUEUES disables the queue interrupts and Tx rings. There is a bug in the iAVF driver due to the race condition between VF reset request and shutdown being executed in parallel. This leads to a break in logic and VIRTCHNL_OP_DISABLE_QUEUES is not being sent. If this occurs, the PF driver never cleans up the Tx queues. This results in leaving behind stale Tx queue settings in the hardware and firmware. The most obvious outcome is that upon the next VIRTCHNL_OP_CONFIG_VSI_QUEUES, the PF will fail to program the Tx scheduler node due to a lack of space. We need to protect ICE driver against such situation. To fix this, make sure we clear existing stale settings out when handling VIRTCHNL_OP_CONFIG_VSI_QUEUES. This ensures we remove the previous settings. Calling ice_vf_vsi_dis_single_txq should be safe as it will do nothing if the queue is not configured. The function already handles the case when the Tx queue is not currently configured and exits with a 0 return in that case. Fixes: 7ad15440acf8 ("ice: Refactor VIRTCHNL_OP_CONFIG_VSI_QUEUES handling") Signed-off-by: Jacob Keller Signed-off-by: Anatolii Gerasymenko Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 68 ++++++++++++++----- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index b72606c9e6d0..2889e050a4c9 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -1307,13 +1307,52 @@ error_param: NULL, 0); } +/** + * ice_vf_vsi_dis_single_txq - disable a single Tx queue + * @vf: VF to disable queue for + * @vsi: VSI for the VF + * @q_id: VF relative (0-based) queue ID + * + * Attempt to disable the Tx queue passed in. If the Tx queue was successfully + * disabled then clear q_id bit in the enabled queues bitmap and return + * success. Otherwise return error. + */ +static int +ice_vf_vsi_dis_single_txq(struct ice_vf *vf, struct ice_vsi *vsi, u16 q_id) +{ + struct ice_txq_meta txq_meta = { 0 }; + struct ice_tx_ring *ring; + int err; + + if (!test_bit(q_id, vf->txq_ena)) + dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n", + q_id, vsi->vsi_num); + + ring = vsi->tx_rings[q_id]; + if (!ring) + return -EINVAL; + + ice_fill_txq_meta(vsi, ring, &txq_meta); + + err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, ring, &txq_meta); + if (err) { + dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n", + q_id, vsi->vsi_num); + return err; + } + + /* Clear enabled queues flag */ + clear_bit(q_id, vf->txq_ena); + + return 0; +} + /** * ice_vc_dis_qs_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer * - * called from the VF to disable all or specific - * queue(s) + * called from the VF to disable all or specific queue(s) */ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) { @@ -1350,30 +1389,15 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) q_map = vqs->tx_queues; for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - struct ice_tx_ring *ring = vsi->tx_rings[vf_q_id]; - struct ice_txq_meta txq_meta = { 0 }; - if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - if (!test_bit(vf_q_id, vf->txq_ena)) - dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n", - vf_q_id, vsi->vsi_num); - - ice_fill_txq_meta(vsi, ring, &txq_meta); - - if (ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, - ring, &txq_meta)) { - dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n", - vf_q_id, vsi->vsi_num); + if (ice_vf_vsi_dis_single_txq(vf, vsi, vf_q_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - - /* Clear enabled queues flag */ - clear_bit(vf_q_id, vf->txq_ena); } } @@ -1622,6 +1646,14 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) if (qpi->txq.ring_len > 0) { vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr; vsi->tx_rings[i]->count = qpi->txq.ring_len; + + /* Disable any existing queue first */ + if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Configure a queue with the requested settings */ if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; From a11b6c1a383ff092f432e040c20e032503785d47 Mon Sep 17 00:00:00 2001 From: Michal Michalik Date: Wed, 20 Apr 2022 14:23:02 +0200 Subject: [PATCH 680/803] ice: fix PTP stale Tx timestamps cleanup Read stale PTP Tx timestamps from PHY on cleanup. After running out of Tx timestamps request handlers, hardware (HW) stops reporting finished requests. Function ice_ptp_tx_tstamp_cleanup() used to only clean up stale handlers in driver and was leaving the hardware registers not read. Not reading stale PTP Tx timestamps prevents next interrupts from arriving and makes timestamping unusable. Fixes: ea9b847cda64 ("ice: enable transmit timestamps for E810 devices") Signed-off-by: Michal Michalik Reviewed-by: Jacob Keller Reviewed-by: Paul Menzel Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index a1cd33273ca4..da025c204577 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -2287,6 +2287,7 @@ ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx) /** * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped + * @hw: pointer to the hw struct * @tx: PTP Tx tracker to clean up * * Loop through the Tx timestamp requests and see if any of them have been @@ -2295,7 +2296,7 @@ ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx) * timestamp will never be captured. This might happen if the packet gets * discarded before it reaches the PHY timestamping block. */ -static void ice_ptp_tx_tstamp_cleanup(struct ice_ptp_tx *tx) +static void ice_ptp_tx_tstamp_cleanup(struct ice_hw *hw, struct ice_ptp_tx *tx) { u8 idx; @@ -2304,11 +2305,16 @@ static void ice_ptp_tx_tstamp_cleanup(struct ice_ptp_tx *tx) for_each_set_bit(idx, tx->in_use, tx->len) { struct sk_buff *skb; + u64 raw_tstamp; /* Check if this SKB has been waiting for too long */ if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ)) continue; + /* Read tstamp to be able to use this register again */ + ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset, + &raw_tstamp); + spin_lock(&tx->lock); skb = tx->tstamps[idx].skb; tx->tstamps[idx].skb = NULL; @@ -2330,7 +2336,7 @@ static void ice_ptp_periodic_work(struct kthread_work *work) ice_ptp_update_cached_phctime(pf); - ice_ptp_tx_tstamp_cleanup(&pf->ptp.port.tx); + ice_ptp_tx_tstamp_cleanup(&pf->hw, &pf->ptp.port.tx); /* Run twice a second */ kthread_queue_delayed_work(ptp->kworker, &ptp->work, From 9e6c6d17d1d6a3f1515ce399f9a011629ec79aa0 Mon Sep 17 00:00:00 2001 From: Lokesh Dhoundiyal Date: Thu, 5 May 2022 14:00:17 +1200 Subject: [PATCH 681/803] ipv4: drop dst in multicast routing path kmemleak reports the following when routing multicast traffic over an ipsec tunnel. Kmemleak output: unreferenced object 0x8000000044bebb00 (size 256): comm "softirq", pid 0, jiffies 4294985356 (age 126.810s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 80 00 00 00 05 13 74 80 ..............t. 80 00 00 00 04 9b bf f9 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000f83947e0>] __kmalloc+0x1e8/0x300 [<00000000b7ed8dca>] metadata_dst_alloc+0x24/0x58 [<0000000081d32c20>] __ipgre_rcv+0x100/0x2b8 [<00000000824f6cf1>] gre_rcv+0x178/0x540 [<00000000ccd4e162>] gre_rcv+0x7c/0xd8 [<00000000c024b148>] ip_protocol_deliver_rcu+0x124/0x350 [<000000006a483377>] ip_local_deliver_finish+0x54/0x68 [<00000000d9271b3a>] ip_local_deliver+0x128/0x168 [<00000000bd4968ae>] xfrm_trans_reinject+0xb8/0xf8 [<0000000071672a19>] tasklet_action_common.isra.16+0xc4/0x1b0 [<0000000062e9c336>] __do_softirq+0x1fc/0x3e0 [<00000000013d7914>] irq_exit+0xc4/0xe0 [<00000000a4d73e90>] plat_irq_dispatch+0x7c/0x108 [<000000000751eb8e>] handle_int+0x16c/0x178 [<000000001668023b>] _raw_spin_unlock_irqrestore+0x1c/0x28 The metadata dst is leaked when ip_route_input_mc() updates the dst for the skb. Commit f38a9eb1f77b ("dst: Metadata destinations") correctly handled dropping the dst in ip_route_input_slow() but missed the multicast case which is handled by ip_route_input_mc(). Drop the dst in ip_route_input_mc() avoiding the leak. Fixes: f38a9eb1f77b ("dst: Metadata destinations") Signed-off-by: Lokesh Dhoundiyal Signed-off-by: Chris Packham Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220505020017.3111846-1-chris.packham@alliedtelesis.co.nz Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 98c6f3429593..57abd27e842c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1753,6 +1753,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #endif RT_CACHE_STAT_INC(in_slow_mc); + skb_dst_drop(skb); skb_dst_set(skb, &rth->dst); return 0; } From 87fd2b091fb33871a7f812658a0971e8e26f903f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 5 Apr 2022 15:21:34 +0100 Subject: [PATCH 682/803] drm/nouveau/tegra: Stop using iommu_present() Even if some IOMMU has registered itself on the platform "bus", that doesn't necessarily mean it provides translation for the device we care about. Replace iommu_present() with a more appropriate check. Signed-off-by: Robin Murphy Reviewed-by: Lyude Paul [added cc for stable] Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org # v5.0+ Link: https://patchwork.freedesktop.org/patch/msgid/70d40ea441da3663c2824d54102b471e9a621f8a.1649168494.git.robin.murphy@arm.com --- drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c index 992cc285f2fe..2ed528c065fa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c @@ -123,7 +123,7 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) mutex_init(&tdev->iommu.mutex); - if (iommu_present(&platform_bus_type)) { + if (device_iommu_mapped(dev)) { tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type); if (!tdev->iommu.domain) goto error; From ab244be47a8f111bc82496a8a20c907236e37f95 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 9 Feb 2022 07:03:11 +0100 Subject: [PATCH 683/803] drm/nouveau: Fix a potential theorical leak in nouveau_get_backlight_name() If successful ida_simple_get() calls are not undone when needed, some additional memory may be allocated and wasted. Here, an ID between 0 and MAX_INT is required. If this ID is >=100, it is not taken into account and is wasted. It should be released. Instead of calling ida_simple_remove(), take advantage of the 'max' parameter to require the ID not to be too big. Should it be too big, it is not allocated and don't need to be freed. While at it, use ida_alloc_xxx()/ida_free() instead to ida_simple_get()/ida_simple_remove(). The latter is deprecated and more verbose. Fixes: db1a0ae21461 ("drm/nouveau/bl: Assign different names to interfaces") Signed-off-by: Christophe JAILLET Reviewed-by: Lyude Paul [Fixed formatting warning from checkpatch] Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/9ba85bca59df6813dc029e743a836451d5173221.1644386541.git.christophe.jaillet@wanadoo.fr --- drivers/gpu/drm/nouveau/nouveau_backlight.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index daf9f87477ba..a2141d3d9b1d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -46,8 +46,9 @@ static bool nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE], struct nouveau_backlight *bl) { - const int nb = ida_simple_get(&bl_ida, 0, 0, GFP_KERNEL); - if (nb < 0 || nb >= 100) + const int nb = ida_alloc_max(&bl_ida, 99, GFP_KERNEL); + + if (nb < 0) return false; if (nb > 0) snprintf(backlight_name, BL_NAME_SIZE, "nv_backlight%d", nb); @@ -414,7 +415,7 @@ nouveau_backlight_init(struct drm_connector *connector) nv_encoder, ops, &props); if (IS_ERR(bl->dev)) { if (bl->id >= 0) - ida_simple_remove(&bl_ida, bl->id); + ida_free(&bl_ida, bl->id); ret = PTR_ERR(bl->dev); goto fail_alloc; } @@ -442,7 +443,7 @@ nouveau_backlight_fini(struct drm_connector *connector) return; if (bl->id >= 0) - ida_simple_remove(&bl_ida, bl->id); + ida_free(&bl_ida, bl->id); backlight_device_unregister(bl->dev); nv_conn->backlight = NULL; From d5076fe4049cadef1f040eda4aaa001bb5424225 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 May 2022 09:19:46 -0700 Subject: [PATCH 684/803] netlink: do not reset transport header in netlink_recvmsg() netlink_recvmsg() does not need to change transport header. If transport header was needed, it should have been reset by the producer (netlink_dump()), not the consumer(s). The following trace probably happened when multiple threads were using MSG_PEEK. BUG: KCSAN: data-race in netlink_recvmsg / netlink_recvmsg write to 0xffff88811e9f15b2 of 2 bytes by task 32012 on cpu 1: skb_reset_transport_header include/linux/skbuff.h:2760 [inline] netlink_recvmsg+0x1de/0x790 net/netlink/af_netlink.c:1978 sock_recvmsg_nosec net/socket.c:948 [inline] sock_recvmsg net/socket.c:966 [inline] __sys_recvfrom+0x204/0x2c0 net/socket.c:2097 __do_sys_recvfrom net/socket.c:2115 [inline] __se_sys_recvfrom net/socket.c:2111 [inline] __x64_sys_recvfrom+0x74/0x90 net/socket.c:2111 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae write to 0xffff88811e9f15b2 of 2 bytes by task 32005 on cpu 0: skb_reset_transport_header include/linux/skbuff.h:2760 [inline] netlink_recvmsg+0x1de/0x790 net/netlink/af_netlink.c:1978 ____sys_recvmsg+0x162/0x2f0 ___sys_recvmsg net/socket.c:2674 [inline] __sys_recvmsg+0x209/0x3f0 net/socket.c:2704 __do_sys_recvmsg net/socket.c:2714 [inline] __se_sys_recvmsg net/socket.c:2711 [inline] __x64_sys_recvmsg+0x42/0x50 net/socket.c:2711 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0xffff -> 0x0000 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 32005 Comm: syz-executor.4 Not tainted 5.18.0-rc1-syzkaller-00328-ge1f700ebd6be-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20220505161946.2867638-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/netlink/af_netlink.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 05a3795eac8e..73e9c0a9c187 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1975,7 +1975,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, copied = len; } - skb_reset_transport_header(data_skb); err = skb_copy_datagram_msg(data_skb, 0, msg, copied); if (msg->msg_name) { From 1c7ab9cd98b78bef1657a5db7204d8d437e24c94 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 5 May 2022 16:31:01 -0700 Subject: [PATCH 685/803] net: chelsio: cxgb4: Avoid potential negative array offset Using min_t(int, ...) as a potential array index implies to the compiler that negative offsets should be allowed. This is not the case, though. Replace "int" with "unsigned int". Fixes the following warning exposed under future CONFIG_FORTIFY_SOURCE improvements: In file included from include/linux/string.h:253, from include/linux/bitmap.h:11, from include/linux/cpumask.h:12, from include/linux/smp.h:13, from include/linux/lockdep.h:14, from include/linux/rcupdate.h:29, from include/linux/rculist.h:11, from include/linux/pid.h:5, from include/linux/sched.h:14, from include/linux/delay.h:23, from drivers/net/ethernet/chelsio/cxgb4/t4_hw.c:35: drivers/net/ethernet/chelsio/cxgb4/t4_hw.c: In function 't4_get_raw_vpd_params': include/linux/fortify-string.h:46:33: warning: '__builtin_memcpy' pointer overflow between offset 29 and size [2147483648, 4294967295] [-Warray-bounds] 46 | #define __underlying_memcpy __builtin_memcpy | ^ include/linux/fortify-string.h:388:9: note: in expansion of macro '__underlying_memcpy' 388 | __underlying_##op(p, q, __fortify_size); \ | ^~~~~~~~~~~~~ include/linux/fortify-string.h:433:26: note: in expansion of macro '__fortify_memcpy_chk' 433 | #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ | ^~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/chelsio/cxgb4/t4_hw.c:2796:9: note: in expansion of macro 'memcpy' 2796 | memcpy(p->id, vpd + id, min_t(int, id_len, ID_LEN)); | ^~~~~~ include/linux/fortify-string.h:46:33: warning: '__builtin_memcpy' pointer overflow between offset 0 and size [2147483648, 4294967295] [-Warray-bounds] 46 | #define __underlying_memcpy __builtin_memcpy | ^ include/linux/fortify-string.h:388:9: note: in expansion of macro '__underlying_memcpy' 388 | __underlying_##op(p, q, __fortify_size); \ | ^~~~~~~~~~~~~ include/linux/fortify-string.h:433:26: note: in expansion of macro '__fortify_memcpy_chk' 433 | #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ | ^~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/chelsio/cxgb4/t4_hw.c:2798:9: note: in expansion of macro 'memcpy' 2798 | memcpy(p->sn, vpd + sn, min_t(int, sn_len, SERNUM_LEN)); | ^~~~~~ Additionally remove needless cast from u8[] to char * in last strim() call. Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202205031926.FVP7epJM-lkp@intel.com Fixes: fc9279298e3a ("cxgb4: Search VPD with pci_vpd_find_ro_info_keyword()") Fixes: 24c521f81c30 ("cxgb4: Use pci_vpd_find_id_string() to find VPD ID string") Cc: Raju Rangoju Cc: Eric Dumazet Cc: Paolo Abeni Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220505233101.1224230-1-keescook@chromium.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index e7b4e3ed056c..8d719f82854a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -2793,14 +2793,14 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) goto out; na = ret; - memcpy(p->id, vpd + id, min_t(int, id_len, ID_LEN)); + memcpy(p->id, vpd + id, min_t(unsigned int, id_len, ID_LEN)); strim(p->id); - memcpy(p->sn, vpd + sn, min_t(int, sn_len, SERNUM_LEN)); + memcpy(p->sn, vpd + sn, min_t(unsigned int, sn_len, SERNUM_LEN)); strim(p->sn); - memcpy(p->pn, vpd + pn, min_t(int, pn_len, PN_LEN)); + memcpy(p->pn, vpd + pn, min_t(unsigned int, pn_len, PN_LEN)); strim(p->pn); - memcpy(p->na, vpd + na, min_t(int, na_len, MACADDR_LEN)); - strim((char *)p->na); + memcpy(p->na, vpd + na, min_t(unsigned int, na_len, MACADDR_LEN)); + strim(p->na); out: vfree(vpd); From 1b5853dfab7fdde450f00f145327342238135c8a Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 6 May 2022 15:22:25 +0200 Subject: [PATCH 686/803] fbdev: efifb: Fix a use-after-free due early fb_info cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit d258d00fb9c7 ("fbdev: efifb: Cleanup fb_info in .fb_destroy rather than .remove") attempted to fix a use-after-free error due driver freeing the fb_info in the .remove handler instead of doing it in .fb_destroy. But ironically that change introduced yet another use-after-free since the fb_info was still used after the free. This should fix for good by freeing the fb_info at the end of the handler. Fixes: d258d00fb9c7 ("fbdev: efifb: Cleanup fb_info in .fb_destroy rather than .remove") Reported-by: Ville Syrjälä Reported-by: Andrzej Hajda Signed-off-by: Javier Martinez Canillas Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Reviewed-by: Thomas Zimmermann Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220506132225.588379-1-javierm@redhat.com --- drivers/video/fbdev/efifb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c index cfa3dc0b4eee..b3d5f884c544 100644 --- a/drivers/video/fbdev/efifb.c +++ b/drivers/video/fbdev/efifb.c @@ -259,12 +259,12 @@ static void efifb_destroy(struct fb_info *info) memunmap(info->screen_base); } - framebuffer_release(info); - if (request_mem_succeeded) release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size); fb_dealloc_cmap(&info->cmap); + + framebuffer_release(info); } static const struct fb_ops efifb_ops = { From 3d1b0d351441c2be7de082b92f43d0bfdc95a8f3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 May 2022 13:48:21 -0400 Subject: [PATCH 687/803] Revert "SUNRPC: Ensure gss-proxy connects on setup" This reverts commit 892de36fd4a98fab3298d417c051d9099af5448d. The gssproxy server is unresponsive when it calls into the kernel to start the upcall service, so it will not reply to our RPC ping at all. Reported-by: "J.Bruce Fields" Fixes: 892de36fd4a9 ("SUNRPC: Ensure gss-proxy connects on setup") Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 - net/sunrpc/auth_gss/gss_rpc_upcall.c | 2 +- net/sunrpc/clnt.c | 3 --- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index db5149567305..267b7aeaf1a6 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -160,7 +160,6 @@ struct rpc_add_xprt_test { #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) -#define RPC_CLNT_CREATE_IGNORE_NULL_UNAVAIL (1UL << 12) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 8ca1d809b78d..61c276bddaf2 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -97,7 +97,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt) * timeout, which would result in reconnections being * done without the correct namespace: */ - .flags = RPC_CLNT_CREATE_IGNORE_NULL_UNAVAIL | + .flags = RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_NO_IDLE_TIMEOUT }; struct rpc_clnt *clnt; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 22c28cf43eba..98133aa54f19 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -479,9 +479,6 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { int err = rpc_ping(clnt); - if ((args->flags & RPC_CLNT_CREATE_IGNORE_NULL_UNAVAIL) && - err == -EOPNOTSUPP) - err = 0; if (err != 0) { rpc_shutdown_client(clnt); return ERR_PTR(err); From fd13359f54ee854f00134abc6be32da94ec53dbf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 May 2022 13:53:59 -0400 Subject: [PATCH 688/803] SUNRPC: Ensure that the gssproxy client can start in a connected state Ensure that the gssproxy client connects to the server from the gssproxy daemon process context so that the AF_LOCAL socket connection is done using the correct path and namespaces. Fixes: 1d658336b05f ("SUNRPC: Add RPC based upcall mechanism for RPCGSS auth") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/auth_gss/gss_rpc_upcall.c | 1 + net/sunrpc/clnt.c | 33 ++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 267b7aeaf1a6..90501404fa49 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -160,6 +160,7 @@ struct rpc_add_xprt_test { #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) +#define RPC_CLNT_CREATE_CONNECTED (1UL << 12) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 61c276bddaf2..f549e4c05def 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -98,6 +98,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt) * done without the correct namespace: */ .flags = RPC_CLNT_CREATE_NOPING | + RPC_CLNT_CREATE_CONNECTED | RPC_CLNT_CREATE_NO_IDLE_TIMEOUT }; struct rpc_clnt *clnt; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 98133aa54f19..e2c6eca0271b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -76,6 +76,7 @@ static int rpc_encode_header(struct rpc_task *task, static int rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr); static int rpc_ping(struct rpc_clnt *clnt); +static int rpc_ping_noreply(struct rpc_clnt *clnt); static void rpc_check_timeout(struct rpc_task *task); static void rpc_register_client(struct rpc_clnt *clnt) @@ -483,6 +484,12 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, rpc_shutdown_client(clnt); return ERR_PTR(err); } + } else if (args->flags & RPC_CLNT_CREATE_CONNECTED) { + int err = rpc_ping_noreply(clnt); + if (err != 0) { + rpc_shutdown_client(clnt); + return ERR_PTR(err); + } } clnt->cl_softrtry = 1; @@ -2709,6 +2716,10 @@ static const struct rpc_procinfo rpcproc_null = { .p_decode = rpcproc_decode_null, }; +static const struct rpc_procinfo rpcproc_null_noreply = { + .p_encode = rpcproc_encode_null, +}; + static void rpc_null_call_prepare(struct rpc_task *task, void *data) { @@ -2762,6 +2773,28 @@ static int rpc_ping(struct rpc_clnt *clnt) return status; } +static int rpc_ping_noreply(struct rpc_clnt *clnt) +{ + struct rpc_message msg = { + .rpc_proc = &rpcproc_null_noreply, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clnt, + .rpc_message = &msg, + .callback_ops = &rpc_null_ops, + .flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS, + }; + struct rpc_task *task; + int status; + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + status = task->tk_status; + rpc_put_task(task); + return status; +} + struct rpc_cb_add_xprt_calldata { struct rpc_xprt_switch *xps; struct rpc_xprt *xprt; From f71f01394f742fc4558b3f9f4c7ef4c4cf3b07c8 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 8 May 2022 11:37:07 +0200 Subject: [PATCH 689/803] floppy: use a statically allocated error counter Interrupt handler bad_flp_intr() may cause a UAF on the recently freed request just to increment the error count. There's no point keeping that one in the request anyway, and since the interrupt handler uses a static pointer to the error which cannot be kept in sync with the pending request, better make it use a static error counter that's reset for each new request. This reset now happens when entering redo_fd_request() for a new request via set_next_request(). One initial concern about a single error counter was that errors on one floppy drive could be reported on another one, but this problem is not real given that the driver uses a single drive at a time, as that PC-compatible controllers also have this limitation by using shared signals. As such the error count is always for the "current" drive. Reported-by: Minh Yuan Suggested-by: Linus Torvalds Tested-by: Denis Efremov Signed-off-by: Willy Tarreau Signed-off-by: Linus Torvalds --- drivers/block/floppy.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index d5b9ff9bcbb2..015841f50f4e 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -509,8 +509,8 @@ static unsigned long fdc_busy; static DECLARE_WAIT_QUEUE_HEAD(fdc_wait); static DECLARE_WAIT_QUEUE_HEAD(command_done); -/* Errors during formatting are counted here. */ -static int format_errors; +/* errors encountered on the current (or last) request */ +static int floppy_errors; /* Format request descriptor. */ static struct format_descr format_req; @@ -530,7 +530,6 @@ static struct format_descr format_req; static char *floppy_track_buffer; static int max_buffer_sectors; -static int *errors; typedef void (*done_f)(int); static const struct cont_t { void (*interrupt)(void); @@ -1455,7 +1454,7 @@ static int interpret_errors(void) if (drive_params[current_drive].flags & FTD_MSG) DPRINT("Over/Underrun - retrying\n"); bad = 0; - } else if (*errors >= drive_params[current_drive].max_errors.reporting) { + } else if (floppy_errors >= drive_params[current_drive].max_errors.reporting) { print_errors(); } if (reply_buffer[ST2] & ST2_WC || reply_buffer[ST2] & ST2_BC) @@ -2095,7 +2094,7 @@ static void bad_flp_intr(void) if (!next_valid_format(current_drive)) return; } - err_count = ++(*errors); + err_count = ++floppy_errors; INFBOUND(write_errors[current_drive].badness, err_count); if (err_count > drive_params[current_drive].max_errors.abort) cont->done(0); @@ -2241,9 +2240,8 @@ static int do_format(int drive, struct format_descr *tmp_format_req) return -EINVAL; } format_req = *tmp_format_req; - format_errors = 0; cont = &format_cont; - errors = &format_errors; + floppy_errors = 0; ret = wait_til_done(redo_format, true); if (ret == -EINTR) return -EINTR; @@ -2759,10 +2757,11 @@ static int set_next_request(void) current_req = list_first_entry_or_null(&floppy_reqs, struct request, queuelist); if (current_req) { - current_req->error_count = 0; + floppy_errors = 0; list_del_init(¤t_req->queuelist); + return 1; } - return current_req != NULL; + return 0; } /* Starts or continues processing request. Will automatically unlock the @@ -2821,7 +2820,6 @@ do_request: _floppy = floppy_type + drive_params[current_drive].autodetect[drive_state[current_drive].probed_format]; } else probing = 0; - errors = &(current_req->error_count); tmp = make_raw_rw_request(); if (tmp < 2) { request_done(tmp); From f3b10a3c22c6a5f1d623b70eca2b4d1efafccd71 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 8 May 2022 11:37:08 +0200 Subject: [PATCH 690/803] ataflop: use a statically allocated error counters This is the last driver making use of fd_request->error_count, which is easy to get wrong as was shown in floppy.c. We don't need to keep it there, it can be moved to the atari_floppy_struct instead, so let's do this. Suggested-by: Linus Torvalds Cc: Minh Yuan Cc: Geert Uytterhoeven Signed-off-by: Willy Tarreau Signed-off-by: Linus Torvalds --- drivers/block/ataflop.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 5d819a466e2f..e232cc4fd444 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -303,6 +303,7 @@ static struct atari_floppy_struct { int ref; int type; struct blk_mq_tag_set tag_set; + int error_count; } unit[FD_MAX_UNITS]; #define UD unit[drive] @@ -705,14 +706,14 @@ static void fd_error( void ) if (!fd_request) return; - fd_request->error_count++; - if (fd_request->error_count >= MAX_ERRORS) { + unit[SelectedDrive].error_count++; + if (unit[SelectedDrive].error_count >= MAX_ERRORS) { printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive ); fd_end_request_cur(BLK_STS_IOERR); finish_fdc(); return; } - else if (fd_request->error_count == RECALIBRATE_ERRORS) { + else if (unit[SelectedDrive].error_count == RECALIBRATE_ERRORS) { printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive ); if (SelectedDrive != -1) SUD.track = -1; @@ -1491,7 +1492,7 @@ static void setup_req_params( int drive ) ReqData = ReqBuffer + 512 * ReqCnt; if (UseTrackbuffer) - read_track = (ReqCmd == READ && fd_request->error_count == 0); + read_track = (ReqCmd == READ && unit[drive].error_count == 0); else read_track = 0; @@ -1520,6 +1521,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_RESOURCE; } fd_request = bd->rq; + unit[drive].error_count = 0; blk_mq_start_request(fd_request); atari_disable_irq( IRQ_MFP_FDC ); From 2e3afb42dd480d755cd7d97ca04586a5616c1a5e Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 8 May 2022 11:37:09 +0200 Subject: [PATCH 691/803] blk-mq: remove the error_count from struct request The last two users were floppy.c and ataflop.c respectively, it was verified that no other drivers makes use of this, so let's remove it. Suggested-by: Linus Torvalds Cc: Minh Yuan Cc: Denis Efremov , Cc: Geert Uytterhoeven Cc: Christoph Hellwig Signed-off-by: Willy Tarreau Signed-off-by: Linus Torvalds --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7aa5c54901a9..9f07061418db 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -163,7 +163,6 @@ struct request { struct rb_node rb_node; /* sort/lookup */ struct bio_vec special_vec; void *completion_data; - int error_count; /* for legacy drivers, don't use */ }; From 9dc4241bb14afecd16518a0760bceb3d7359b12a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 7 May 2022 15:31:16 +0200 Subject: [PATCH 692/803] Revert "parisc: Mark cr16 CPU clocksource unstable on all SMP machines" This reverts commit afdb4a5b1d340e4afffc65daa21cc71890d7d589. It triggers RCU stalls at boot with a 32-bit kernel. Signed-off-by: Helge Deller Noticed-by: John David Anglin Cc: stable@vger.kernel.org # v5.16+ --- arch/parisc/kernel/time.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index bb27dfeeddfc..95ee9e1a364b 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -251,16 +251,30 @@ void __init time_init(void) static int __init init_cr16_clocksource(void) { /* - * The cr16 interval timers are not syncronized across CPUs, even if - * they share the same socket. + * The cr16 interval timers are not syncronized across CPUs on + * different sockets, so mark them unstable and lower rating on + * multi-socket SMP systems. */ if (num_online_cpus() > 1 && !running_on_qemu) { - /* mark sched_clock unstable */ - clear_sched_clock_stable(); + int cpu; + unsigned long cpu0_loc; + cpu0_loc = per_cpu(cpu_data, 0).cpu_loc; - clocksource_cr16.name = "cr16_unstable"; - clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; - clocksource_cr16.rating = 0; + for_each_online_cpu(cpu) { + if (cpu == 0) + continue; + if ((cpu0_loc != 0) && + (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc)) + continue; + + /* mark sched_clock unstable */ + clear_sched_clock_stable(); + + clocksource_cr16.name = "cr16_unstable"; + clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; + clocksource_cr16.rating = 0; + break; + } } /* register at clocksource framework */ From 7962c0896429af2a0e00ec6bc15d992536453b2d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 7 May 2022 15:32:38 +0200 Subject: [PATCH 693/803] Revert "parisc: Mark sched_clock unstable only if clocks are not syncronized" This reverts commit d97180ad68bdb7ee10f327205a649bc2f558741d. It triggers RCU stalls at boot with a 32-bit kernel. Signed-off-by: Helge Deller Noticed-by: John David Anglin Cc: stable@vger.kernel.org # v5.15+ --- arch/parisc/kernel/setup.c | 2 ++ arch/parisc/kernel/time.c | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index b91cb45ffd4e..f005ddedb50e 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -161,6 +161,8 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_PA11 dma_ops_init(); #endif + + clear_sched_clock_stable(); } /* diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 95ee9e1a364b..19c31a72fe76 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -267,9 +267,6 @@ static int __init init_cr16_clocksource(void) (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc)) continue; - /* mark sched_clock unstable */ - clear_sched_clock_stable(); - clocksource_cr16.name = "cr16_unstable"; clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; clocksource_cr16.rating = 0; @@ -277,6 +274,10 @@ static int __init init_cr16_clocksource(void) } } + /* XXX: We may want to mark sched_clock stable here if cr16 clocks are + * in sync: + * (clocksource_cr16.flags == CLOCK_SOURCE_IS_CONTINUOUS) */ + /* register at clocksource framework */ clocksource_register_hz(&clocksource_cr16, 100 * PAGE0->mem_10msec); From 6c800d7f55fcd78e17deae5ae4374d8e73482c13 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 8 May 2022 10:18:40 +0200 Subject: [PATCH 694/803] Revert "parisc: Fix patch code locking and flushing" This reverts commit a9fe7fa7d874a536e0540469f314772c054a0323. Leads to segfaults on 32bit kernel. Signed-off-by: Helge Deller --- arch/parisc/kernel/patch.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c index e59574f65e64..80a0ab372802 100644 --- a/arch/parisc/kernel/patch.c +++ b/arch/parisc/kernel/patch.c @@ -40,7 +40,10 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags, *need_unmap = 1; set_fixmap(fixmap, page_to_phys(page)); - raw_spin_lock_irqsave(&patch_lock, *flags); + if (flags) + raw_spin_lock_irqsave(&patch_lock, *flags); + else + __acquire(&patch_lock); return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); } @@ -49,7 +52,10 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags) { clear_fixmap(fixmap); - raw_spin_unlock_irqrestore(&patch_lock, *flags); + if (flags) + raw_spin_unlock_irqrestore(&patch_lock, *flags); + else + __release(&patch_lock); } void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) @@ -61,9 +67,8 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) int mapped; /* Make sure we don't have any aliases in cache */ - flush_kernel_dcache_range_asm(start, end); - flush_kernel_icache_range_asm(start, end); - flush_tlb_kernel_range(start, end); + flush_kernel_vmap_range(addr, len); + flush_icache_range(start, end); p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped); @@ -76,10 +81,8 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) * We're crossing a page boundary, so * need to remap */ - flush_kernel_dcache_range_asm((unsigned long)fixmap, - (unsigned long)p); - flush_tlb_kernel_range((unsigned long)fixmap, - (unsigned long)p); + flush_kernel_vmap_range((void *)fixmap, + (p-fixmap) * sizeof(*p)); if (mapped) patch_unmap(FIX_TEXT_POKE0, &flags); p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, @@ -87,10 +90,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) } } - flush_kernel_dcache_range_asm((unsigned long)fixmap, (unsigned long)p); - flush_tlb_kernel_range((unsigned long)fixmap, (unsigned long)p); + flush_kernel_vmap_range((void *)fixmap, (p-fixmap) * sizeof(*p)); if (mapped) patch_unmap(FIX_TEXT_POKE0, &flags); + flush_icache_range(start, end); } void __kprobes __patch_text(void *addr, u32 insn) From 0921244f6f4f0d05698b953fe632a99b38907226 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 1 Apr 2022 09:19:11 +0200 Subject: [PATCH 695/803] parisc: Only list existing CPUs in cpu_possible_mask The inventory knows which CPUs are in the system, so this bitmask should be in cpu_possible_mask instead of the bitmask based on CONFIG_NR_CPUS. Reset the cpu_possible_mask before scanning the system for CPUs, and mark each existing CPU as possible during initialization of that CPU. This avoids those warnings later on too: register_cpu_capacity_sysctl: too early to get CPU4 device! Signed-off-by: Helge Deller Noticed-by: John David Anglin --- arch/parisc/kernel/processor.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index d98692115221..9e92b76b0ce0 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -171,6 +171,7 @@ static int __init processor_probe(struct parisc_device *dev) p->cpu_num = cpu_info.cpu_num; p->cpu_loc = cpu_info.cpu_loc; + set_cpu_possible(cpuid, true); store_cpu_topology(cpuid); #ifdef CONFIG_SMP @@ -461,6 +462,13 @@ static struct parisc_driver cpu_driver __refdata = { */ void __init processor_init(void) { + unsigned int cpu; + reset_cpu_topology(); + + /* reset possible mask. We will mark those which are possible. */ + for_each_possible_cpu(cpu) + set_cpu_possible(cpu, false); + register_parisc_driver(&cpu_driver); } From 7e93a3dd63db2341d094ab1d9ba29b5d8d5093d1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 1 Apr 2022 18:55:27 +0200 Subject: [PATCH 696/803] parisc: Update 32- and 64-bit defconfigs Enable CONFIG_CGROUPS=y on 32-bit defconfig for systemd-support, and enable CONFIG_NAMESPACES and CONFIG_USER_NS. Signed-off-by: Helge Deller --- arch/parisc/configs/generic-32bit_defconfig | 4 +++- arch/parisc/configs/generic-64bit_defconfig | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index a5fee10d76ee..8ce0ae370680 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -6,6 +6,9 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 +CONFIG_CGROUPS=y +CONFIG_NAMESPACES=y +CONFIG_USER_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_PERF_EVENTS=y @@ -47,7 +50,6 @@ CONFIG_PARPORT=y CONFIG_PARPORT_PC=m CONFIG_PARPORT_1284=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=6144 CONFIG_BLK_DEV_SD=y diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index 1b8fd80cbe7f..57501b0aed92 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -16,6 +16,7 @@ CONFIG_CGROUPS=y CONFIG_MEMCG=y CONFIG_CGROUP_PIDS=y CONFIG_CPUSETS=y +CONFIG_USER_NS=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y @@ -267,9 +268,9 @@ CONFIG_CRYPTO_DEFLATE=m CONFIG_CRC_CCITT=m CONFIG_LIBCRC32C=y CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_KERNEL=y CONFIG_STRIP_ASM_SYMS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_SCHED_DEBUG is not set From 1955c4f879a130c7822f483cf593338ad747aed4 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 1 Apr 2022 22:24:20 +0200 Subject: [PATCH 697/803] parisc: Re-enable GENERIC_CPU_DEVICES for !SMP In commit 62773112acc5 ("parisc: Switch from GENERIC_CPU_DEVICES to GENERIC_ARCH_TOPOLOGY") GENERIC_CPU_DEVICES was unconditionally turned off, but this triggers a warning in topology_add_dev(). Turning it back on for the !SMP case avoids this warning. Reported-by: Guenter Roeck Tested-by: Guenter Roeck Fixes: 62773112acc5 ("parisc: Switch from GENERIC_CPU_DEVICES to GENERIC_ARCH_TOPOLOGY") Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 52e550b45692..bd22578859d0 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -38,6 +38,7 @@ config PARISC select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_SMP_IDLE_THREAD select GENERIC_ARCH_TOPOLOGY if SMP + select GENERIC_CPU_DEVICES if !SMP select GENERIC_LIB_DEVMEM_IS_ALLOWED select SYSCTL_ARCH_UNALIGN_ALLOW select SYSCTL_EXCEPTION_TRACE From 5b89966bc96a06f6ad65f64ae4b0461918fcc9d3 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 3 Apr 2022 21:57:51 +0200 Subject: [PATCH 698/803] parisc: Merge model and model name into one line in /proc/cpuinfo The Linux tool "lscpu" shows the double amount of CPUs if we have "model" and "model name" in two different lines in /proc/cpuinfo. This change combines the model and the model name into one line. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org --- arch/parisc/kernel/processor.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 9e92b76b0ce0..26eb568f8b96 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -420,8 +420,7 @@ show_cpuinfo (struct seq_file *m, void *v) } seq_printf(m, " (0x%02lx)\n", boot_cpu_data.pdc.capabilities); - seq_printf(m, "model\t\t: %s\n" - "model name\t: %s\n", + seq_printf(m, "model\t\t: %s - %s\n", boot_cpu_data.pdc.sys_model_name, cpuinfo->dev ? cpuinfo->dev->name : "Unknown"); From 234ff4c585d704896450a3634a7c29fa4e1907e1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 5 Apr 2022 21:28:37 +0200 Subject: [PATCH 699/803] parisc: Change MAX_ADDRESS to become unsigned long long Dave noticed that for the 32-bit kernel MAX_ADDRESS should be a ULL, otherwise this define would become 0: MAX_ADDRESS (1UL << MAX_ADDRBITS) It has no real effect on the kernel. Signed-off-by: Helge Deller Noticed-by: John David Anglin --- arch/parisc/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 939db6fe620b..69765a6dbe89 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -160,7 +160,7 @@ extern void __update_cache(pte_t pte); #define SPACEID_SHIFT (MAX_ADDRBITS - 32) #else #define MAX_ADDRBITS (BITS_PER_LONG) -#define MAX_ADDRESS (1UL << MAX_ADDRBITS) +#define MAX_ADDRESS (1ULL << MAX_ADDRBITS) #define SPACEID_SHIFT 0 #endif From a65bcad5421507c2f6c52e1e2ca6a6ce02fd1ad6 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 30 Apr 2022 21:07:18 +0200 Subject: [PATCH 700/803] parisc: Fix typos in comments Various spelling mistakes in comments. Detected with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Helge Deller --- arch/parisc/kernel/kprobes.c | 2 +- arch/parisc/kernel/traps.c | 2 +- arch/parisc/math-emu/dfadd.c | 2 +- arch/parisc/math-emu/dfsub.c | 2 +- arch/parisc/math-emu/sfadd.c | 2 +- arch/parisc/math-emu/sfsub.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/parisc/kernel/kprobes.c b/arch/parisc/kernel/kprobes.c index 3343d2fb7889..6e0b86652f30 100644 --- a/arch/parisc/kernel/kprobes.c +++ b/arch/parisc/kernel/kprobes.c @@ -152,7 +152,7 @@ int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs) /* for absolute branch instructions we can copy iaoq_b. for relative * branch instructions we need to calculate the new address based on the * difference between iaoq_f and iaoq_b. We cannot use iaoq_b without - * modificationt because it's based on our ainsn.insn address. + * modifications because it's based on our ainsn.insn address. */ if (p->post_handler) diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index a6e61cf2cad0..b78f1b9d45c1 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -469,7 +469,7 @@ void parisc_terminate(char *msg, struct pt_regs *regs, int code, unsigned long o * panic notifiers, and we should call panic * directly from the location that we wish. * e.g. We should not call panic from - * parisc_terminate, but rather the oter way around. + * parisc_terminate, but rather the other way around. * This hack works, prints the panic message twice, * and it enables reboot timers! */ diff --git a/arch/parisc/math-emu/dfadd.c b/arch/parisc/math-emu/dfadd.c index ec487e07f004..00e561d4aa55 100644 --- a/arch/parisc/math-emu/dfadd.c +++ b/arch/parisc/math-emu/dfadd.c @@ -253,7 +253,7 @@ dbl_fadd( return(NOEXCEPTION); } right_exponent = 1; /* Set exponent to reflect different bias - * with denomalized numbers. */ + * with denormalized numbers. */ } else { diff --git a/arch/parisc/math-emu/dfsub.c b/arch/parisc/math-emu/dfsub.c index c4f30acf2d48..4f03782284bd 100644 --- a/arch/parisc/math-emu/dfsub.c +++ b/arch/parisc/math-emu/dfsub.c @@ -256,7 +256,7 @@ dbl_fsub( return(NOEXCEPTION); } right_exponent = 1; /* Set exponent to reflect different bias - * with denomalized numbers. */ + * with denormalized numbers. */ } else { diff --git a/arch/parisc/math-emu/sfadd.c b/arch/parisc/math-emu/sfadd.c index 838758279d5b..9b98c874dfac 100644 --- a/arch/parisc/math-emu/sfadd.c +++ b/arch/parisc/math-emu/sfadd.c @@ -249,7 +249,7 @@ sgl_fadd( return(NOEXCEPTION); } right_exponent = 1; /* Set exponent to reflect different bias - * with denomalized numbers. */ + * with denormalized numbers. */ } else { diff --git a/arch/parisc/math-emu/sfsub.c b/arch/parisc/math-emu/sfsub.c index 583d3ace4634..29d9eed09d12 100644 --- a/arch/parisc/math-emu/sfsub.c +++ b/arch/parisc/math-emu/sfsub.c @@ -252,7 +252,7 @@ sgl_fsub( return(NOEXCEPTION); } right_exponent = 1; /* Set exponent to reflect different bias - * with denomalized numbers. */ + * with denormalized numbers. */ } else { From 340233dcc0160aafcce46ca893d1679f16acf409 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 8 May 2022 18:25:00 +0200 Subject: [PATCH 701/803] parisc: Mark cr16 clock unstable on all SMP machines The cr16 interval timers are not synchronized across CPUs, even with just one dual-core CPU. This becomes visible if the machines have a longer uptime. Signed-off-by: Helge Deller --- arch/parisc/kernel/time.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 19c31a72fe76..9714fbd7c42d 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -251,33 +251,14 @@ void __init time_init(void) static int __init init_cr16_clocksource(void) { /* - * The cr16 interval timers are not syncronized across CPUs on - * different sockets, so mark them unstable and lower rating on - * multi-socket SMP systems. + * The cr16 interval timers are not synchronized across CPUs. */ if (num_online_cpus() > 1 && !running_on_qemu) { - int cpu; - unsigned long cpu0_loc; - cpu0_loc = per_cpu(cpu_data, 0).cpu_loc; - - for_each_online_cpu(cpu) { - if (cpu == 0) - continue; - if ((cpu0_loc != 0) && - (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc)) - continue; - - clocksource_cr16.name = "cr16_unstable"; - clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; - clocksource_cr16.rating = 0; - break; - } + clocksource_cr16.name = "cr16_unstable"; + clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; + clocksource_cr16.rating = 0; } - /* XXX: We may want to mark sched_clock stable here if cr16 clocks are - * in sync: - * (clocksource_cr16.flags == CLOCK_SOURCE_IS_CONTINUOUS) */ - /* register at clocksource framework */ clocksource_register_hz(&clocksource_cr16, 100 * PAGE0->mem_10msec); From ba0c04104082ca211e108dd8eec6db2ad7676528 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 8 May 2022 19:55:13 +0200 Subject: [PATCH 702/803] Revert "parisc: Increase parisc_cache_flush_threshold setting" This reverts commit a58e9d0984e8dad53f17ec73ae3c1cc7f8d88151. Triggers segfaults with 32-bit kernels on PA8500 machines. Signed-off-by: Helge Deller --- arch/parisc/kernel/cache.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 23348199f3f8..e7911225a4f8 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -403,7 +403,7 @@ void __init parisc_setup_cache_timing(void) { unsigned long rangetime, alltime; unsigned long size; - unsigned long threshold, threshold2; + unsigned long threshold; alltime = mfctl(16); flush_data_cache(); @@ -418,20 +418,8 @@ void __init parisc_setup_cache_timing(void) alltime, size, rangetime); threshold = L1_CACHE_ALIGN(size * alltime / rangetime); - - /* - * The threshold computed above isn't very reliable since the - * flush times depend greatly on the percentage of dirty lines - * in the flush range. Further, the whole cache time doesn't - * include the time to refill lines that aren't in the mm/vma - * being flushed. By timing glibc build and checks on mako cpus, - * the following formula seems to work reasonably well. The - * value from the timing calculation is too small, and increases - * build and check times by almost a factor two. - */ - threshold2 = cache_info.dc_size * num_online_cpus(); - if (threshold2 > threshold) - threshold = threshold2; + if (threshold > cache_info.dc_size) + threshold = cache_info.dc_size; if (threshold) parisc_cache_flush_threshold = threshold; printk(KERN_INFO "Cache flush threshold set to %lu KiB\n", From c5eb0a61238dd6faf37f58c9ce61c9980aaffd7a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 8 May 2022 13:54:17 -0700 Subject: [PATCH 703/803] Linux 5.18-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9a820c525b86..2284d1ca2503 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Superb Owl # *DOCUMENTATION* From 183d4f2d23acecb29695f4d07427c8594e3a9691 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 28 Apr 2022 13:29:11 -0700 Subject: [PATCH 704/803] perf bench: Fix two numa NDEBUG warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BUG_ON is a no-op if NDEBUG is defined, otherwise it is an assert. Compiling with NDEBUG yields: bench/numa.c: In function ‘bind_to_cpu’: bench/numa.c:314:1: error: control reaches end of non-void function [-Werror=return-type] 314 | } | ^ bench/numa.c: In function ‘bind_to_node’: bench/numa.c:367:1: error: control reaches end of non-void function [-Werror=return-type] 367 | } | ^ Add return statements to cover this case. Reviewed-by: Athira Jajeev Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jin Yao Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20220428202912.1056444-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 44e1f8a44087..d5289fa58a4f 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -311,6 +311,7 @@ err_out: /* BUG_ON due to failure in allocation of orig_mask/mask */ BUG_ON(-1); + return NULL; } static cpu_set_t *bind_to_node(int target_node) @@ -364,6 +365,7 @@ err_out: /* BUG_ON due to failure in allocation of orig_mask/mask */ BUG_ON(-1); + return NULL; } static void bind_to_cpumask(cpu_set_t *mask) From 45fa7c38696bae632310c2876ba81fdfa25cc9c2 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Thu, 28 Apr 2022 10:19:47 -0500 Subject: [PATCH 705/803] perf tests: Fix coresight `perf test` failure. Currently the `perf test` always fails the coresight test like: 89: Check Arm CoreSight trace data recording and synthesized samples: FAILED! That is because the test_arm_coresight.sh is attempting to SIGINT the parent but is using $$ rather than $PPID and it sigint's itself when run under the perf test framework. Since this is done in a trap clause it ends up returning a non zero return. Since $PPID is a bash ism and not all distros are linking /bin/sh to bash, the alternative parent pid lookups are uglier than just dropping the kill, and its not strictly needed, lets pick the simple solution and drop the sigint. Fixes: 133fe2e617e48ca0 ("perf tests: Improve temp file cleanup in test_arm_coresight.sh") Reviewed-by: James Clark Signed-off-by: Jeremy Linton Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Jeremy Linton Link: https://lore.kernel.org/r/20220428151947.290146-1-jeremy.linton@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_arm_coresight.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 6de53b7ef5ff..e4cb4f1806ff 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -29,7 +29,6 @@ cleanup_files() rm -f ${file} rm -f "${perfdata}.old" trap - exit term int - kill -2 $$ exit $glb_err } From 474e76c4075c10461c4373d932a083e25d6adf3c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 09:39:02 -0300 Subject: [PATCH 706/803] tools headers UAPI: Sync linux/kvm.h with the kernel sources To pick the changes in: d495f942f40aa412 ("KVM: fix bad user ABI for KVM_EXIT_SYSTEM_EVENT") That just rebuilds perf, as these patches don't add any new KVM ioctl to be harvested for the the 'perf trace' ioctl syscall argument beautifiers. This is also by now used by tools/testing/selftests/kvm/, a simple test build succeeded. This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Link: http://lore.kernel.org/lkml/YnE5BIweGmCkpOTN@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 91a6fe4e02c0..6a184d260c7f 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -445,7 +445,13 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 __u32 type; - __u64 flags; + __u32 ndata; + union { +#ifndef __KERNEL__ + __u64 flags; +#endif + __u64 data[16]; + }; } system_event; /* KVM_EXIT_S390_STSI */ struct { @@ -1144,6 +1150,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_MEM_OP_EXTENSION 211 #define KVM_CAP_PMU_CAPABILITY 212 #define KVM_CAP_DISABLE_QUIRKS2 213 +/* #define KVM_CAP_VM_TSC_CONTROL 214 */ +#define KVM_CAP_SYSTEM_EVENT_DATA 215 #ifdef KVM_CAP_IRQ_ROUTING From 3220c3b2115102bb35f8f07d90d2989a3f5eb452 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Wed, 20 Apr 2022 11:57:20 +0200 Subject: [PATCH 707/803] drm/i915: Fix race in __i915_vma_remove_closed i915_vma_reopen checked if the vma is closed before without taking the lock. So multiple threads could attempt removing the vma. Instead the lock needs to be taken before actually checking. v2: move struct declaration Cc: Chris Wilson Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: # v5.3+ Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5732 Signed-off-by: Karol Herbst Fixes: 155ab8836caa ("drm/i915: Move object close under its own lock") Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20220420095720.3331609-1-kherbst@redhat.com (cherry picked from commit 1df1c79cbb7ac9bf148930be3418973c76ba8dde) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_vma.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 94fcdb7bd21d..eeaa8d0d0407 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1605,17 +1605,17 @@ void i915_vma_close(struct i915_vma *vma) static void __i915_vma_remove_closed(struct i915_vma *vma) { - struct intel_gt *gt = vma->vm->gt; - - spin_lock_irq(>->closed_lock); list_del_init(&vma->closed_link); - spin_unlock_irq(>->closed_lock); } void i915_vma_reopen(struct i915_vma *vma) { + struct intel_gt *gt = vma->vm->gt; + + spin_lock_irq(>->closed_lock); if (i915_vma_is_closed(vma)) __i915_vma_remove_closed(vma); + spin_unlock_irq(>->closed_lock); } void i915_vma_release(struct kref *ref) @@ -1641,6 +1641,7 @@ static void force_unbind(struct i915_vma *vma) static void release_references(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; + struct intel_gt *gt = vma->vm->gt; GEM_BUG_ON(i915_vma_is_active(vma)); @@ -1650,7 +1651,9 @@ static void release_references(struct i915_vma *vma) rb_erase(&vma->obj_node, &obj->vma.tree); spin_unlock(&obj->vma.lock); + spin_lock_irq(>->closed_lock); __i915_vma_remove_closed(vma); + spin_unlock_irq(>->closed_lock); __i915_vma_put(vma); } From 49e6123c65dac6393b04f39ceabf79c44f66b8be Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 4 May 2022 12:32:27 +0000 Subject: [PATCH 708/803] net: sfc: fix memory leak due to ptp channel It fixes memory leak in ring buffer change logic. When ring buffer size is changed(ethtool -G eth0 rx 4096), sfc driver works like below. 1. stop all channels and remove ring buffers. 2. allocates new buffer array. 3. allocates rx buffers. 4. start channels. While the above steps are working, it skips some steps if the channel doesn't have a ->copy callback function. Due to ptp channel doesn't have ->copy callback, these above steps are skipped for ptp channel. It eventually makes some problems. a. ptp channel's ring buffer size is not changed, it works only 1024(default). b. memory leak. The reason for memory leak is to use the wrong ring buffer values. There are some values, which is related to ring buffer size. a. efx->rxq_entries - This is global value of rx queue size. b. rx_queue->ptr_mask - used for access ring buffer as circular ring. - roundup_pow_of_two(efx->rxq_entries) - 1 c. rx_queue->max_fill - efx->rxq_entries - EFX_RXD_HEAD_ROOM These all values should be based on ring buffer size consistently. But ptp channel's values are not. a. efx->rxq_entries - This is global(for sfc) value, always new ring buffer size. b. rx_queue->ptr_mask - This is always 1023(default). c. rx_queue->max_fill - This is new ring buffer size - EFX_RXD_HEAD_ROOM. Let's assume we set 4096 for rx ring buffer, normal channel ptp channel efx->rxq_entries 4096 4096 rx_queue->ptr_mask 4095 1023 rx_queue->max_fill 4086 4086 sfc driver allocates rx ring buffers based on these values. When it allocates ptp channel's ring buffer, 4086 ring buffers are allocated then, these buffers are attached to the allocated array. But ptp channel's ring buffer array size is still 1024(default) and ptr_mask is still 1023 too. So, 3062 ring buffers will be overwritten to the array. This is the reason for memory leak. Test commands: ethtool -G rx 4096 while : do ip link set up ip link set down done In order to avoid this problem, it adds ->copy callback to ptp channel type. So that rx_queue->ptr_mask value will be updated correctly. Fixes: 7c236c43b838 ("sfc: Add support for IEEE-1588 PTP") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx_channels.c | 7 ++++++- drivers/net/ethernet/sfc/ptp.c | 14 +++++++++++++- drivers/net/ethernet/sfc/ptp.h | 1 + 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index 377df8b7f015..40df910aa140 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -867,7 +867,9 @@ static void efx_set_xdp_channels(struct efx_nic *efx) int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) { - struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; + struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel, + *ptp_channel = efx_ptp_channel(efx); + struct efx_ptp_data *ptp_data = efx->ptp_data; unsigned int i, next_buffer_table = 0; u32 old_rxq_entries, old_txq_entries; int rc, rc2; @@ -938,6 +940,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) efx_set_xdp_channels(efx); out: + efx->ptp_data = NULL; /* Destroy unused channel structures */ for (i = 0; i < efx->n_channels; i++) { channel = other_channel[i]; @@ -948,6 +951,7 @@ out: } } + efx->ptp_data = ptp_data; rc2 = efx_soft_enable_interrupts(efx); if (rc2) { rc = rc ? rc : rc2; @@ -966,6 +970,7 @@ rollback: efx->txq_entries = old_txq_entries; for (i = 0; i < efx->n_channels; i++) swap(efx->channel[i], other_channel[i]); + efx_ptp_update_channel(efx, ptp_channel); goto out; } diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index f0ef515e2ade..4625f85acab2 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -45,6 +45,7 @@ #include "farch_regs.h" #include "tx.h" #include "nic.h" /* indirectly includes ptp.h */ +#include "efx_channels.h" /* Maximum number of events expected to make up a PTP event */ #define MAX_EVENT_FRAGS 3 @@ -541,6 +542,12 @@ struct efx_channel *efx_ptp_channel(struct efx_nic *efx) return efx->ptp_data ? efx->ptp_data->channel : NULL; } +void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel *channel) +{ + if (efx->ptp_data) + efx->ptp_data->channel = channel; +} + static u32 last_sync_timestamp_major(struct efx_nic *efx) { struct efx_channel *channel = efx_ptp_channel(efx); @@ -1443,6 +1450,11 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel) int rc = 0; unsigned int pos; + if (efx->ptp_data) { + efx->ptp_data->channel = channel; + return 0; + } + ptp = kzalloc(sizeof(struct efx_ptp_data), GFP_KERNEL); efx->ptp_data = ptp; if (!efx->ptp_data) @@ -2176,7 +2188,7 @@ static const struct efx_channel_type efx_ptp_channel_type = { .pre_probe = efx_ptp_probe_channel, .post_remove = efx_ptp_remove_channel, .get_name = efx_ptp_get_channel_name, - /* no copy operation; there is no need to reallocate this channel */ + .copy = efx_copy_channel, .receive_skb = efx_ptp_rx, .want_txqs = efx_ptp_want_txqs, .keep_eventq = false, diff --git a/drivers/net/ethernet/sfc/ptp.h b/drivers/net/ethernet/sfc/ptp.h index 9855e8c9e544..7b1ef7002b3f 100644 --- a/drivers/net/ethernet/sfc/ptp.h +++ b/drivers/net/ethernet/sfc/ptp.h @@ -16,6 +16,7 @@ struct ethtool_ts_info; int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel); void efx_ptp_defer_probe_with_channel(struct efx_nic *efx); struct efx_channel *efx_ptp_channel(struct efx_nic *efx); +void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel *channel); void efx_ptp_remove(struct efx_nic *efx); int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr); int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr); From cf3ab8d4a797960b4be20565abb3bcd227b18a68 Mon Sep 17 00:00:00 2001 From: Lina Wang Date: Thu, 5 May 2022 13:48:49 +0800 Subject: [PATCH 709/803] net: fix wrong network header length When clatd starts with ebpf offloaing, and NETIF_F_GRO_FRAGLIST is enable, several skbs are gathered in skb_shinfo(skb)->frag_list. The first skb's ipv6 header will be changed to ipv4 after bpf_skb_proto_6_to_4, network_header\transport_header\mac_header have been updated as ipv4 acts, but other skbs in frag_list didnot update anything, just ipv6 packets. udp_queue_rcv_skb will call skb_segment_list to traverse other skbs in frag_list and make sure right udp payload is delivered to user space. Unfortunately, other skbs in frag_list who are still ipv6 packets are updated like the first skb and will have wrong transport header length. e.g.before bpf_skb_proto_6_to_4,the first skb and other skbs in frag_list has the same network_header(24)& transport_header(64), after bpf_skb_proto_6_to_4, ipv6 protocol has been changed to ipv4, the first skb's network_header is 44,transport_header is 64, other skbs in frag_list didnot change.After skb_segment_list, the other skbs in frag_list has different network_header(24) and transport_header(44), so there will be 20 bytes different from original,that is difference between ipv6 header and ipv4 header. Just change transport_header to be the same with original. Actually, there are two solutions to fix it, one is traversing all skbs and changing every skb header in bpf_skb_proto_6_to_4, the other is modifying frag_list skb's header in skb_segment_list. Considering efficiency, adopt the second one--- when the first skb and other skbs in frag_list has different network_header length, restore them to make sure right udp payload is delivered to user space. Signed-off-by: Lina Wang Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 30b523fa4ad2..c90c74de90d5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3897,7 +3897,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, unsigned int delta_len = 0; struct sk_buff *tail = NULL; struct sk_buff *nskb, *tmp; - int err; + int len_diff, err; skb_push(skb, -skb_network_offset(skb) + offset); @@ -3937,9 +3937,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, skb_push(nskb, -skb_network_offset(nskb) + offset); skb_release_head_state(nskb); + len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb); __copy_skb_header(nskb, skb); skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb)); + nskb->transport_header += len_diff; skb_copy_from_linear_data_offset(skb, -tnl_hlen, nskb->data - tnl_hlen, offset + tnl_hlen); From edae34a3ed9293b5077dddf9e51a3d86c95dc76a Mon Sep 17 00:00:00 2001 From: Lina Wang Date: Thu, 5 May 2022 13:48:50 +0800 Subject: [PATCH 710/803] selftests net: add UDP GRO fraglist + bpf self-tests When NET_F_F_GRO_FRAGLIST is enabled and bpf_skb_change_proto is used, check if udp packets and tcp packets are successfully delivered to user space. If wrong udp packets are delivered, udpgso_bench_rx will exit with "Initial byte out of range" Signed-off-by: Maciej enczykowski Signed-off-by: Lina Wang Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 3 + tools/testing/selftests/net/bpf/Makefile | 14 + tools/testing/selftests/net/bpf/nat6to4.c | 285 ++++++++++++++++++ tools/testing/selftests/net/udpgro_frglist.sh | 101 +++++++ 4 files changed, 403 insertions(+) create mode 100644 tools/testing/selftests/net/bpf/Makefile create mode 100644 tools/testing/selftests/net/bpf/nat6to4.c create mode 100755 tools/testing/selftests/net/udpgro_frglist.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 0f2ebc38d893..e1f998defd10 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -25,6 +25,7 @@ TEST_PROGS += bareudp.sh TEST_PROGS += amt.sh TEST_PROGS += unicast_extensions.sh TEST_PROGS += udpgro_fwd.sh +TEST_PROGS += udpgro_frglist.sh TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh @@ -61,6 +62,8 @@ TEST_FILES := settings KSFT_KHDR_INSTALL := 1 include ../lib.mk +include bpf/Makefile + $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread $(OUTPUT)/tcp_inq: LDLIBS += -lpthread diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile new file mode 100644 index 000000000000..f91bf14bbee7 --- /dev/null +++ b/tools/testing/selftests/net/bpf/Makefile @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 + +CLANG ?= clang +CCINCLUDE += -I../../bpf +CCINCLUDE += -I../../../../../usr/include/ + +TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o +all: $(TEST_CUSTOM_PROGS) + +$(OUTPUT)/%.o: %.c + $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) -o $@ + +clean: + rm -f $(TEST_CUSTOM_PROGS) diff --git a/tools/testing/selftests/net/bpf/nat6to4.c b/tools/testing/selftests/net/bpf/nat6to4.c new file mode 100644 index 000000000000..ac54c36b25fc --- /dev/null +++ b/tools/testing/selftests/net/bpf/nat6to4.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This code is taken from the Android Open Source Project and the author + * (Maciej Żenczykowski) has gave permission to relicense it under the + * GPLv2. Therefore this program is free software; + * You can redistribute it and/or modify it under the terms of the GNU + * General Public License version 2 as published by the Free Software + * Foundation + + * The original headers, including the original license headers, are + * included below for completeness. + * + * Copyright (C) 2019 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include + +#include +#include + +#define IP_DF 0x4000 // Flag: "Don't Fragment" + +SEC("schedcls/ingress6/nat_6") +int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb) +{ + const int l2_header_size = sizeof(struct ethhdr); + void *data = (void *)(long)skb->data; + const void *data_end = (void *)(long)skb->data_end; + const struct ethhdr * const eth = data; // used iff is_ethernet + const struct ipv6hdr * const ip6 = (void *)(eth + 1); + + // Require ethernet dst mac address to be our unicast address. + if (skb->pkt_type != PACKET_HOST) + return TC_ACT_OK; + + // Must be meta-ethernet IPv6 frame + if (skb->protocol != bpf_htons(ETH_P_IPV6)) + return TC_ACT_OK; + + // Must have (ethernet and) ipv6 header + if (data + l2_header_size + sizeof(*ip6) > data_end) + return TC_ACT_OK; + + // Ethertype - if present - must be IPv6 + if (eth->h_proto != bpf_htons(ETH_P_IPV6)) + return TC_ACT_OK; + + // IP version must be 6 + if (ip6->version != 6) + return TC_ACT_OK; + // Maximum IPv6 payload length that can be translated to IPv4 + if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr)) + return TC_ACT_OK; + switch (ip6->nexthdr) { + case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6 + case IPPROTO_UDP: // address means there is no need to update their checksums. + case IPPROTO_GRE: // We do not need to bother looking at GRE/ESP headers, + case IPPROTO_ESP: // since there is never a checksum to update. + break; + default: // do not know how to handle anything else + return TC_ACT_OK; + } + + struct ethhdr eth2; // used iff is_ethernet + + eth2 = *eth; // Copy over the ethernet header (src/dst mac) + eth2.h_proto = bpf_htons(ETH_P_IP); // But replace the ethertype + + struct iphdr ip = { + .version = 4, // u4 + .ihl = sizeof(struct iphdr) / sizeof(__u32), // u4 + .tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4), // u8 + .tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)), // u16 + .id = 0, // u16 + .frag_off = bpf_htons(IP_DF), // u16 + .ttl = ip6->hop_limit, // u8 + .protocol = ip6->nexthdr, // u8 + .check = 0, // u16 + .saddr = 0x0201a8c0, // u32 + .daddr = 0x0101a8c0, // u32 + }; + + // Calculate the IPv4 one's complement checksum of the IPv4 header. + __wsum sum4 = 0; + + for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i) + sum4 += ((__u16 *)&ip)[i]; + + // Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4 + sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE + sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16 + ip.check = (__u16)~sum4; // sum4 cannot be zero, so this is never 0xFFFF + + // Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header. + __wsum sum6 = 0; + // We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits) + for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i) + sum6 += ~((__u16 *)ip6)[i]; // note the bitwise negation + + // Note that there is no L4 checksum update: we are relying on the checksum neutrality + // of the ipv6 address chosen by netd's ClatdController. + + // Packet mutations begin - point of no return, but if this first modification fails + // the packet is probably still pristine, so let clatd handle it. + if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0)) + return TC_ACT_OK; + bpf_csum_update(skb, sum6); + + data = (void *)(long)skb->data; + data_end = (void *)(long)skb->data_end; + if (data + l2_header_size + sizeof(struct iphdr) > data_end) + return TC_ACT_SHOT; + + struct ethhdr *new_eth = data; + + // Copy over the updated ethernet header + *new_eth = eth2; + + // Copy over the new ipv4 header. + *(struct iphdr *)(new_eth + 1) = ip; + return bpf_redirect(skb->ifindex, BPF_F_INGRESS); +} + +SEC("schedcls/egress4/snat4") +int sched_cls_egress4_snat4_prog(struct __sk_buff *skb) +{ + const int l2_header_size = sizeof(struct ethhdr); + void *data = (void *)(long)skb->data; + const void *data_end = (void *)(long)skb->data_end; + const struct ethhdr *const eth = data; // used iff is_ethernet + const struct iphdr *const ip4 = (void *)(eth + 1); + + // Must be meta-ethernet IPv4 frame + if (skb->protocol != bpf_htons(ETH_P_IP)) + return TC_ACT_OK; + + // Must have ipv4 header + if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end) + return TC_ACT_OK; + + // Ethertype - if present - must be IPv4 + if (eth->h_proto != bpf_htons(ETH_P_IP)) + return TC_ACT_OK; + + // IP version must be 4 + if (ip4->version != 4) + return TC_ACT_OK; + + // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header + if (ip4->ihl != 5) + return TC_ACT_OK; + + // Maximum IPv6 payload length that can be translated to IPv4 + if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr)) + return TC_ACT_OK; + + // Calculate the IPv4 one's complement checksum of the IPv4 header. + __wsum sum4 = 0; + + for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i) + sum4 += ((__u16 *)ip4)[i]; + + // Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4 + sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE + sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16 + // for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF + if (sum4 != 0xFFFF) + return TC_ACT_OK; + + // Minimum IPv4 total length is the size of the header + if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4)) + return TC_ACT_OK; + + // We are incapable of dealing with IPv4 fragments + if (ip4->frag_off & ~bpf_htons(IP_DF)) + return TC_ACT_OK; + + switch (ip4->protocol) { + case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6 + case IPPROTO_GRE: // address means there is no need to update their checksums. + case IPPROTO_ESP: // We do not need to bother looking at GRE/ESP headers, + break; // since there is never a checksum to update. + + case IPPROTO_UDP: // See above comment, but must also have UDP header... + if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end) + return TC_ACT_OK; + const struct udphdr *uh = (const struct udphdr *)(ip4 + 1); + // If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the + // checksum. Otherwise the network or more likely the NAT64 gateway might + // drop the packet because in most cases IPv6/UDP packets with a zero checksum + // are invalid. See RFC 6935. TODO: calculate checksum via bpf_csum_diff() + if (!uh->check) + return TC_ACT_OK; + break; + + default: // do not know how to handle anything else + return TC_ACT_OK; + } + struct ethhdr eth2; // used iff is_ethernet + + eth2 = *eth; // Copy over the ethernet header (src/dst mac) + eth2.h_proto = bpf_htons(ETH_P_IPV6); // But replace the ethertype + + struct ipv6hdr ip6 = { + .version = 6, // __u8:4 + .priority = ip4->tos >> 4, // __u8:4 + .flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0}, // __u8[3] + .payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20), // __be16 + .nexthdr = ip4->protocol, // __u8 + .hop_limit = ip4->ttl, // __u8 + }; + ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8); + ip6.saddr.in6_u.u6_addr32[1] = 0; + ip6.saddr.in6_u.u6_addr32[2] = 0; + ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1); + ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8); + ip6.daddr.in6_u.u6_addr32[1] = 0; + ip6.daddr.in6_u.u6_addr32[2] = 0; + ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2); + + // Calculate the IPv6 16-bit one's complement checksum of the IPv6 header. + __wsum sum6 = 0; + // We'll end up with a non-zero sum due to ip6.version == 6 + for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i) + sum6 += ((__u16 *)&ip6)[i]; + + // Packet mutations begin - point of no return, but if this first modification fails + // the packet is probably still pristine, so let clatd handle it. + if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0)) + return TC_ACT_OK; + + // This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet. + // In such a case, skb->csum is a 16-bit one's complement sum of the entire payload, + // thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum. + // However, we've already verified the ipv4 checksum is correct and thus 0. + // Thus we only need to add the ipv6 header's sum. + // + // bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error + // (-ENOTSUPP) if it isn't. So we just ignore the return code (see above for more details). + bpf_csum_update(skb, sum6); + + // bpf_skb_change_proto() invalidates all pointers - reload them. + data = (void *)(long)skb->data; + data_end = (void *)(long)skb->data_end; + + // I cannot think of any valid way for this error condition to trigger, however I do + // believe the explicit check is required to keep the in kernel ebpf verifier happy. + if (data + l2_header_size + sizeof(ip6) > data_end) + return TC_ACT_SHOT; + + struct ethhdr *new_eth = data; + + // Copy over the updated ethernet header + *new_eth = eth2; + // Copy over the new ipv4 header. + *(struct ipv6hdr *)(new_eth + 1) = ip6; + return TC_ACT_OK; +} + +char _license[] SEC("license") = ("GPL"); diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh new file mode 100755 index 000000000000..807b74c8fd80 --- /dev/null +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -0,0 +1,101 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of udpgro benchmarks + +readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" + +cleanup() { + local -r jobs="$(jobs -p)" + local -r ns="$(ip netns list|grep $PEER_NS)" + + [ -n "${jobs}" ] && kill -INT ${jobs} 2>/dev/null + [ -n "$ns" ] && ip netns del $ns 2>/dev/null +} +trap cleanup EXIT + +run_one() { + # use 'rx' as separator between sender args and receiver args + local -r all="$@" + local -r tx_args=${all%rx*} + local rx_args=${all#*rx} + + + + ip netns add "${PEER_NS}" + ip -netns "${PEER_NS}" link set lo up + ip link add type veth + ip link set dev veth0 up + ip addr add dev veth0 192.168.1.2/24 + ip addr add dev veth0 2001:db8::2/64 nodad + + ip link set dev veth1 netns "${PEER_NS}" + ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 + ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad + ip -netns "${PEER_NS}" link set dev veth1 up + ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on + + + ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy + tc -n "${PEER_NS}" qdisc add dev veth1 clsact + tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6 direct-action + tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action + echo ${rx_args} + ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & + + # Hack: let bg programs complete the startup + sleep 0.1 + ./udpgso_bench_tx ${tx_args} +} + +run_in_netns() { + local -r args=$@ + echo ${args} + ./in_netns.sh $0 __subprocess ${args} +} + +run_udp() { + local -r args=$@ + + echo "udp gso - over veth touching data" + run_in_netns ${args} -u -S 0 rx -4 -v + + echo "udp gso and gro - over veth touching data" + run_in_netns ${args} -S 0 rx -4 -G +} + +run_tcp() { + local -r args=$@ + + echo "tcp - over veth touching data" + run_in_netns ${args} -t rx -4 -t +} + +run_all() { + local -r core_args="-l 4" + local -r ipv4_args="${core_args} -4 -D 192.168.1.1" + local -r ipv6_args="${core_args} -6 -D 2001:db8::1" + + echo "ipv6" + run_tcp "${ipv6_args}" + run_udp "${ipv6_args}" +} + +if [ ! -f ../bpf/xdp_dummy.o ]; then + echo "Missing xdp_dummy helper. Build bpf selftest first" + exit -1 +fi + +if [ ! -f bpf/nat6to4.o ]; then + echo "Missing nat6to4 helper. Build bpfnat6to4.o selftest first" + exit -1 +fi + +if [[ $# -eq 0 ]]; then + run_all +elif [[ $1 == "__subprocess" ]]; then + shift + run_one $@ +else + run_in_netns $@ +fi From ceaf69f8eadcafb323392be88e7a5248c415d423 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 7 May 2022 11:00:28 +0300 Subject: [PATCH 711/803] fanotify: do not allow setting dirent events in mask of non-dir Dirent events (create/delete/move) are only reported on watched directory inodes, but in fanotify as well as in legacy inotify, it was always allowed to set them on non-dir inode, which does not result in any meaningful outcome. Until kernel v5.17, dirent events in fanotify also differed from events "on child" (e.g. FAN_OPEN) in the information provided in the event. For example, FAN_OPEN could be set in the mask of a non-dir or the mask of its parent and event would report the fid of the child regardless of the marked object. By contrast, FAN_DELETE is not reported if the child is marked and the child fid was not reported in the events. Since kernel v5.17, with fanotify group flag FAN_REPORT_TARGET_FID, the fid of the child is reported with dirent events, like events "on child", which may create confusion for users expecting the same behavior as events "on child" when setting events in the mask on a child. The desired semantics of setting dirent events in the mask of a child are not clear, so for now, deny this action for a group initialized with flag FAN_REPORT_TARGET_FID and for the new event FAN_RENAME. We may relax this restriction in the future if we decide on the semantics and implement them. Fixes: d61fd650e9d2 ("fanotify: introduce group flag FAN_REPORT_TARGET_FID") Fixes: 8cc3b1ccd930 ("fanotify: wire up FAN_RENAME event") Link: https://lore.kernel.org/linux-fsdevel/20220505133057.zm5t6vumc4xdcnsg@quack3.lan/ Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20220507080028.219826-1-amir73il@gmail.com --- fs/notify/fanotify/fanotify_user.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 9b32b76a9c30..a792e21c5309 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1657,6 +1657,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, else mnt = path.mnt; + /* + * FAN_RENAME is not allowed on non-dir (for now). + * We shouldn't have allowed setting any dirent events in mask of + * non-dir, but because we always allowed it, error only if group + * was initialized with the new flag FAN_REPORT_TARGET_FID. + */ + ret = -ENOTDIR; + if (inode && !S_ISDIR(inode->i_mode) && + ((mask & FAN_RENAME) || + ((mask & FANOTIFY_DIRENT_EVENTS) && + FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID)))) + goto path_put_and_out; + /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ if (mnt || !S_ISDIR(inode->i_mode)) { mask &= ~FAN_EVENT_ON_CHILD; From c7e34c1e263f71b2dd78a12b6b7259a89b3a1f44 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Fri, 6 May 2022 11:42:12 +0300 Subject: [PATCH 712/803] mailmap: update Kalle Valo's email MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I switched to use my kernel.org address, the old kvalo@codeaurora.org address doesn't work anymore. Signed-off-by: Kalle Valo Tested-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220506084212.8952-1-kvalo@kernel.org --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index b9d358217586..2c05d7257509 100644 --- a/.mailmap +++ b/.mailmap @@ -204,6 +204,7 @@ Juha Yrjola Juha Yrjola Juha Yrjola Julien Thierry +Kalle Valo Kalyan Thota Kay Sievers Kees Cook From a59d55568d02bbbdf9c0cc15be9580180f855b4f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 May 2022 23:04:21 +0200 Subject: [PATCH 713/803] mac80211_hwsim: fix RCU protected chanctx access We need to RCU protect the chanctx_conf access, so do that. Fixes: 585625c955b1 ("mac80211_hwsim: check TX and STA bandwidth") Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20220505230421.fb8055c081a2.Ic6da3307c77a909bd61a0ea25dc2a4b08fe1b03f@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 28bfa7b7b73c..3ac3693dbecb 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2202,11 +2202,14 @@ mac80211_hwsim_sta_rc_update(struct ieee80211_hw *hw, if (!data->use_chanctx) { confbw = data->bw; } else { - struct ieee80211_chanctx_conf *chanctx_conf = - rcu_dereference(vif->chanctx_conf); + struct ieee80211_chanctx_conf *chanctx_conf; + + rcu_read_lock(); + chanctx_conf = rcu_dereference(vif->chanctx_conf); if (!WARN_ON(!chanctx_conf)) confbw = chanctx_conf->def.width; + rcu_read_unlock(); } WARN(bw > hwsim_get_chanwidth(confbw), From 9e2db50f1ef2238fc2f71c5de1c0418b7a5b0ea2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 May 2022 23:04:22 +0200 Subject: [PATCH 714/803] mac80211_hwsim: call ieee80211_tx_prepare_skb under RCU protection This is needed since it might use (and pass out) pointers to e.g. keys protected by RCU. Can't really happen here as the frames aren't encrypted, but we need to still adhere to the rules. Fixes: cacfddf82baf ("mac80211_hwsim: initialize ieee80211_tx_info at hw_scan_work") Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20220505230421.5f139f9de173.I77ae111a28f7c0e9fd1ebcee7f39dbec5c606770@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 3ac3693dbecb..e9ec63e0e395 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2478,11 +2478,13 @@ static void hw_scan_work(struct work_struct *work) if (req->ie_len) skb_put_data(probe, req->ie, req->ie_len); + rcu_read_lock(); if (!ieee80211_tx_prepare_skb(hwsim->hw, hwsim->hw_scan_vif, probe, hwsim->tmp_chan->band, NULL)) { + rcu_read_unlock(); kfree_skb(probe); continue; } @@ -2490,6 +2492,7 @@ static void hw_scan_work(struct work_struct *work) local_bh_disable(); mac80211_hwsim_tx_frame(hwsim->hw, probe, hwsim->tmp_chan); + rcu_read_unlock(); local_bh_enable(); } } From f971e1887fdb3ab500c9bebf4b98f62d49a20655 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 May 2022 10:21:38 +0200 Subject: [PATCH 715/803] nl80211: fix locking in nl80211_set_tx_bitrate_mask() This accesses the wdev's chandef etc., so cannot safely be used without holding the lock. Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20220506102136.06b7205419e6.I2a87c05fbd8bc5e565e84d190d4cfd2e92695a90@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index aa6094c3c9b0..1a3551b6d18b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11666,18 +11666,23 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, struct cfg80211_bitrate_mask mask; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; int err; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; + wdev_lock(wdev); err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, &mask, dev, true); if (err) - return err; + goto out; - return rdev_set_bitrate_mask(rdev, dev, NULL, &mask); + err = rdev_set_bitrate_mask(rdev, dev, NULL, &mask); +out: + wdev_unlock(wdev); + return err; } static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) From a36e07dfe6ee71e209383ea9288cd8d1617e14f9 Mon Sep 17 00:00:00 2001 From: Gleb Fotengauer-Malinovskiy Date: Fri, 6 May 2022 17:24:54 +0000 Subject: [PATCH 716/803] rfkill: uapi: fix RFKILL_IOCTL_MAX_SIZE ioctl request definition The definition of RFKILL_IOCTL_MAX_SIZE introduced by commit 54f586a91532 ("rfkill: make new event layout opt-in") is unusable since it is based on RFKILL_IOC_EXT_SIZE which has not been defined. Fix that by replacing the undefined constant with the constant which is intended to be used in this definition. Fixes: 54f586a91532 ("rfkill: make new event layout opt-in") Cc: stable@vger.kernel.org # 5.11+ Signed-off-by: Gleb Fotengauer-Malinovskiy Signed-off-by: Dmitry V. Levin Link: https://lore.kernel.org/r/20220506172454.120319-1-glebfm@altlinux.org [add commit message provided later by Dmitry] Signed-off-by: Johannes Berg --- include/uapi/linux/rfkill.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h index 283c5a7b3f2c..db6c8588c1d0 100644 --- a/include/uapi/linux/rfkill.h +++ b/include/uapi/linux/rfkill.h @@ -184,7 +184,7 @@ struct rfkill_event_ext { #define RFKILL_IOC_NOINPUT 1 #define RFKILL_IOCTL_NOINPUT _IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT) #define RFKILL_IOC_MAX_SIZE 2 -#define RFKILL_IOCTL_MAX_SIZE _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_EXT_SIZE, __u32) +#define RFKILL_IOCTL_MAX_SIZE _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_MAX_SIZE, __u32) /* and that's all userspace gets */ From fe503887eed6ea528e144ec8dacfa1d47aa701ac Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 29 Apr 2022 17:49:17 +0100 Subject: [PATCH 717/803] slimbus: qcom: Fix IRQ check in qcom_slim_probe platform_get_irq() returns non-zero IRQ number on success, negative error number on failure. And the doc of platform_get_irq() provides a usage example: int irq = platform_get_irq(pdev, 0); if (irq < 0) return irq; Fix the check of return value to catch errors correctly. Fixes: ad7fcbc308b0 ("slimbus: qcom: Add Qualcomm Slimbus controller driver") Cc: stable@vger.kernel.org Signed-off-by: Miaoqian Lin Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20220429164917.5202-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/qcom-ctrl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/slimbus/qcom-ctrl.c b/drivers/slimbus/qcom-ctrl.c index f04b961b96cd..ec58091fc948 100644 --- a/drivers/slimbus/qcom-ctrl.c +++ b/drivers/slimbus/qcom-ctrl.c @@ -510,9 +510,9 @@ static int qcom_slim_probe(struct platform_device *pdev) } ctrl->irq = platform_get_irq(pdev, 0); - if (!ctrl->irq) { + if (ctrl->irq < 0) { dev_err(&pdev->dev, "no slimbus IRQ\n"); - return -ENODEV; + return ctrl->irq; } sctrl = &ctrl->ctrl; From 085d16d5f949b64713d5e960d6c9bbf51bc1d511 Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Sun, 8 May 2022 15:54:50 +0300 Subject: [PATCH 718/803] nfs: fix broken handling of the softreval mount option Turns out that ever since this mount option was added, passing `softreval` in NFS mount options cancelled all other flags while not affecting the underlying flag `NFS_MOUNT_SOFTREVAL`. Fixes: c74dfe97c104 ("NFS: Add mount option 'softreval'") Signed-off-by: Dan Aloni Signed-off-by: Trond Myklebust --- fs/nfs/fs_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index e2d59bb5e6bb..9a16897e8dc6 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -517,7 +517,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, if (result.negated) ctx->flags &= ~NFS_MOUNT_SOFTREVAL; else - ctx->flags &= NFS_MOUNT_SOFTREVAL; + ctx->flags |= NFS_MOUNT_SOFTREVAL; break; case Opt_posix: if (result.negated) From e4b1045bf9cfec6f70ac6d3783be06c3a88dcb25 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 6 May 2022 11:40:40 +0800 Subject: [PATCH 719/803] ionic: fix missing pci_release_regions() on error in ionic_probe() If ionic_map_bars() fails, pci_release_regions() need be called. Fixes: fbfb8031533c ("ionic: Add hardware init and device commands") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220506034040.2614129-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index 6ffc62c41165..0a7a757494bc 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -256,7 +256,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = ionic_map_bars(ionic); if (err) - goto err_out_pci_disable_device; + goto err_out_pci_release_regions; /* Configure the device */ err = ionic_setup(ionic); @@ -360,6 +360,7 @@ err_out_teardown: err_out_unmap_bars: ionic_unmap_bars(ionic); +err_out_pci_release_regions: pci_release_regions(pdev); err_out_pci_disable_device: pci_disable_device(pdev); From 51ca86b4c9c7c75f5630fa0dbe5f8f0bd98e3c3e Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 6 May 2022 17:42:50 +0800 Subject: [PATCH 720/803] ethernet: tulip: fix missing pci_disable_device() on error in tulip_init_one() Fix the missing pci_disable_device() before return from tulip_init_one() in the error handling case. Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220506094250.3630615-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/dec/tulip/tulip_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 79df5a72877b..0040dcaab945 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1399,8 +1399,10 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* alloc_etherdev ensures aligned and zeroed private structures */ dev = alloc_etherdev (sizeof (*tp)); - if (!dev) + if (!dev) { + pci_disable_device(pdev); return -ENOMEM; + } SET_NETDEV_DEV(dev, &pdev->dev); if (pci_resource_len (pdev, 0) < tulip_tbl[chip_idx].io_size) { @@ -1785,6 +1787,7 @@ err_out_free_res: err_out_free_netdev: free_netdev (dev); + pci_disable_device(pdev); return -ENODEV; } From 4bd46bb037f8e1883dbe1fc9e79896b7f885db3f Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Fri, 6 May 2022 15:37:39 -0700 Subject: [PATCH 721/803] ptp: ocp: Use DIV64_U64_ROUND_UP for rounding. The initial code used roundup() to round the starting time to a multiple of a period. This generated an error on 32-bit systems, so was replaced with DIV_ROUND_UP_ULL(). However, this truncates to 32-bits on a 64-bit system. Replace with DIV64_U64_ROUND_UP() instead. Fixes: b325af3cfab9 ("ptp: ocp: Add signal generators and update sysfs nodes") Signed-off-by: Jonathan Lemon Link: https://lore.kernel.org/r/20220506223739.1930-2-jonathan.lemon@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 0feaa4b45317..dd45471f6780 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -1557,7 +1557,7 @@ ptp_ocp_signal_set(struct ptp_ocp *bp, int gen, struct ptp_ocp_signal *s) start_ns = ktime_set(ts.tv_sec, ts.tv_nsec) + NSEC_PER_MSEC; if (!s->start) { /* roundup() does not work on 32-bit systems */ - s->start = DIV_ROUND_UP_ULL(start_ns, s->period); + s->start = DIV64_U64_ROUND_UP(start_ns, s->period); s->start = ktime_add(s->start, s->phase); } From ee1444b5e1df4155b591d0d9b1e72853a99ea861 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Fri, 6 May 2022 18:10:38 -0700 Subject: [PATCH 722/803] dim: initialize all struct fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The W=2 build pointed out that the code wasn't initializing all the variables in the dim_cq_moder declarations with the struct initializers. The net change here is zero since these structs were already static const globals and were initialized with zeros by the compiler, but removing compiler warnings has value in and of itself. lib/dim/net_dim.c: At top level: lib/dim/net_dim.c:54:9: warning: missing initializer for field ‘comps’ of ‘const struct dim_cq_moder’ [-Wmissing-field-initializers] 54 | NET_DIM_RX_EQE_PROFILES, | ^~~~~~~~~~~~~~~~~~~~~~~ In file included from lib/dim/net_dim.c:6: ./include/linux/dim.h:45:13: note: ‘comps’ declared here 45 | u16 comps; | ^~~~~ and repeats for the tx struct, and once you fix the comps entry then the cq_period_mode field needs the same treatment. Use the commonly accepted style to indicate to the compiler that we know what we're doing, and add a comma at the end of each struct initializer to clean up the issue, and use explicit initializers for the fields we are initializing which makes the compiler happy. While here and fixing these lines, clean up the code slightly with a fix for the super long lines by removing the word "_MODERATION" from a couple defines only used in this file. Fixes: f8be17b81d44 ("lib/dim: Fix -Wunused-const-variable warnings") Signed-off-by: Jesse Brandeburg Link: https://lore.kernel.org/r/20220507011038.14568-1-jesse.brandeburg@intel.com Signed-off-by: Jakub Kicinski --- lib/dim/net_dim.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c index 06811d866775..53f6b9c6e936 100644 --- a/lib/dim/net_dim.c +++ b/lib/dim/net_dim.c @@ -12,41 +12,41 @@ * Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES */ #define NET_DIM_PARAMS_NUM_PROFILES 5 -#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 -#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 +#define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256 +#define NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE 128 #define NET_DIM_DEF_PROFILE_CQE 1 #define NET_DIM_DEF_PROFILE_EQE 1 #define NET_DIM_RX_EQE_PROFILES { \ - {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {.usec = 1, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 8, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 64, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 128, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 256, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,} \ } #define NET_DIM_RX_CQE_PROFILES { \ - {2, 256}, \ - {8, 128}, \ - {16, 64}, \ - {32, 64}, \ - {64, 64} \ + {.usec = 2, .pkts = 256,}, \ + {.usec = 8, .pkts = 128,}, \ + {.usec = 16, .pkts = 64,}, \ + {.usec = 32, .pkts = 64,}, \ + {.usec = 64, .pkts = 64,} \ } #define NET_DIM_TX_EQE_PROFILES { \ - {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \ + {.usec = 1, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 8, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 32, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 64, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ + {.usec = 128, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,} \ } #define NET_DIM_TX_CQE_PROFILES { \ - {5, 128}, \ - {8, 64}, \ - {16, 32}, \ - {32, 32}, \ - {64, 32} \ + {.usec = 5, .pkts = 128,}, \ + {.usec = 8, .pkts = 64,}, \ + {.usec = 16, .pkts = 32,}, \ + {.usec = 32, .pkts = 32,}, \ + {.usec = 64, .pkts = 32,} \ } static const struct dim_cq_moder From 151d6dcbed836270c6c240932da66f147950cbdb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 9 May 2022 16:47:40 -0700 Subject: [PATCH 723/803] hwmon: (ltq-cputemp) restrict it to SOC_XWAY Building with SENSORS_LTQ_CPUTEMP=y with SOC_FALCON=y causes build errors since FALCON does not support the same features as XWAY. Change this symbol to depend on SOC_XWAY since that provides the necessary interfaces. Repairs these build errors: ../drivers/hwmon/ltq-cputemp.c: In function 'ltq_cputemp_enable': ../drivers/hwmon/ltq-cputemp.c:23:9: error: implicit declaration of function 'ltq_cgu_w32'; did you mean 'ltq_ebu_w32'? [-Werror=implicit-function-declaration] 23 | ltq_cgu_w32(ltq_cgu_r32(CGU_GPHY1_CR) | CGU_TEMP_PD, CGU_GPHY1_CR); ../drivers/hwmon/ltq-cputemp.c:23:21: error: implicit declaration of function 'ltq_cgu_r32'; did you mean 'ltq_ebu_r32'? [-Werror=implicit-function-declaration] 23 | ltq_cgu_w32(ltq_cgu_r32(CGU_GPHY1_CR) | CGU_TEMP_PD, CGU_GPHY1_CR); ../drivers/hwmon/ltq-cputemp.c: In function 'ltq_cputemp_probe': ../drivers/hwmon/ltq-cputemp.c:92:31: error: 'SOC_TYPE_VR9_2' undeclared (first use in this function) 92 | if (ltq_soc_type() != SOC_TYPE_VR9_2) Fixes: 7074d0a92758 ("hwmon: (ltq-cputemp) add cpu temp sensor driver") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Florian Eckert Cc: Guenter Roeck Cc: Jean Delvare Cc: linux-hwmon@vger.kernel.org Link: https://lore.kernel.org/r/20220509234740.26841-1-rdunlap@infradead.org Signed-off-by: Guenter Roeck --- drivers/hwmon/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 68a8a27ab3b7..f2b038fa3b84 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -960,7 +960,7 @@ config SENSORS_LTC4261 config SENSORS_LTQ_CPUTEMP bool "Lantiq cpu temperature sensor driver" - depends on LANTIQ + depends on SOC_XWAY help If you say yes here you get support for the temperature sensor inside your CPU. From 7d1e6496616275f3830e2f2f91fa69a66953e95b Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Mon, 9 May 2022 17:34:28 -0700 Subject: [PATCH 724/803] mm: mremap: fix sign for EFAULT error return value The mremap syscall is supposed to return a pointer to the new virtual memory area on success, and a negative value of the error code in case of failure. Currently, EFAULT is returned when the VMA is not found, instead of -EFAULT. The users of this syscall will therefore believe the syscall succeeded in case the VMA didn't exist, as it returns a pointer to address 0xe (0xe being the value of EFAULT). Fix the sign of the error value. Link: https://lkml.kernel.org/r/20220427224439.23828-2-dossche.niels@gmail.com Fixes: 550a7d60bd5e ("mm, hugepages: add mremap() support for hugepage backed vma") Signed-off-by: Niels Dossche Cc: Mina Almasry Cc: Mike Kravetz Cc: Signed-off-by: Andrew Morton --- mm/mremap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mremap.c b/mm/mremap.c index 303d3290b938..0b93fac76851 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -947,7 +947,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, return -EINTR; vma = vma_lookup(mm, addr); if (!vma) { - ret = EFAULT; + ret = -EFAULT; goto out; } From 1927e498aee1757b3df755a194cbfc5cc0f2b663 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Mon, 9 May 2022 17:34:28 -0700 Subject: [PATCH 725/803] procfs: prevent unprivileged processes accessing fdinfo dir The file permissions on the fdinfo dir from were changed from S_IRUSR|S_IXUSR to S_IRUGO|S_IXUGO, and a PTRACE_MODE_READ check was added for opening the fdinfo files [1]. However, the ptrace permission check was not added to the directory, allowing anyone to get the open FD numbers by reading the fdinfo directory. Add the missing ptrace permission check for opening the fdinfo directory. [1] https://lkml.kernel.org/r/20210308170651.919148-1-kaleshsingh@google.com Link: https://lkml.kernel.org/r/20210713162008.1056986-1-kaleshsingh@google.com Fixes: 7bc3fa0172a4 ("procfs: allow reading fdinfo with PTRACE_MODE_READ") Signed-off-by: Kalesh Singh Cc: Kees Cook Cc: Eric W. Biederman Cc: Christian Brauner Cc: Suren Baghdasaryan Cc: Hridya Valsaraju Cc: Jann Horn Signed-off-by: Andrew Morton --- fs/proc/fd.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 172c86270b31..913bef0d2a36 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -72,7 +72,7 @@ out: return 0; } -static int seq_fdinfo_open(struct inode *inode, struct file *file) +static int proc_fdinfo_access_allowed(struct inode *inode) { bool allowed = false; struct task_struct *task = get_proc_task(inode); @@ -86,6 +86,16 @@ static int seq_fdinfo_open(struct inode *inode, struct file *file) if (!allowed) return -EACCES; + return 0; +} + +static int seq_fdinfo_open(struct inode *inode, struct file *file) +{ + int ret = proc_fdinfo_access_allowed(inode); + + if (ret) + return ret; + return single_open(file, seq_show, inode); } @@ -348,12 +358,23 @@ static int proc_readfdinfo(struct file *file, struct dir_context *ctx) proc_fdinfo_instantiate); } +static int proc_open_fdinfo(struct inode *inode, struct file *file) +{ + int ret = proc_fdinfo_access_allowed(inode); + + if (ret) + return ret; + + return 0; +} + const struct inode_operations proc_fdinfo_inode_operations = { .lookup = proc_lookupfdinfo, .setattr = proc_setattr, }; const struct file_operations proc_fdinfo_operations = { + .open = proc_open_fdinfo, .read = generic_read_dir, .iterate_shared = proc_readfdinfo, .llseek = generic_file_llseek, From 260364d112bc822005224667c0c9b1b17a53eafd Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 9 May 2022 17:34:28 -0700 Subject: [PATCH 726/803] arm[64]/memremap: don't abuse pfn_valid() to ensure presence of linear map The semantics of pfn_valid() is to check presence of the memory map for a PFN and not whether a PFN is covered by the linear map. The memory map may be present for NOMAP memory regions, but they won't be mapped in the linear mapping. Accessing such regions via __va() when they are memremap()'ed will cause a crash. On v5.4.y the crash happens on qemu-arm with UEFI [1]: <1>[ 0.084476] 8<--- cut here --- <1>[ 0.084595] Unable to handle kernel paging request at virtual address dfb76000 <1>[ 0.084938] pgd = (ptrval) <1>[ 0.085038] [dfb76000] *pgd=5f7fe801, *pte=00000000, *ppte=00000000 ... <4>[ 0.093923] [] (memcpy) from [] (dmi_setup+0x60/0x418) <4>[ 0.094204] [] (dmi_setup) from [] (arm_dmi_init+0x8/0x10) <4>[ 0.094408] [] (arm_dmi_init) from [] (do_one_initcall+0x50/0x228) <4>[ 0.094619] [] (do_one_initcall) from [] (kernel_init_freeable+0x15c/0x1f8) <4>[ 0.094841] [] (kernel_init_freeable) from [] (kernel_init+0x8/0x10c) <4>[ 0.095057] [] (kernel_init) from [] (ret_from_fork+0x14/0x2c) On kernels v5.10.y and newer the same crash won't reproduce on ARM because commit b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") changed the way memory regions are registered in the resource tree, but that merely covers up the problem. On ARM64 memory resources registered in yet another way and there the issue of wrong usage of pfn_valid() to ensure availability of the linear map is also covered. Implement arch_memremap_can_ram_remap() on ARM and ARM64 to prevent access to NOMAP regions via the linear mapping in memremap(). Link: https://lore.kernel.org/all/Yl65zxGgFzF1Okac@sirena.org.uk Link: https://lkml.kernel.org/r/20220426060107.7618-1-rppt@kernel.org Signed-off-by: Mike Rapoport Reported-by: "kernelci.org bot" Tested-by: Mark Brown Reviewed-by: Ard Biesheuvel Acked-by: Catalin Marinas Cc: Greg Kroah-Hartman Cc: Mark Brown Cc: Mark-PK Tsai Cc: Russell King Cc: Tony Lindgren Cc: Will Deacon Cc: [5.4+] Signed-off-by: Andrew Morton --- arch/arm/include/asm/io.h | 3 +++ arch/arm/mm/ioremap.c | 8 ++++++++ arch/arm64/include/asm/io.h | 4 ++++ arch/arm64/mm/ioremap.c | 8 ++++++++ 4 files changed, 23 insertions(+) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 0c70eb688a00..2a0739a2350b 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -440,6 +440,9 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr); #define ARCH_HAS_VALID_PHYS_ADDR_RANGE extern int valid_phys_addr_range(phys_addr_t addr, size_t size); extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); +extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags); +#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap #endif /* diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index aa08bcb72db9..290702328a33 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -493,3 +493,11 @@ void __init early_ioremap_init(void) { early_ioremap_setup(); } + +bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags) +{ + unsigned long pfn = PHYS_PFN(offset); + + return memblock_is_map_memory(pfn); +} diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 7fd836bea7eb..3995652daf81 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -192,4 +192,8 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); extern int valid_phys_addr_range(phys_addr_t addr, size_t size); extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); +extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags); +#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap + #endif /* __ASM_IO_H */ diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index b7c81dacabf0..b21f91cd830d 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -99,3 +99,11 @@ void __init early_ioremap_init(void) { early_ioremap_setup(); } + +bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags) +{ + unsigned long pfn = PHYS_PFN(offset); + + return pfn_is_map_memory(pfn); +} From 5fcaa7caf5fa7f3ad9eaba1897f226d520f963cc Mon Sep 17 00:00:00 2001 From: Martyna Szapar-Mudlaw Date: Mon, 9 May 2022 17:34:29 -0700 Subject: [PATCH 727/803] mailmap: add entry for martyna.szapar-mudlaw@intel.com Separate linux.intel.com account was created for submitting and reviewing kernel patches, thus need to map previously used primary Intel e-mail address. Link: https://lkml.kernel.org/r/20220505132624.41802-1-martyna.szapar-mudlaw@linux.intel.com Signed-off-by: Martyna Szapar-Mudlaw Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 93458154ce7d..36a275ab8c52 100644 --- a/.mailmap +++ b/.mailmap @@ -249,6 +249,7 @@ Mark Yao Martin Kepplinger Martin Kepplinger Martin Kepplinger +Martyna Szapar-Mudlaw Mathieu Othacehe Matthew Wilcox Matthew Wilcox From 2839b0999c20c9f6bf353849c69370e121e2fa1a Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Mon, 9 May 2022 17:34:29 -0700 Subject: [PATCH 728/803] mm/kfence: reset PG_slab and memcg_data before freeing __kfence_pool When kfence fails to initialize kfence pool, it frees the pool. But it does not reset memcg_data and PG_slab flag. Below is a BUG because of this. Let's fix it by resetting memcg_data and PG_slab flag before free. [ 0.089149] BUG: Bad page state in process swapper/0 pfn:3d8e06 [ 0.089149] page:ffffea46cf638180 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x3d8e06 [ 0.089150] memcg:ffffffff94a475d1 [ 0.089150] flags: 0x17ffffc0000200(slab|node=0|zone=2|lastcpupid=0x1fffff) [ 0.089151] raw: 0017ffffc0000200 ffffea46cf638188 ffffea46cf638188 0000000000000000 [ 0.089152] raw: 0000000000000000 0000000000000000 00000000ffffffff ffffffff94a475d1 [ 0.089152] page dumped because: page still charged to cgroup [ 0.089153] Modules linked in: [ 0.089153] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G B W 5.18.0-rc1+ #965 [ 0.089154] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 [ 0.089154] Call Trace: [ 0.089155] [ 0.089155] dump_stack_lvl+0x49/0x5f [ 0.089157] dump_stack+0x10/0x12 [ 0.089158] bad_page.cold+0x63/0x94 [ 0.089159] check_free_page_bad+0x66/0x70 [ 0.089160] __free_pages_ok+0x423/0x530 [ 0.089161] __free_pages_core+0x8e/0xa0 [ 0.089162] memblock_free_pages+0x10/0x12 [ 0.089164] memblock_free_late+0x8f/0xb9 [ 0.089165] kfence_init+0x68/0x92 [ 0.089166] start_kernel+0x789/0x992 [ 0.089167] x86_64_start_reservations+0x24/0x26 [ 0.089168] x86_64_start_kernel+0xa9/0xaf [ 0.089170] secondary_startup_64_no_verify+0xd5/0xdb [ 0.089171] Link: https://lkml.kernel.org/r/YnPG3pQrqfcgOlVa@hyeyoo Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Fixes: 8f0b36497303 ("mm: kfence: fix objcgs vector allocation") Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Marco Elver Reviewed-by: Muchun Song Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton --- mm/kfence/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 9b2b5f56f4ae..11a954763be9 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -621,6 +621,16 @@ static bool __init kfence_init_pool_early(void) * fails for the first page, and therefore expect addr==__kfence_pool in * most failure cases. */ + for (char *p = (char *)addr; p < __kfence_pool + KFENCE_POOL_SIZE; p += PAGE_SIZE) { + struct slab *slab = virt_to_slab(p); + + if (!slab) + continue; +#ifdef CONFIG_MEMCG + slab->memcg_data = 0; +#endif + __folio_clear_slab(slab_folio(slab)); + } memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool)); __kfence_pool = NULL; return false; From 41c240099fe09377b6b9f8272e45d2267c843d3e Mon Sep 17 00:00:00 2001 From: Joel Savitz Date: Mon, 9 May 2022 17:34:29 -0700 Subject: [PATCH 729/803] selftests: vm: Makefile: rename TARGETS to VMTARGETS The tools/testing/selftests/vm/Makefile uses the variable TARGETS internally to generate a list of platform-specific binary build targets suffixed with _{32,64}. When building the selftests using its own Makefile directly, such as via the following command run in a kernel tree: One receives an error such as the following: make: Entering directory '/root/linux/tools/testing/selftests' make --no-builtin-rules ARCH=x86 -C ../../.. headers_install make[1]: Entering directory '/root/linux' INSTALL ./usr/include make[1]: Leaving directory '/root/linux' make[1]: Entering directory '/root/linux/tools/testing/selftests/vm' make[1]: *** No rule to make target 'vm.c', needed by '/root/linux/tools/testing/selftests/vm/vm_64'. Stop. make[1]: Leaving directory '/root/linux/tools/testing/selftests/vm' make: *** [Makefile:175: all] Error 2 make: Leaving directory '/root/linux/tools/testing/selftests' The TARGETS variable passed to tools/testing/selftests/Makefile collides with the TARGETS used in tools/testing/selftests/vm/Makefile, so rename the latter to VMTARGETS, eliminating the collision with no functional change. Link: https://lkml.kernel.org/r/20220504213454.1282532-1-jsavitz@redhat.com Fixes: f21fda8f6453 ("selftests: vm: pkeys: fix multilib builds for x86") Signed-off-by: Joel Savitz Acked-by: Nico Pache Cc: Joel Savitz Cc: Shuah Khan Cc: Sandipan Das Cc: Dave Hansen Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 04a49e876a46..5b1ecd00695b 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -57,9 +57,9 @@ CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_prog CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c) CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie) -TARGETS := protection_keys -BINARIES_32 := $(TARGETS:%=%_32) -BINARIES_64 := $(TARGETS:%=%_64) +VMTARGETS := protection_keys +BINARIES_32 := $(VMTARGETS:%=%_32) +BINARIES_64 := $(VMTARGETS:%=%_64) ifeq ($(CAN_BUILD_WITH_NOPIE),1) CFLAGS += -no-pie @@ -112,7 +112,7 @@ $(BINARIES_32): CFLAGS += -m32 -mxsave $(BINARIES_32): LDLIBS += -lrt -ldl -lm $(BINARIES_32): $(OUTPUT)/%_32: %.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ -$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t)))) +$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-32,$(t)))) endif ifeq ($(CAN_BUILD_X86_64),1) @@ -120,7 +120,7 @@ $(BINARIES_64): CFLAGS += -m64 -mxsave $(BINARIES_64): LDLIBS += -lrt -ldl $(BINARIES_64): $(OUTPUT)/%_64: %.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ -$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t)))) +$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-64,$(t)))) endif # x86_64 users should be encouraged to install 32-bit libraries From 9039b8335276569670caaf23606335946625e764 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 9 May 2022 17:34:29 -0700 Subject: [PATCH 730/803] MAINTAINERS: add a mailing list for DAMON development This commit adds an open mailing list for DAMON in MAINTAINERS file. Link: https://lkml.kernel.org/r/20220503180741.137079-1-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2647adf30569..df6c341aca44 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5438,6 +5438,7 @@ F: net/ax25/sysctl_net_ax25.c DATA ACCESS MONITOR M: SeongJae Park +L: damon@lists.linux.dev L: linux-mm@kvack.org S: Maintained F: Documentation/ABI/testing/sysfs-kernel-mm-damon From 91a7cda1f4b8bdf770000a3b60640576dafe0cec Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Fri, 6 May 2022 08:08:15 +0200 Subject: [PATCH 731/803] net: phy: Fix race condition on link status change This fixes the following error caused by a race condition between phydev->adjust_link() and a MDIO transaction in the phy interrupt handler. The issue was reproduced with the ethernet FEC driver and a micrel KSZ9031 phy. [ 146.195696] fec 2188000.ethernet eth0: MDIO read timeout [ 146.201779] ------------[ cut here ]------------ [ 146.206671] WARNING: CPU: 0 PID: 571 at drivers/net/phy/phy.c:942 phy_error+0x24/0x6c [ 146.214744] Modules linked in: bnep imx_vdoa imx_sdma evbug [ 146.220640] CPU: 0 PID: 571 Comm: irq/128-2188000 Not tainted 5.18.0-rc3-00080-gd569e86915b7 #9 [ 146.229563] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) [ 146.236257] unwind_backtrace from show_stack+0x10/0x14 [ 146.241640] show_stack from dump_stack_lvl+0x58/0x70 [ 146.246841] dump_stack_lvl from __warn+0xb4/0x24c [ 146.251772] __warn from warn_slowpath_fmt+0x5c/0xd4 [ 146.256873] warn_slowpath_fmt from phy_error+0x24/0x6c [ 146.262249] phy_error from kszphy_handle_interrupt+0x40/0x48 [ 146.268159] kszphy_handle_interrupt from irq_thread_fn+0x1c/0x78 [ 146.274417] irq_thread_fn from irq_thread+0xf0/0x1dc [ 146.279605] irq_thread from kthread+0xe4/0x104 [ 146.284267] kthread from ret_from_fork+0x14/0x28 [ 146.289164] Exception stack(0xe6fa1fb0 to 0xe6fa1ff8) [ 146.294448] 1fa0: 00000000 00000000 00000000 00000000 [ 146.302842] 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 146.311281] 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 [ 146.318262] irq event stamp: 12325 [ 146.321780] hardirqs last enabled at (12333): [] __up_console_sem+0x50/0x60 [ 146.330013] hardirqs last disabled at (12342): [] __up_console_sem+0x3c/0x60 [ 146.338259] softirqs last enabled at (12324): [] __do_softirq+0x2c0/0x624 [ 146.346311] softirqs last disabled at (12319): [] __irq_exit_rcu+0x138/0x178 [ 146.354447] ---[ end trace 0000000000000000 ]--- With the FEC driver phydev->adjust_link() calls fec_enet_adjust_link() calls fec_stop()/fec_restart() and both these function reset and temporary disable the FEC disrupting any MII transaction that could be happening at the same time. fec_enet_adjust_link() and phy_read() can be running at the same time when we have one additional interrupt before the phy_state_machine() is able to terminate. Thread 1 (phylib WQ) | Thread 2 (phy interrupt) | | phy_interrupt() <-- PHY IRQ | handle_interrupt() | phy_read() | phy_trigger_machine() | --> schedule phylib WQ | | phy_state_machine() | phy_check_link_status() | phy_link_change() | phydev->adjust_link() | fec_enet_adjust_link() | --> FEC reset | phy_interrupt() <-- PHY IRQ | phy_read() | Fix this by acquiring the phydev lock in phy_interrupt(). Link: https://lore.kernel.org/all/20220422152612.GA510015@francesco-nb.int.toradex.com/ Fixes: c974bdbc3e77 ("net: phy: Use threaded IRQ, to allow IRQ from sleeping devices") cc: Signed-off-by: Francesco Dolcini Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220506060815.327382-1-francesco.dolcini@toradex.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index beb2b66da132..f122026c4682 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -970,8 +970,13 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) { struct phy_device *phydev = phy_dat; struct phy_driver *drv = phydev->drv; + irqreturn_t ret; - return drv->handle_interrupt(phydev); + mutex_lock(&phydev->lock); + ret = drv->handle_interrupt(phydev); + mutex_unlock(&phydev->lock); + + return ret; } /** From 630fd4822af2374cd75c682b7665dcb367613765 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 7 May 2022 16:45:50 +0300 Subject: [PATCH 732/803] net: dsa: flush switchdev workqueue on bridge join error path There is a race between switchdev_bridge_port_offload() and the dsa_port_switchdev_sync_attrs() call right below it. When switchdev_bridge_port_offload() finishes, FDB entries have been replayed by the bridge, but are scheduled for deferred execution later. However dsa_port_switchdev_sync_attrs -> dsa_port_can_apply_vlan_filtering() may impose restrictions on the vlan_filtering attribute and refuse offloading. When this happens, the delayed FDB entries will dereference dp->bridge, which is a NULL pointer because we have stopped the process of offloading this bridge. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Workqueue: dsa_ordered dsa_slave_switchdev_event_work pc : dsa_port_bridge_host_fdb_del+0x64/0x100 lr : dsa_slave_switchdev_event_work+0x130/0x1bc Call trace: dsa_port_bridge_host_fdb_del+0x64/0x100 dsa_slave_switchdev_event_work+0x130/0x1bc process_one_work+0x294/0x670 worker_thread+0x80/0x460 ---[ end trace 0000000000000000 ]--- Error: dsa_core: Must first remove VLAN uppers having VIDs also present in bridge. Fix the bug by doing what we do on the normal bridge leave path as well, which is to wait until the deferred FDB entries complete executing, then exit. The placement of dsa_flush_workqueue() after switchdev_bridge_port_unoffload() guarantees that both the FDB additions and deletions on rollback are waited for. Fixes: d7d0d423dbaa ("net: dsa: flush switchdev workqueue when leaving the bridge") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220507134550.1849834-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/port.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dsa/port.c b/net/dsa/port.c index cdc56ba11f52..bdccb613285d 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -451,6 +451,7 @@ out_rollback_unoffload: switchdev_bridge_port_unoffload(brport_dev, dp, &dsa_slave_switchdev_notifier, &dsa_slave_switchdev_blocking_notifier); + dsa_flush_workqueue(); out_rollback_unbridge: dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info); out_rollback: From 620239d9a32e9fe27c9204ec11e40058671aeeb6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 25 Apr 2022 15:54:27 -0400 Subject: [PATCH 733/803] ceph: fix setting of xattrs on async created inodes Currently when we create a file, we spin up an xattr buffer to send along with the create request. If we end up doing an async create however, then we currently pass down a zero-length xattr buffer. Fix the code to send down the xattr buffer in req->r_pagelist. If the xattrs span more than a page, however give up and don't try to do an async create. Cc: stable@vger.kernel.org URL: https://bugzilla.redhat.com/show_bug.cgi?id=2063929 Fixes: 9a8d03ca2e2c ("ceph: attempt to do async create when possible") Reported-by: John Fortin Reported-by: Sri Ramanujam Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 6c9e837aa1d3..8c8226c0feac 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -629,9 +629,15 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, iinfo.change_attr = 1; ceph_encode_timespec64(&iinfo.btime, &now); - iinfo.xattr_len = ARRAY_SIZE(xattr_buf); - iinfo.xattr_data = xattr_buf; - memset(iinfo.xattr_data, 0, iinfo.xattr_len); + if (req->r_pagelist) { + iinfo.xattr_len = req->r_pagelist->length; + iinfo.xattr_data = req->r_pagelist->mapped_tail; + } else { + /* fake it */ + iinfo.xattr_len = ARRAY_SIZE(xattr_buf); + iinfo.xattr_data = xattr_buf; + memset(iinfo.xattr_data, 0, iinfo.xattr_len); + } in.ino = cpu_to_le64(vino.ino); in.snapid = cpu_to_le64(CEPH_NOSNAP); @@ -743,6 +749,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, err = ceph_security_init_secctx(dentry, mode, &as_ctx); if (err < 0) goto out_ctx; + /* Async create can't handle more than a page of xattrs */ + if (as_ctx.pagelist && + !list_is_singular(&as_ctx.pagelist->head)) + try_async = false; } else if (!d_in_lookup(dentry)) { /* If it's not being looked up, it's negative */ return -ENOENT; From 642d51fb0775a41dd6bb3d99b5a40c24df131c20 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 5 May 2022 18:53:09 +0800 Subject: [PATCH 734/803] ceph: check folio PG_private bit instead of folio->private The pages in the file mapping maybe reclaimed and reused by other subsystems and the page->private maybe used as flags field or something else, if later that pages are used by page caches again the page->private maybe not cleared as expected. Here will check the PG_private bit instead of the folio->private. Cc: stable@vger.kernel.org URL: https://tracker.ceph.com/issues/55421 Signed-off-by: Xiubo Li Reviewed-by: Luis Henriques Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index aa25bffd4823..b6edcf89a429 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -85,7 +85,7 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio) if (folio_test_dirty(folio)) { dout("%p dirty_folio %p idx %lu -- already dirty\n", mapping->host, folio, folio->index); - BUG_ON(!folio_get_private(folio)); + VM_BUG_ON_FOLIO(!folio_test_private(folio), folio); return false; } @@ -122,7 +122,7 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio) * Reference snap context in folio->private. Also set * PagePrivate so that we get invalidate_folio callback. */ - BUG_ON(folio_get_private(folio)); + VM_BUG_ON_FOLIO(folio_test_private(folio), folio); folio_attach_private(folio, snapc); return ceph_fscache_dirty_folio(mapping, folio); @@ -150,7 +150,7 @@ static void ceph_invalidate_folio(struct folio *folio, size_t offset, } WARN_ON(!folio_test_locked(folio)); - if (folio_get_private(folio)) { + if (folio_test_private(folio)) { dout("%p invalidate_folio idx %lu full dirty page\n", inode, folio->index); @@ -729,8 +729,11 @@ static void writepages_finish(struct ceph_osd_request *req) /* clean all pages */ for (i = 0; i < req->r_num_ops; i++) { - if (req->r_ops[i].op != CEPH_OSD_OP_WRITE) + if (req->r_ops[i].op != CEPH_OSD_OP_WRITE) { + pr_warn("%s incorrect op %d req %p index %d tid %llu\n", + __func__, req->r_ops[i].op, req, i, req->r_tid); break; + } osd_data = osd_req_op_extent_osd_data(req, i); BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES); From 1809c30b6e5a83a1de1435fe01aaa4de4d626a7c Mon Sep 17 00:00:00 2001 From: Manuel Ullmann Date: Wed, 4 May 2022 21:30:44 +0200 Subject: [PATCH 735/803] net: atlantic: always deep reset on pm op, fixing up my null deref regression The impact of this regression is the same for resume that I saw on thaw: the kernel hangs and nothing except SysRq rebooting can be done. Fixes regression in commit cbe6c3a8f8f4 ("net: atlantic: invert deep par in pm functions, preventing null derefs"), where I disabled deep pm resets in suspend and resume, trying to make sense of the atl_resume_common() deep parameter in the first place. It turns out, that atlantic always has to deep reset on pm operations. Even though I expected that and tested resume, I screwed up by kexec-rebooting into an unpatched kernel, thus missing the breakage. This fixup obsoletes the deep parameter of atl_resume_common, but I leave the cleanup for the maintainers to post to mainline. Suspend and hibernation were successfully tested by the reporters. Fixes: cbe6c3a8f8f4 ("net: atlantic: invert deep par in pm functions, preventing null derefs") Link: https://lore.kernel.org/regressions/9-Ehc_xXSwdXcvZqKD5aSqsqeNj5Izco4MYEwnx5cySXVEc9-x_WC4C3kAoCqNTi-H38frroUK17iobNVnkLtW36V6VWGSQEOHXhmVMm5iQ=@protonmail.com/ Reported-by: Jordan Leppert Reported-by: Holger Hoffstaette Tested-by: Jordan Leppert Tested-by: Holger Hoffstaette CC: # 5.10+ Signed-off-by: Manuel Ullmann Link: https://lore.kernel.org/r/87bkw8dfmp.fsf@posteo.de Signed-off-by: Paolo Abeni --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 3a529ee8c834..831833911a52 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -449,7 +449,7 @@ static int aq_pm_freeze(struct device *dev) static int aq_pm_suspend_poweroff(struct device *dev) { - return aq_suspend_common(dev, false); + return aq_suspend_common(dev, true); } static int aq_pm_thaw(struct device *dev) @@ -459,7 +459,7 @@ static int aq_pm_thaw(struct device *dev) static int aq_pm_resume_restore(struct device *dev) { - return atl_resume_common(dev, false); + return atl_resume_common(dev, true); } static const struct dev_pm_ops aq_pm_ops = { From ef3a6b70507a2add2cd2e01f5eb9b54d561bacb9 Mon Sep 17 00:00:00 2001 From: Charan Teja Reddy Date: Tue, 10 May 2022 01:19:57 +0530 Subject: [PATCH 736/803] dma-buf: call dma_buf_stats_setup after dmabuf is in valid list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When dma_buf_stats_setup() fails, it closes the dmabuf file which results into the calling of dma_buf_file_release() where it does list_del(&dmabuf->list_node) with out first adding it to the proper list. This is resulting into panic in the below path: __list_del_entry_valid+0x38/0xac dma_buf_file_release+0x74/0x158 __fput+0xf4/0x428 ____fput+0x14/0x24 task_work_run+0x178/0x24c do_notify_resume+0x194/0x264 work_pending+0xc/0x5f0 Fix it by moving the dma_buf_stats_setup() after dmabuf is added to the list. Fixes: bdb8d06dfefd ("dmabuf: Add the capability to expose DMA-BUF stats in sysfs") Signed-off-by: Charan Teja Reddy Tested-by: T.J. Mercier Acked-by: T.J. Mercier Cc: # 5.15.x+ Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/1652125797-2043-1-git-send-email-quic_charante@quicinc.com --- drivers/dma-buf/dma-buf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index df23239b04fc..b1e25ae98302 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -543,10 +543,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) file->f_mode |= FMODE_LSEEK; dmabuf->file = file; - ret = dma_buf_stats_setup(dmabuf); - if (ret) - goto err_sysfs; - mutex_init(&dmabuf->lock); INIT_LIST_HEAD(&dmabuf->attachments); @@ -554,6 +550,10 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) list_add(&dmabuf->list_node, &db_list.head); mutex_unlock(&db_list.lock); + ret = dma_buf_stats_setup(dmabuf); + if (ret) + goto err_sysfs; + return dmabuf; err_sysfs: From 846a3351ddfe4a86eede4bb26a205c3f38ef84d3 Mon Sep 17 00:00:00 2001 From: Jing Xia Date: Tue, 10 May 2022 10:35:14 +0800 Subject: [PATCH 737/803] writeback: Avoid skipping inode writeback We have run into an issue that a task gets stuck in balance_dirty_pages_ratelimited() when perform I/O stress testing. The reason we observed is that an I_DIRTY_PAGES inode with lots of dirty pages is in b_dirty_time list and standard background writeback cannot writeback the inode. After studing the relevant code, the following scenario may lead to the issue: task1 task2 ----- ----- fuse_flush write_inode_now //in b_dirty_time writeback_single_inode __writeback_single_inode fuse_write_end filemap_dirty_folio __xa_set_mark:PAGECACHE_TAG_DIRTY lock inode->i_lock if mapping tagged PAGECACHE_TAG_DIRTY inode->i_state |= I_DIRTY_PAGES unlock inode->i_lock __mark_inode_dirty:I_DIRTY_PAGES lock inode->i_lock -was dirty,inode stays in -b_dirty_time unlock inode->i_lock if(!(inode->i_state & I_DIRTY_All)) -not true,so nothing done This patch moves the dirty inode to b_dirty list when the inode currently is not queued in b_io or b_more_io list at the end of writeback_single_inode. Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig CC: stable@vger.kernel.org Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option") Signed-off-by: Jing Xia Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20220510023514.27399-1-jing.xia@unisoc.com --- fs/fs-writeback.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 591fe9cf1659..1fae0196292a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1712,6 +1712,10 @@ static int writeback_single_inode(struct inode *inode, */ if (!(inode->i_state & I_DIRTY_ALL)) inode_cgwb_move_to_attached(inode, wb); + else if (!(inode->i_state & I_SYNC_QUEUED) && + (inode->i_state & I_DIRTY)) + redirty_tail_locked(inode, wb); + spin_unlock(&wb->list_lock); inode_sync_complete(inode); out: From dc5306a8c0eace6c113aded2e36ae5e15fdca4d7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 8 May 2022 03:22:17 -0700 Subject: [PATCH 738/803] decnet: Use container_of() for struct dn_neigh casts Clang's structure layout randomization feature gets upset when it sees struct neighbor (which is randomized) cast to struct dn_neigh: net/decnet/dn_route.c:1123:15: error: casting from randomized structure pointer type 'struct neighbour *' to 'struct dn_neigh *' gateway = ((struct dn_neigh *)neigh)->addr; ^ Update all the open-coded casts to use container_of() to do the conversion instead of depending on strict member ordering. Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202205041247.WKBEHGS5-lkp@intel.com Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Yajun Deng Cc: Zheng Yongjun Cc: Bill Wendling Cc: linux-decnet-user@lists.sourceforge.net Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220508102217.2647184-1-keescook@chromium.org Signed-off-by: Paolo Abeni --- net/decnet/dn_dev.c | 4 ++-- net/decnet/dn_neigh.c | 3 ++- net/decnet/dn_route.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 0ee7d4c0c955..a09ba642b5e7 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -854,7 +854,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) memcpy(msg->neighbor, dn_hiord, ETH_ALEN); if (dn_db->router) { - struct dn_neigh *dn = (struct dn_neigh *)dn_db->router; + struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n); dn_dn2eth(msg->neighbor, dn->addr); } @@ -902,7 +902,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) { int n; struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - struct dn_neigh *dn = (struct dn_neigh *)dn_db->router; + struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n); struct sk_buff *skb; size_t size; unsigned char *ptr; diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 94b306f6d551..fbd98ac853ea 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -426,7 +426,8 @@ int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb) if (!dn_db->router) { dn_db->router = neigh_clone(neigh); } else { - if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority) + if (msg->priority > container_of(dn_db->router, + struct dn_neigh, n)->priority) neigh_release(xchg(&dn_db->router, neigh_clone(neigh))); } } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 7e85f2a1ae25..d1d78a463a06 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1120,7 +1120,7 @@ source_ok: /* Ok then, we assume its directly connected and move on */ select_source: if (neigh) - gateway = ((struct dn_neigh *)neigh)->addr; + gateway = container_of(neigh, struct dn_neigh, n)->addr; if (gateway == 0) gateway = fld.daddr; if (fld.saddr == 0) { @@ -1429,7 +1429,7 @@ static int dn_route_input_slow(struct sk_buff *skb) /* Use the default router if there is one */ neigh = neigh_clone(dn_db->router); if (neigh) { - gateway = ((struct dn_neigh *)neigh)->addr; + gateway = container_of(neigh, struct dn_neigh, n)->addr; goto make_route; } From 628bf55b620497a105f4963ee8fb84769f7e6bb4 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 10 May 2022 09:38:58 +0100 Subject: [PATCH 739/803] iommu/arm-smmu: Force identity domains for legacy binding When using the legacy "mmu-masters" DT binding, we reject DMA domains since we have no guarantee of driver probe order and thus can't rely on client drivers getting the correct DMA ops. However, we can do better than fall back to the old no-default-domain behaviour now, by forcing an identity default domain instead. This also means that detaching from a VFIO domain can actually work - that looks to have been broken for over 6 years, so clearly isn't something that legacy binding users care about, but we may as well make the driver code make sense anyway. Suggested-by: Jason Gunthorpe Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/9805e4c492cb972bdcdd57999d2d001a2d8b5aab.1652171938.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 52b71f6aee3f..2ed3594f384e 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1574,6 +1574,9 @@ static int arm_smmu_def_domain_type(struct device *dev) struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); const struct arm_smmu_impl *impl = cfg->smmu->impl; + if (using_legacy_binding) + return IOMMU_DOMAIN_IDENTITY; + if (impl && impl->def_domain_type) return impl->def_domain_type(dev); From 205f3991a273cac6008ef4db3d1c0dc54d14fb56 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 10 May 2022 11:27:21 +0100 Subject: [PATCH 740/803] arm64: vdso: fix makefile dependency on vdso.so There is currently no dependency for vdso*-wrap.S on vdso*.so, which means that you can get a build that uses a stale vdso*-wrap.o. In commit a5b8ca97fbf8, the file that includes the vdso.so was moved and renamed from arch/arm64/kernel/vdso/vdso.S to arch/arm64/kernel/vdso-wrap.S, when this happened the Makefile was not updated to force the dependcy on vdso.so. Fixes: a5b8ca97fbf8 ("arm64: do not descend to vdso directories twice") Signed-off-by: Joey Gouly Cc: Masahiro Yamada Cc: Vincenzo Frascino Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20220510102721.50811-1-joey.gouly@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 4 ++++ arch/arm64/kernel/vdso/Makefile | 3 --- arch/arm64/kernel/vdso32/Makefile | 3 --- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 986837d7ec82..fa7981d0d917 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -75,6 +75,10 @@ obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso-wrap.o obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o +# Force dependency (vdso*-wrap.S includes vdso.so through incbin) +$(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so +$(obj)/vdso32-wrap.o: $(obj)/vdso32/vdso.so + obj-y += probes/ head-y := head.o extra-y += $(head-y) vmlinux.lds diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 172452f79e46..ac1964ebed1e 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -52,9 +52,6 @@ GCOV_PROFILE := n targets += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) -# Force dependency (incbin is bad) -$(obj)/vdso.o : $(obj)/vdso.so - # Link rule for the .so file, .lds has to be first $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold_and_vdso_check) diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index ed181bedbffc..05ba1aae1b6f 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -131,9 +131,6 @@ obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) targets += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) -# Force dependency (vdso.s includes vdso.so through incbin) -$(obj)/vdso.o: $(obj)/vdso.so - include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) From 7ff960a6fe399fdcbca6159063684671ae57eee9 Mon Sep 17 00:00:00 2001 From: Shunsuke Mie Date: Tue, 10 May 2022 19:27:23 +0900 Subject: [PATCH 741/803] virtio: fix virtio transitional ids This commit fixes the transitional PCI device ID. Fixes: d61914ea6ada ("virtio: update virtio id table, add transitional ids") Signed-off-by: Shunsuke Mie Link: https://lore.kernel.org/r/20220510102723.87666-1-mie@igel.co.jp Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_ids.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 80d76b75bccd..7aa2eb766205 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -73,12 +73,12 @@ * Virtio Transitional IDs */ -#define VIRTIO_TRANS_ID_NET 1000 /* transitional virtio net */ -#define VIRTIO_TRANS_ID_BLOCK 1001 /* transitional virtio block */ -#define VIRTIO_TRANS_ID_BALLOON 1002 /* transitional virtio balloon */ -#define VIRTIO_TRANS_ID_CONSOLE 1003 /* transitional virtio console */ -#define VIRTIO_TRANS_ID_SCSI 1004 /* transitional virtio SCSI */ -#define VIRTIO_TRANS_ID_RNG 1005 /* transitional virtio rng */ -#define VIRTIO_TRANS_ID_9P 1009 /* transitional virtio 9p console */ +#define VIRTIO_TRANS_ID_NET 0x1000 /* transitional virtio net */ +#define VIRTIO_TRANS_ID_BLOCK 0x1001 /* transitional virtio block */ +#define VIRTIO_TRANS_ID_BALLOON 0x1002 /* transitional virtio balloon */ +#define VIRTIO_TRANS_ID_CONSOLE 0x1003 /* transitional virtio console */ +#define VIRTIO_TRANS_ID_SCSI 0x1004 /* transitional virtio SCSI */ +#define VIRTIO_TRANS_ID_RNG 0x1005 /* transitional virtio rng */ +#define VIRTIO_TRANS_ID_9P 0x1009 /* transitional virtio 9p console */ #endif /* _LINUX_VIRTIO_IDS_H */ From c1ad35dd0548ce947d97aaf92f7f2f9a202951cf Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 10 May 2022 12:36:04 +0200 Subject: [PATCH 742/803] udf: Avoid using stale lengthOfImpUse udf_write_fi() uses lengthOfImpUse of the entry it is writing to. However this field has not yet been initialized so it either contains completely bogus value or value from last directory entry at that place. In either case this is wrong and can lead to filesystem corruption or kernel crashes. Reported-by: butt3rflyh4ck CC: stable@vger.kernel.org Fixes: 979a6e28dd96 ("udf: Get rid of 0-length arrays in struct fileIdentDesc") Signed-off-by: Jan Kara --- fs/udf/namei.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 0ed4861b038f..b3d5f97f16cd 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -75,11 +75,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, if (fileident) { if (adinicb || (offset + lfi < 0)) { - memcpy(udf_get_fi_ident(sfi), fileident, lfi); + memcpy(sfi->impUse + liu, fileident, lfi); } else if (offset >= 0) { memcpy(fibh->ebh->b_data + offset, fileident, lfi); } else { - memcpy(udf_get_fi_ident(sfi), fileident, -offset); + memcpy(sfi->impUse + liu, fileident, -offset); memcpy(fibh->ebh->b_data, fileident - offset, lfi + offset); } @@ -88,11 +88,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, offset += lfi; if (adinicb || (offset + padlen < 0)) { - memset(udf_get_fi_ident(sfi) + lfi, 0x00, padlen); + memset(sfi->impUse + liu + lfi, 0x00, padlen); } else if (offset >= 0) { memset(fibh->ebh->b_data + offset, 0x00, padlen); } else { - memset(udf_get_fi_ident(sfi) + lfi, 0x00, -offset); + memset(sfi->impUse + liu + lfi, 0x00, -offset); memset(fibh->ebh->b_data, 0x00, padlen + offset); } From c46721e4604f260918e660550a16d1e28637d66c Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Mon, 9 May 2022 10:35:10 -0600 Subject: [PATCH 743/803] MAINTAINERS: Add James and Mike as Arm64 performance events reviewers James, Mike and Leo have been doing all the reviews and development work for the Coresight perf tools for a couple of years now. As such remove my name and add James and Mike as official reviewers (Leo is already listed as such). Signed-off-by: Mathieu Poirier Cc: James Clark Cc: Leo Yan Cc: Mike Leach Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Arnaldo Carvalho de Melo --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e8c52d0192a6..2da9d14dde85 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15475,7 +15475,8 @@ F: tools/perf/ PERFORMANCE EVENTS TOOLING ARM64 R: John Garry R: Will Deacon -R: Mathieu Poirier +R: James Clark +R: Mike Leach R: Leo Yan L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported From 575f00edea0a7117e6a4337800ebf62e2a1d09d6 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 24 Mar 2022 15:16:05 +0000 Subject: [PATCH 744/803] Documentation/process: Update ARM contact for embargoed hardware issues With Grant taking a prominent role in Linaro, I will take over as the process ambassador for ARM w.r.t. embargoed hardware issues. Signed-off-by: Catalin Marinas Cc: Grant Likely Cc: Jonathan Corbet Signed-off-by: Jonathan Corbet Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 98d7bc868f2a..95999302d279 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -246,7 +246,7 @@ an involved disclosed party. The current ambassadors list: ============= ======================================================== AMD Tom Lendacky Ampere Darren Hart - ARM Grant Likely + ARM Catalin Marinas IBM Power Anton Blanchard IBM Z Christian Borntraeger Intel Tony Luck From 12a4d677b1c34717443470c1492fe520638ef39a Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Mon, 9 May 2022 22:45:19 +0800 Subject: [PATCH 745/803] net: phy: micrel: Fix incorrect variable type in micrel In lanphy_read_page_reg, calling __phy_read() might return a negative error code. Use 'int' to check the error code. Fixes: 7c2dcfa295b1 ("net: phy: micrel: Add support for LAN8804 PHY") Signed-off-by: Wan Jiabing Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220509144519.2343399-1-wanjiabing@vivo.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 9d7dafed3931..cd9aa353b653 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1743,7 +1743,7 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev, static int lanphy_read_page_reg(struct phy_device *phydev, int page, u32 addr) { - u32 data; + int data; phy_lock_mdio_bus(phydev); __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page); @@ -2444,8 +2444,7 @@ static int lan8804_config_init(struct phy_device *phydev) static irqreturn_t lan8814_handle_interrupt(struct phy_device *phydev) { - u16 tsu_irq_status; - int irq_status; + int irq_status, tsu_irq_status; irq_status = phy_read(phydev, LAN8814_INTS); if (irq_status > 0 && (irq_status & LAN8814_INT_LINK)) From 792ea6a074ae7ea5ab6f1b8b31f76bb0297de66c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 May 2022 09:56:05 +0200 Subject: [PATCH 746/803] genirq: Remove WARN_ON_ONCE() in generic_handle_domain_irq() Since commit 0953fb263714 ("irq: remove handle_domain_{irq,nmi}()"), generic_handle_domain_irq() warns if called outside hardirq context, even though the function calls down to handle_irq_desc(), which warns about the same, but conditionally on handle_enforce_irqctx(). The newly added warning is a false positive if the interrupt originates from any other irqchip than x86 APIC or ARM GIC/GICv3. Those are the only ones for which handle_enforce_irqctx() returns true. Per commit c16816acd086 ("genirq: Add protection against unsafe usage of generic_handle_irq()"): "In general calling generic_handle_irq() with interrupts disabled from non interrupt context is harmless. For some interrupt controllers like the x86 trainwrecks this is outright dangerous as it might corrupt state if an interrupt affinity change is pending." Examples for interrupt chips where the warning is a false positive are USB-attached GPIO controllers such as drivers/gpio/gpio-dln2.c: USB gadgets are incapable of directly signaling an interrupt because they cannot initiate a bus transaction by themselves. All communication on the bus is initiated by the host controller, which polls a gadget's Interrupt Endpoint in regular intervals. If an interrupt is pending, that information is passed up the stack in softirq context, from which a hardirq is synthesized via generic_handle_domain_irq(). Remove the warning to eliminate such false positives. Fixes: 0953fb263714 ("irq: remove handle_domain_{irq,nmi}()") Signed-off-by: Lukas Wunner Signed-off-by: Thomas Gleixner Cc: Marc Zyngier Cc: Mark Rutland Cc: Jakub Kicinski CC: Linus Walleij Cc: Bartosz Golaszewski Cc: Octavian Purdila Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220505113207.487861b2@kernel.org Link: https://lore.kernel.org/r/20220506203242.GA1855@wunner.de Link: https://lore.kernel.org/r/c3caf60bfa78e5fdbdf483096b7174da65d1813a.1652168866.git.lukas@wunner.de --- kernel/irq/irqdesc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 0099b87dd853..d323b180b0f3 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -701,7 +701,6 @@ EXPORT_SYMBOL_GPL(generic_handle_irq_safe); */ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq) { - WARN_ON_ONCE(!in_hardirq()); return handle_irq_desc(irq_resolve_mapping(domain, hwirq)); } EXPORT_SYMBOL_GPL(generic_handle_domain_irq); From cc28fac16ab7152168be8bee76708df9d65efd71 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 6 May 2022 13:55:28 -0700 Subject: [PATCH 747/803] scsi: lpfc: Fix split code for FLOGI on FCoE The refactoring code converted context information from SLI-3 to SLI-4. The conversion for the SLI-4 bit field tried to use the old (hacky) SLI3 high/low bit settings. Needless to say, it was incorrect. Explicitly set the context field to type FCFI and set it in the wqe. SLI-4 is now a proper bit field so no need for the shifting/anding. Link: https://lore.kernel.org/r/20220506205528.61590-1-jsmart2021@gmail.com Fixes: 6831ce129f19 ("scsi: lpfc: SLI path split: Refactor base ELS paths and the FLOGI path") Co-developed-by: Dick Kennedy Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index ef6e8cd8c26a..872a26376ccb 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1330,7 +1330,7 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) == LPFC_SLI_INTF_IF_TYPE_0) { /* FLOGI needs to be 3 for WQE FCFI */ - ct = ((SLI4_CT_FCFI >> 1) & 1) | (SLI4_CT_FCFI & 1); + ct = SLI4_CT_FCFI; bf_set(wqe_ct, &wqe->els_req.wqe_com, ct); /* Set the fcfi to the fcfi we registered with */ From 7752662071053adcdb6b6e7853834205dd60e1c0 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 6 May 2022 13:55:48 -0700 Subject: [PATCH 748/803] scsi: lpfc: Correct BDE DMA address assignment for GEN_REQ_WQE Garbage FCoE CT frames are transmitted on the wire because of bad DMA ptr addresses filled in the GEN_REQ_WQE. The __lpfc_sli_prep_gen_req_s4() routine is using the wrong buffer for the payload address. Change the DMA buffer assignment from the bmp buffer to the bpl buffer. Link: https://lore.kernel.org/r/20220506205548.61644-1-jsmart2021@gmail.com Fixes: 61910d6a5243 ("scsi: lpfc: SLI path split: Refactor CT paths") Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index bda2a7ba4e77..6adaf79e67cc 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -10720,10 +10720,10 @@ __lpfc_sli_prep_gen_req_s4(struct lpfc_iocbq *cmdiocbq, struct lpfc_dmabuf *bmp, /* Words 0 - 2 */ bde = (struct ulp_bde64_le *)&cmdwqe->generic.bde; - bde->addr_low = cpu_to_le32(putPaddrLow(bmp->phys)); - bde->addr_high = cpu_to_le32(putPaddrHigh(bmp->phys)); + bde->addr_low = bpl->addr_low; + bde->addr_high = bpl->addr_high; bde->type_size = cpu_to_le32(xmit_len); - bde->type_size |= cpu_to_le32(ULP_BDE64_TYPE_BLP_64); + bde->type_size |= cpu_to_le32(ULP_BDE64_TYPE_BDE_64); /* Word 3 */ cmdwqe->gen_req.request_payload_len = xmit_len; From 0807ce0b010418a191e0e4009803b2d74c3245d5 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 10 May 2022 11:13:16 +0800 Subject: [PATCH 749/803] net: stmmac: fix missing pci_disable_device() on error in stmmac_pci_probe() Switch to using pcim_enable_device() to avoid missing pci_disable_device(). Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220510031316.1780409-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index fcf17d8a0494..644bb54f5f02 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -181,7 +181,7 @@ static int stmmac_pci_probe(struct pci_dev *pdev, return -ENOMEM; /* Enable pci device */ - ret = pci_enable_device(pdev); + ret = pcim_enable_device(pdev); if (ret) { dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); @@ -241,8 +241,6 @@ static void stmmac_pci_remove(struct pci_dev *pdev) pcim_iounmap_regions(pdev, BIT(i)); break; } - - pci_disable_device(pdev); } static int __maybe_unused stmmac_pci_suspend(struct device *dev) From 62e0ae0f4020250f961cf8d0103a4621be74e077 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Mon, 9 May 2022 19:28:23 -0700 Subject: [PATCH 750/803] net: atlantic: fix "frag[0] not initialized" In aq_ring_rx_clean(), if buff->is_eop is not set AND buff->len < AQ_CFG_RX_HDR_SIZE, then hdr_len remains equal to buff->len and skb_add_rx_frag(xxx, *0*, ...) is not called. The loop following this code starts calling skb_add_rx_frag() starting with i=1 and thus frag[0] is never initialized. Since i is initialized to zero at the top of the primary loop, we can just reference and post-increment i instead of hardcoding the 0 when calling skb_add_rx_frag() the first time. Reported-by: Aashay Shringarpure Reported-by: Yi Chou Reported-by: Shervin Oloumi Signed-off-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 77e76c9efd32..440423b0e8ea 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -446,7 +446,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self, ALIGN(hdr_len, sizeof(long))); if (buff->len - hdr_len > 0) { - skb_add_rx_frag(skb, 0, buff->rxdata.page, + skb_add_rx_frag(skb, i++, buff->rxdata.page, buff->rxdata.pg_off + hdr_len, buff->len - hdr_len, AQ_CFG_RX_FRAME_MAX); @@ -455,7 +455,6 @@ int aq_ring_rx_clean(struct aq_ring_s *self, if (!buff->is_eop) { buff_ = buff; - i = 1U; do { next_ = buff_->next; buff_ = &self->buff_ring[next_]; From 79784d77ebbd3ec516b7a5ce555d979fb7946202 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Mon, 9 May 2022 19:28:24 -0700 Subject: [PATCH 751/803] net: atlantic: reduce scope of is_rsc_complete Don't defer handling the err case outside the loop. That's pointless. And since is_rsc_complete is only used inside this loop, declare it inside the loop to reduce it's scope. Signed-off-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 440423b0e8ea..bc1952131799 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -346,7 +346,6 @@ int aq_ring_rx_clean(struct aq_ring_s *self, int budget) { struct net_device *ndev = aq_nic_get_ndev(self->aq_nic); - bool is_rsc_completed = true; int err = 0; for (; (self->sw_head != self->hw_head) && budget; @@ -366,6 +365,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, if (!buff->is_eop) { buff_ = buff; do { + bool is_rsc_completed = true; + if (buff_->next >= self->size) { err = -EIO; goto err_exit; @@ -377,18 +378,16 @@ int aq_ring_rx_clean(struct aq_ring_s *self, next_, self->hw_head); - if (unlikely(!is_rsc_completed)) - break; + if (unlikely(!is_rsc_completed)) { + err = 0; + goto err_exit; + } buff->is_error |= buff_->is_error; buff->is_cso_err |= buff_->is_cso_err; } while (!buff_->is_eop); - if (!is_rsc_completed) { - err = 0; - goto err_exit; - } if (buff->is_error || (buff->is_lro && buff->is_cso_err)) { buff_ = buff; From 6aecbba12b5c90b26dc062af3b9de8c4b3a2f19f Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Mon, 9 May 2022 19:28:25 -0700 Subject: [PATCH 752/803] net: atlantic: add check for MAX_SKB_FRAGS Enforce that the CPU can not get stuck in an infinite loop. Reported-by: Aashay Shringarpure Reported-by: Yi Chou Reported-by: Shervin Oloumi Signed-off-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index bc1952131799..8201ce7adb77 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -363,6 +363,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self, continue; if (!buff->is_eop) { + unsigned int frag_cnt = 0U; buff_ = buff; do { bool is_rsc_completed = true; @@ -371,6 +372,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, err = -EIO; goto err_exit; } + + frag_cnt++; next_ = buff_->next, buff_ = &self->buff_ring[next_]; is_rsc_completed = @@ -378,7 +381,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, next_, self->hw_head); - if (unlikely(!is_rsc_completed)) { + if (unlikely(!is_rsc_completed) || + frag_cnt > MAX_SKB_FRAGS) { err = 0; goto err_exit; } From 2120b7f4d128433ad8c5f503a9584deba0684901 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Mon, 9 May 2022 19:28:26 -0700 Subject: [PATCH 753/803] net: atlantic: verify hw_head_ lies within TX buffer ring Bounds check hw_head index provided by NIC to verify it lies within the TX buffer ring. Reported-by: Aashay Shringarpure Reported-by: Yi Chou Reported-by: Shervin Oloumi Signed-off-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index d875ce3ec759..15ede7285fb5 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -889,6 +889,13 @@ int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self, err = -ENXIO; goto err_exit; } + + /* Validate that the new hw_head_ is reasonable. */ + if (hw_head_ >= ring->size) { + err = -ENXIO; + goto err_exit; + } + ring->hw_head = hw_head_; err = aq_hw_err_from_flags(self); From 2c50c6867c85afee6f2b3bcbc50fc9d0083d1343 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 10 May 2022 09:05:06 +0200 Subject: [PATCH 754/803] s390/ctcm: fix variable dereferenced before check Found by cppcheck and smatch. smatch complains about drivers/s390/net/ctcm_sysfs.c:43 ctcm_buffer_write() warn: variable dereferenced before check 'priv' (see line 42) Fixes: 3c09e2647b5e ("ctcm: rename READ/WRITE defines to avoid redefinitions") Reported-by: Colin Ian King Signed-off-by: Alexandra Winter Signed-off-by: David S. Miller --- drivers/s390/net/ctcm_sysfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/ctcm_sysfs.c b/drivers/s390/net/ctcm_sysfs.c index ded1930a00b2..e3813a7aa5e6 100644 --- a/drivers/s390/net/ctcm_sysfs.c +++ b/drivers/s390/net/ctcm_sysfs.c @@ -39,11 +39,12 @@ static ssize_t ctcm_buffer_write(struct device *dev, struct ctcm_priv *priv = dev_get_drvdata(dev); int rc; - ndev = priv->channel[CTCM_READ]->netdev; - if (!(priv && priv->channel[CTCM_READ] && ndev)) { + if (!(priv && priv->channel[CTCM_READ] && + priv->channel[CTCM_READ]->netdev)) { CTCM_DBF_TEXT(SETUP, CTC_DBF_ERROR, "bfnondev"); return -ENODEV; } + ndev = priv->channel[CTCM_READ]->netdev; rc = kstrtouint(buf, 0, &bs1); if (rc) From 0c0b20587b9f25a2ad14db7f80ebe49bdf29920a Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 10 May 2022 09:05:07 +0200 Subject: [PATCH 755/803] s390/ctcm: fix potential memory leak smatch complains about drivers/s390/net/ctcm_mpc.c:1210 ctcmpc_unpack_skb() warn: possible memory leak of 'mpcginfo' mpc_action_discontact() did not free mpcginfo. Consolidate the freeing in ctcmpc_unpack_skb(). Fixes: 293d984f0e36 ("ctcm: infrastructure for replaced ctc driver") Signed-off-by: Alexandra Winter Signed-off-by: David S. Miller --- drivers/s390/net/ctcm_mpc.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/s390/net/ctcm_mpc.c b/drivers/s390/net/ctcm_mpc.c index 88abfb5e8045..8ac213a55141 100644 --- a/drivers/s390/net/ctcm_mpc.c +++ b/drivers/s390/net/ctcm_mpc.c @@ -626,8 +626,6 @@ static void mpc_rcvd_sweep_resp(struct mpcg_info *mpcginfo) ctcm_clear_busy_do(dev); } - kfree(mpcginfo); - return; } @@ -1192,10 +1190,10 @@ static void ctcmpc_unpack_skb(struct channel *ch, struct sk_buff *pskb) CTCM_FUNTAIL, dev->name); priv->stats.rx_dropped++; /* mpcginfo only used for non-data transfers */ - kfree(mpcginfo); if (do_debug_data) ctcmpc_dump_skb(pskb, -8); } + kfree(mpcginfo); } done: @@ -1977,7 +1975,6 @@ static void mpc_action_rcvd_xid0(fsm_instance *fsm, int event, void *arg) } break; } - kfree(mpcginfo); CTCM_PR_DEBUG("ctcmpc:%s() %s xid2:%i xid7:%i xidt_p2:%i \n", __func__, ch->id, grp->outstanding_xid2, @@ -2038,7 +2035,6 @@ static void mpc_action_rcvd_xid7(fsm_instance *fsm, int event, void *arg) mpc_validate_xid(mpcginfo); break; } - kfree(mpcginfo); return; } From 671bb35c8e746439f0ed70815968f9a4f20a8deb Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 10 May 2022 09:05:08 +0200 Subject: [PATCH 756/803] s390/lcs: fix variable dereferenced before check smatch complains about drivers/s390/net/lcs.c:1741 lcs_get_control() warn: variable dereferenced before check 'card->dev' (see line 1739) Fixes: 27eb5ac8f015 ("[PATCH] s390: lcs driver bug fixes and improvements [1/2]") Signed-off-by: Alexandra Winter Signed-off-by: David S. Miller --- drivers/s390/net/lcs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index bab9b34926c6..84c8981317b4 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -1736,10 +1736,11 @@ lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd) lcs_schedule_recovery(card); break; case LCS_CMD_STOPLAN: - pr_warn("Stoplan for %s initiated by LGW\n", - card->dev->name); - if (card->dev) + if (card->dev) { + pr_warn("Stoplan for %s initiated by LGW\n", + card->dev->name); netif_carrier_off(card->dev); + } break; default: LCS_DBF_TEXT(5, trace, "noLGWcmd"); From ee8348496c77e3737d0a6cda307a521f2cff954f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 10 May 2022 14:37:17 +0200 Subject: [PATCH 757/803] KVM: PPC: Book3S PR: Enable MSR_DR for switch_mmu_context() Commit 863771a28e27 ("powerpc/32s: Convert switch_mmu_context() to C") moved the switch_mmu_context() to C. While in principle a good idea, it meant that the function now uses the stack. The stack is not accessible from real mode though. So to keep calling the function, let's turn on MSR_DR while we call it. That way, all pointer references to the stack are handled virtually. In addition, make sure to save/restore r12 on the stack, as it may get clobbered by the C function. Fixes: 863771a28e27 ("powerpc/32s: Convert switch_mmu_context() to C") Cc: stable@vger.kernel.org # v5.14+ Reported-by: Matt Evans Signed-off-by: Alexander Graf Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220510123717.24508-1-graf@amazon.com --- arch/powerpc/kvm/book3s_32_sr.S | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S index e3ab9df6cf19..6cfcd20d4668 100644 --- a/arch/powerpc/kvm/book3s_32_sr.S +++ b/arch/powerpc/kvm/book3s_32_sr.S @@ -122,11 +122,27 @@ /* 0x0 - 0xb */ - /* 'current->mm' needs to be in r4 */ - tophys(r4, r2) - lwz r4, MM(r4) - tophys(r4, r4) - /* This only clobbers r0, r3, r4 and r5 */ + /* switch_mmu_context() needs paging, let's enable it */ + mfmsr r9 + ori r11, r9, MSR_DR + mtmsr r11 + sync + + /* switch_mmu_context() clobbers r12, rescue it */ + SAVE_GPR(12, r1) + + /* Calling switch_mmu_context(, current->mm, ); */ + lwz r4, MM(r2) bl switch_mmu_context + /* restore r12 */ + REST_GPR(12, r1) + + /* Disable paging again */ + mfmsr r9 + li r6, MSR_DR + andc r9, r9, r6 + mtmsr r9 + sync + .endm From a56f445f807b0276fc0660c330bf93a9ea78e8ea Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 10 May 2022 09:37:06 -0400 Subject: [PATCH 758/803] Revert "drm/amd/pm: keep the BACO feature enabled for suspend" This reverts commit eaa090538e8d21801c6d5f94590c3799e6a528b5. Commit ebc002e3ee78 ("drm/amdgpu: don't use BACO for reset in S3") stops using BACO for reset during suspend, so it's no longer necessary to leave BACO enabled during suspend. This fixes resume from suspend on the navy flounder dGPU in the ASUS ROG Strix G513QY. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2008 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1982 Reviewed-by: Lijo Lazar Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index f1544755d8b4..f10a0256413e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1351,14 +1351,8 @@ static int smu_disable_dpms(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; int ret = 0; - /* - * TODO: (adev->in_suspend && !adev->in_s0ix) is added to pair - * the workaround which always reset the asic in suspend. - * It's likely that workaround will be dropped in the future. - * Then the change here should be dropped together. - */ bool use_baco = !smu->is_apu && - (((amdgpu_in_reset(adev) || (adev->in_suspend && !adev->in_s0ix)) && + ((amdgpu_in_reset(adev) && (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) || ((adev->in_runpm || adev->in_s4) && amdgpu_asic_supports_baco(adev))); From c65b364c52ba352177dde6944f5efaa29bd40b52 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 10 May 2022 10:32:26 -0400 Subject: [PATCH 759/803] drm/amdgpu/ctx: only reset stable pstate if the user changed it (v2) Check if the requested stable pstate matches the current one before changing it. This avoids changing the stable pstate on context destroy if the user never changed it in the first place via the IOCTL. v2: compare the current and requested rather than setting a flag (Lijo) Fixes: 8cda7a4f96e435 ("drm/amdgpu/UAPI: add new CTX OP to get/set stable pstates") Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 8f0e6d93bb9c..c317078d1afd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -296,6 +296,7 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, { struct amdgpu_device *adev = ctx->adev; enum amd_dpm_forced_level level; + u32 current_stable_pstate; int r; mutex_lock(&adev->pm.stable_pstate_ctx_lock); @@ -304,6 +305,10 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, goto done; } + r = amdgpu_ctx_get_stable_pstate(ctx, ¤t_stable_pstate); + if (r || (stable_pstate == current_stable_pstate)) + goto done; + switch (stable_pstate) { case AMDGPU_CTX_STABLE_PSTATE_NONE: level = AMD_DPM_FORCED_LEVEL_AUTO; From 8b796475fd7882663a870456466a4fb315cc1bd6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 10 May 2022 16:57:34 +0200 Subject: [PATCH 760/803] net/sched: act_pedit: really ensure the skb is writable Currently pedit tries to ensure that the accessed skb offset is writable via skb_unclone(). The action potentially allows touching any skb bytes, so it may end-up modifying shared data. The above causes some sporadic MPTCP self-test failures, due to this code: tc -n $ns2 filter add dev ns2eth$i egress \ protocol ip prio 1000 \ handle 42 fw \ action pedit munge offset 148 u8 invert \ pipe csum tcp \ index 100 The above modifies a data byte outside the skb head and the skb is a cloned one, carrying a TCP output packet. This change addresses the issue by keeping track of a rough over-estimate highest skb offset accessed by the action and ensuring such offset is really writable. Note that this may cause performance regressions in some scenarios, but hopefully pedit is not in the critical path. Fixes: db2c24175d14 ("act_pedit: access skb->data safely") Acked-by: Mat Martineau Tested-by: Geliang Tang Signed-off-by: Paolo Abeni Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/1fcf78e6679d0a287dd61bb0f04730ce33b3255d.1652194627.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_pedit.h | 1 + net/sched/act_pedit.c | 26 ++++++++++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index 748cf87a4d7e..3e02709a1df6 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -14,6 +14,7 @@ struct tcf_pedit { struct tc_action common; unsigned char tcfp_nkeys; unsigned char tcfp_flags; + u32 tcfp_off_max_hint; struct tc_pedit_key *tcfp_keys; struct tcf_pedit_key_ex *tcfp_keys_ex; }; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 31fcd279c177..0eaaf1f45de1 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -149,7 +149,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *pattr; struct tcf_pedit *p; int ret = 0, err; - int ksize; + int i, ksize; u32 index; if (!nla) { @@ -228,6 +228,18 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, p->tcfp_nkeys = parm->nkeys; } memcpy(p->tcfp_keys, parm->keys, ksize); + p->tcfp_off_max_hint = 0; + for (i = 0; i < p->tcfp_nkeys; ++i) { + u32 cur = p->tcfp_keys[i].off; + + /* The AT option can read a single byte, we can bound the actual + * value with uchar max. + */ + cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift; + + /* Each key touches 4 bytes starting from the computed offset */ + p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4); + } p->tcfp_flags = parm->flags; goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); @@ -308,13 +320,18 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = to_pedit(a); + u32 max_offset; int i; - if (skb_unclone(skb, GFP_ATOMIC)) - return p->tcf_action; - spin_lock(&p->tcf_lock); + max_offset = (skb_transport_header_was_set(skb) ? + skb_transport_offset(skb) : + skb_network_offset(skb)) + + p->tcfp_off_max_hint; + if (skb_ensure_writable(skb, min(skb->len, max_offset))) + goto unlock; + tcf_lastuse_update(&p->tcf_tm); if (p->tcfp_nkeys > 0) { @@ -403,6 +420,7 @@ bad: p->tcf_qstats.overlimits++; done: bstats_update(&p->tcf_bstats, skb); +unlock: spin_unlock(&p->tcf_lock); return p->tcf_action; } From 3f95a7472d14abef284d8968734fe2ae7ff4845f Mon Sep 17 00:00:00 2001 From: Xiaomeng Tong Date: Tue, 10 May 2022 13:48:46 -0700 Subject: [PATCH 761/803] i40e: i40e_main: fix a missing check on list iterator The bug is here: ret = i40e_add_macvlan_filter(hw, ch->seid, vdev->dev_addr, &aq_err); The list iterator 'ch' will point to a bogus position containing HEAD if the list is empty or no element is found. This case must be checked before any use of the iterator, otherwise it will lead to a invalid memory access. To fix this bug, use a new variable 'iter' as the list iterator, while use the origin variable 'ch' as a dedicated pointer to point to the found element. Cc: stable@vger.kernel.org Fixes: 1d8d80b4e4ff6 ("i40e: Add macvlan support on i40e") Signed-off-by: Xiaomeng Tong Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20220510204846.2166999-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_main.c | 27 +++++++++++---------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6778df2177a1..98871f014994 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7549,42 +7549,43 @@ static void i40e_free_macvlan_channels(struct i40e_vsi *vsi) static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev, struct i40e_fwd_adapter *fwd) { + struct i40e_channel *ch = NULL, *ch_tmp, *iter; int ret = 0, num_tc = 1, i, aq_err; - struct i40e_channel *ch, *ch_tmp; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; - if (list_empty(&vsi->macvlan_list)) - return -EINVAL; - /* Go through the list and find an available channel */ - list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { - if (!i40e_is_channel_macvlan(ch)) { - ch->fwd = fwd; + list_for_each_entry_safe(iter, ch_tmp, &vsi->macvlan_list, list) { + if (!i40e_is_channel_macvlan(iter)) { + iter->fwd = fwd; /* record configuration for macvlan interface in vdev */ for (i = 0; i < num_tc; i++) netdev_bind_sb_channel_queue(vsi->netdev, vdev, i, - ch->num_queue_pairs, - ch->base_queue); - for (i = 0; i < ch->num_queue_pairs; i++) { + iter->num_queue_pairs, + iter->base_queue); + for (i = 0; i < iter->num_queue_pairs; i++) { struct i40e_ring *tx_ring, *rx_ring; u16 pf_q; - pf_q = ch->base_queue + i; + pf_q = iter->base_queue + i; /* Get to TX ring ptr */ tx_ring = vsi->tx_rings[pf_q]; - tx_ring->ch = ch; + tx_ring->ch = iter; /* Get the RX ring ptr */ rx_ring = vsi->rx_rings[pf_q]; - rx_ring->ch = ch; + rx_ring->ch = iter; } + ch = iter; break; } } + if (!ch) + return -EINVAL; + /* Guarantee all rings are updated before we update the * MAC address filter. */ From 9c2136be0878c88c53dea26943ce40bb03ad8d8d Mon Sep 17 00:00:00 2001 From: Delyan Kratunov Date: Wed, 11 May 2022 18:28:36 +0000 Subject: [PATCH 762/803] sched/tracing: Append prev_state to tp args instead Commit fa2c3254d7cf (sched/tracing: Don't re-read p->state when emitting sched_switch event, 2022-01-20) added a new prev_state argument to the sched_switch tracepoint, before the prev task_struct pointer. This reordering of arguments broke BPF programs that use the raw tracepoint (e.g. tp_btf programs). The type of the second argument has changed and existing programs that assume a task_struct* argument (e.g. for bpf_task_storage access) will now fail to verify. If we instead append the new argument to the end, all existing programs would continue to work and can conditionally extract the prev_state argument on supported kernel versions. Fixes: fa2c3254d7cf (sched/tracing: Don't re-read p->state when emitting sched_switch event, 2022-01-20) Signed-off-by: Delyan Kratunov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Steven Rostedt (Google) Link: https://lkml.kernel.org/r/c8a6930dfdd58a4a5755fc01732675472979732b.camel@fb.com --- include/trace/events/sched.h | 6 +++--- kernel/sched/core.c | 2 +- kernel/trace/fgraph.c | 4 ++-- kernel/trace/ftrace.c | 4 ++-- kernel/trace/trace_events.c | 8 ++++---- kernel/trace/trace_osnoise.c | 4 ++-- kernel/trace/trace_sched_switch.c | 4 ++-- kernel/trace/trace_sched_wakeup.c | 4 ++-- samples/trace_events/trace_custom_sched.h | 6 +++--- 9 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 65e786756321..fbb99a61f714 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -222,11 +222,11 @@ static inline long __trace_sched_switch_state(bool preempt, TRACE_EVENT(sched_switch, TP_PROTO(bool preempt, - unsigned int prev_state, struct task_struct *prev, - struct task_struct *next), + struct task_struct *next, + unsigned int prev_state), - TP_ARGS(preempt, prev_state, prev, next), + TP_ARGS(preempt, prev, next, prev_state), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 51efaabac3e4..d58c0389eb23 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6382,7 +6382,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) migrate_disable_switch(rq, prev); psi_sched_switch(prev, next, !task_on_rq_queued(prev)); - trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev_state, prev, next); + trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state); /* Also unlocks the rq: */ rq = context_switch(rq, prev, next, &rf); diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 8f4fb328133a..a7e84c8543cb 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -404,9 +404,9 @@ free: static void ftrace_graph_probe_sched_switch(void *ignore, bool preempt, - unsigned int prev_state, struct task_struct *prev, - struct task_struct *next) + struct task_struct *next, + unsigned int prev_state) { unsigned long long timestamp; int index; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4f1d2f5e7263..af899b058c8d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7420,9 +7420,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) static void ftrace_filter_pid_sched_switch_probe(void *data, bool preempt, - unsigned int prev_state, struct task_struct *prev, - struct task_struct *next) + struct task_struct *next, + unsigned int prev_state) { struct trace_array *tr = data; struct trace_pid_list *pid_list; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e11e167b7809..f97de82d1342 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -773,9 +773,9 @@ void trace_event_follow_fork(struct trace_array *tr, bool enable) static void event_filter_pid_sched_switch_probe_pre(void *data, bool preempt, - unsigned int prev_state, struct task_struct *prev, - struct task_struct *next) + struct task_struct *next, + unsigned int prev_state) { struct trace_array *tr = data; struct trace_pid_list *no_pid_list; @@ -799,9 +799,9 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt, static void event_filter_pid_sched_switch_probe_post(void *data, bool preempt, - unsigned int prev_state, struct task_struct *prev, - struct task_struct *next) + struct task_struct *next, + unsigned int prev_state) { struct trace_array *tr = data; struct trace_pid_list *no_pid_list; diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index e9ae1f33a7f0..afb92e2f0aea 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -1168,9 +1168,9 @@ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) */ static void trace_sched_switch_callback(void *data, bool preempt, - unsigned int prev_state, struct task_struct *p, - struct task_struct *n) + struct task_struct *n, + unsigned int prev_state) { struct osnoise_variables *osn_var = this_cpu_osn_var(); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 45796d8bd4b2..c9ffdcfe622e 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -22,8 +22,8 @@ static DEFINE_MUTEX(sched_register_mutex); static void probe_sched_switch(void *ignore, bool preempt, - unsigned int prev_state, - struct task_struct *prev, struct task_struct *next) + struct task_struct *prev, struct task_struct *next, + unsigned int prev_state) { int flags; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 46429f9a96fa..330aee1c1a49 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -426,8 +426,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, static void notrace probe_wakeup_sched_switch(void *ignore, bool preempt, - unsigned int prev_state, - struct task_struct *prev, struct task_struct *next) + struct task_struct *prev, struct task_struct *next, + unsigned int prev_state) { struct trace_array_cpu *data; u64 T0, T1, delta; diff --git a/samples/trace_events/trace_custom_sched.h b/samples/trace_events/trace_custom_sched.h index 9fdd8e7c2a45..951388334a3f 100644 --- a/samples/trace_events/trace_custom_sched.h +++ b/samples/trace_events/trace_custom_sched.h @@ -25,11 +25,11 @@ TRACE_CUSTOM_EVENT(sched_switch, * that the custom event is using. */ TP_PROTO(bool preempt, - unsigned int prev_state, struct task_struct *prev, - struct task_struct *next), + struct task_struct *next, + unsigned int prev_state), - TP_ARGS(preempt, prev_state, prev, next), + TP_ARGS(preempt, prev, next, prev_state), /* * The next fields are where the customization happens. From 103a2f3255a95991252f8f13375c3a96a75011cd Mon Sep 17 00:00:00 2001 From: Itay Iellin Date: Sat, 7 May 2022 08:32:48 -0400 Subject: [PATCH 763/803] Bluetooth: Fix the creation of hdev->name Set a size limit of 8 bytes of the written buffer to "hdev->name" including the terminating null byte, as the size of "hdev->name" is 8 bytes. If an id value which is greater than 9999 is allocated, then the "snprintf(hdev->name, sizeof(hdev->name), "hci%d", id)" function call would lead to a truncation of the id value in decimal notation. Set an explicit maximum id parameter in the id allocation function call. The id allocation function defines the maximum allocated id value as the maximum id parameter value minus one. Therefore, HCI_MAX_ID is defined as 10000. Signed-off-by: Itay Iellin Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 3 +++ net/bluetooth/hci_core.c | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 8abd08245326..62d7b81b1cb7 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -36,6 +36,9 @@ /* HCI priority */ #define HCI_PRIO_MAX 7 +/* HCI maximum id value */ +#define HCI_MAX_ID 10000 + /* HCI Core structures */ struct inquiry_data { bdaddr_t bdaddr; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b4782a6c1025..45c2dd2e1590 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2555,10 +2555,10 @@ int hci_register_dev(struct hci_dev *hdev) */ switch (hdev->dev_type) { case HCI_PRIMARY: - id = ida_simple_get(&hci_index_ida, 0, 0, GFP_KERNEL); + id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL); break; case HCI_AMP: - id = ida_simple_get(&hci_index_ida, 1, 0, GFP_KERNEL); + id = ida_simple_get(&hci_index_ida, 1, HCI_MAX_ID, GFP_KERNEL); break; default: return -EINVAL; @@ -2567,7 +2567,7 @@ int hci_register_dev(struct hci_dev *hdev) if (id < 0) return id; - sprintf(hdev->name, "hci%d", id); + snprintf(hdev->name, sizeof(hdev->name), "hci%d", id); hdev->id = id; BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); From 00832b1d1a393dfb1b9491d085e5b27e8c25d103 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 11 May 2022 11:08:29 +0800 Subject: [PATCH 764/803] net: ethernet: mediatek: ppe: fix wrong size passed to memset() 'foe_table' is a pointer, the real size of struct mtk_foe_entry should be pass to memset(). Fixes: ba37b7caf1ed ("net: ethernet: mtk_eth_soc: add support for initializing the PPE") Signed-off-by: Yang Yingliang Acked-by: Felix Fietkau Link: https://lore.kernel.org/r/20220511030829.3308094-1-yangyingliang@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_ppe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index 3ad10c793308..66298e2235c9 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -395,7 +395,7 @@ static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe) static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 }; int i, k; - memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(ppe->foe_table)); + memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table)); if (!IS_ENABLED(CONFIG_SOC_MT7621)) return; From 6b77c06655b8a749c1a3d9ebc51e9717003f7e5a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 10 May 2022 20:17:51 -0700 Subject: [PATCH 765/803] net: bcmgenet: Check for Wake-on-LAN interrupt probe deferral The interrupt controller supplying the Wake-on-LAN interrupt line maybe modular on some platforms (irq-bcm7038-l1.c) and might be probed at a later time than the GENET driver. We need to specifically check for -EPROBE_DEFER and propagate that error to ensure that we eventually fetch the interrupt descriptor. Fixes: 9deb48b53e7f ("bcmgenet: add WOL IRQ check") Fixes: 5b1f0e62941b ("net: bcmgenet: Avoid touching non-existent interrupt") Signed-off-by: Florian Fainelli Reviewed-by: Stefan Wahren Link: https://lore.kernel.org/r/20220511031752.2245566-1-f.fainelli@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index bf1ec8fdc2ad..e87e46c47387 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3999,6 +3999,10 @@ static int bcmgenet_probe(struct platform_device *pdev) goto err; } priv->wol_irq = platform_get_irq_optional(pdev, 2); + if (priv->wol_irq == -EPROBE_DEFER) { + err = priv->wol_irq; + goto err; + } priv->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(priv->base)) { From 6fed53de560768bde6d701a7c79c253b45b259e3 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Tue, 10 May 2022 21:51:48 +0800 Subject: [PATCH 766/803] drm/vc4: hdmi: Fix build error for implicit function declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/gpu/drm/vc4/vc4_hdmi.c: In function ‘vc4_hdmi_connector_detect’: drivers/gpu/drm/vc4/vc4_hdmi.c:228:7: error: implicit declaration of function ‘gpiod_get_value_cansleep’; did you mean ‘gpio_get_value_cansleep’? [-Werror=implicit-function-declaration] if (gpiod_get_value_cansleep(vc4_hdmi->hpd_gpio)) ^~~~~~~~~~~~~~~~~~~~~~~~ gpio_get_value_cansleep CC [M] drivers/gpu/drm/vc4/vc4_validate.o CC [M] drivers/gpu/drm/vc4/vc4_v3d.o CC [M] drivers/gpu/drm/vc4/vc4_validate_shaders.o CC [M] drivers/gpu/drm/vc4/vc4_debugfs.o drivers/gpu/drm/vc4/vc4_hdmi.c: In function ‘vc4_hdmi_bind’: drivers/gpu/drm/vc4/vc4_hdmi.c:2883:23: error: implicit declaration of function ‘devm_gpiod_get_optional’; did you mean ‘devm_clk_get_optional’? [-Werror=implicit-function-declaration] vc4_hdmi->hpd_gpio = devm_gpiod_get_optional(dev, "hpd", GPIOD_IN); ^~~~~~~~~~~~~~~~~~~~~~~ devm_clk_get_optional drivers/gpu/drm/vc4/vc4_hdmi.c:2883:59: error: ‘GPIOD_IN’ undeclared (first use in this function); did you mean ‘GPIOF_IN’? vc4_hdmi->hpd_gpio = devm_gpiod_get_optional(dev, "hpd", GPIOD_IN); ^~~~~~~~ GPIOF_IN drivers/gpu/drm/vc4/vc4_hdmi.c:2883:59: note: each undeclared identifier is reported only once for each function it appears in cc1: all warnings being treated as errors Fixes: 6800234ceee0 ("drm/vc4: hdmi: Convert to gpiod") Signed-off-by: Hui Tang Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20220510135148.247719-1-tanghui20@huawei.com --- drivers/gpu/drm/vc4/vc4_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 6c58b0fd13fb..98b78ec6b37d 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include From 2de7689c7caa55949da5a56be02ec41a86ac1725 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 11 May 2022 17:20:30 +0100 Subject: [PATCH 767/803] arm64: cpufeature: remove duplicate ID_AA64ISAR2_EL1 entry The ID register table should have one entry per ID register but currently has two entries for ID_AA64ISAR2_EL1. Only one entry has an override, and get_arm64_ftr_reg() can end up choosing the other, causing the override to be ignored. Fix this by removing the duplicate entry. While here, also make the check in sort_ftr_regs() more strict so that duplicate entries can't be added in the future. Fixes: def8c222f054 ("arm64: Add support of PAuth QARMA3 architected algorithm") Signed-off-by: Kristina Martsenko Reviewed-by: Vladimir Murzin Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20220511162030.1403386-1-kristina.martsenko@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d72c4b4d389c..2cb9cc9e0eff 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -654,7 +654,6 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1, &id_aa64isar1_override), - ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2, &id_aa64isar2_override), @@ -810,7 +809,7 @@ static void __init sort_ftr_regs(void) * to sys_id for subsequent binary search in get_arm64_ftr_reg() * to work correctly. */ - BUG_ON(arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id); + BUG_ON(arm64_ftr_regs[i].sys_id <= arm64_ftr_regs[i - 1].sys_id); } } From 810c2f0a3f86158c1e02e74947b66d811473434a Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Wed, 11 May 2022 14:57:47 +0300 Subject: [PATCH 768/803] mlxsw: Avoid warning during ip6gre device removal IPv6 addresses which are used for tunnels are stored in a hash table with reference counting. When a new GRE tunnel is configured, the driver is notified and configures it in hardware. Currently, any change in the tunnel is not applied in the driver. It means that if the remote address is changed, the driver is not aware of this change and the first address will be used. This behavior results in a warning [1] in scenarios such as the following: # ip link add name gre1 type ip6gre local 2000::3 remote 2000::fffe tos inherit ttl inherit # ip link set name gre1 type ip6gre local 2000::3 remote 2000::ffff ttl inherit # ip link delete gre1 The change of the address is not applied in the driver. Currently, the driver uses the remote address which is stored in the 'parms' of the overlay device. When the tunnel is removed, the new IPv6 address is used, the driver tries to release it, but as it is not aware of the change, this address is not configured and it warns about releasing non existing IPv6 address. Fix it by using the IPv6 address which is cached in the IPIP entry, this address is the last one that the driver used, so even in cases such the above, the first address will be released, without any warning. [1]: WARNING: CPU: 1 PID: 2197 at drivers/net/ethernet/mellanox/mlxsw/spectrum.c:2920 mlxsw_sp_ipv6_addr_put+0x146/0x220 [mlxsw_spectrum] ... CPU: 1 PID: 2197 Comm: ip Not tainted 5.17.0-rc8-custom-95062-gc1e5ded51a9a #84 Hardware name: Mellanox Technologies Ltd. MSN4700/VMOD0010, BIOS 5.11 07/12/2021 RIP: 0010:mlxsw_sp_ipv6_addr_put+0x146/0x220 [mlxsw_spectrum] ... Call Trace: mlxsw_sp2_ipip_rem_addr_unset_gre6+0xf1/0x120 [mlxsw_spectrum] mlxsw_sp_netdevice_ipip_ol_event+0xdb/0x640 [mlxsw_spectrum] mlxsw_sp_netdevice_event+0xc4/0x850 [mlxsw_spectrum] raw_notifier_call_chain+0x3c/0x50 call_netdevice_notifiers_info+0x2f/0x80 unregister_netdevice_many+0x311/0x6d0 rtnl_dellink+0x136/0x360 rtnetlink_rcv_msg+0x12f/0x380 netlink_rcv_skb+0x49/0xf0 netlink_unicast+0x233/0x340 netlink_sendmsg+0x202/0x440 ____sys_sendmsg+0x1f3/0x220 ___sys_sendmsg+0x70/0xb0 __sys_sendmsg+0x54/0xa0 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: e846efe2737b ("mlxsw: spectrum: Add hash table for IPv6 address mapping") Reported-by: Maksym Yaremchuk Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Link: https://lore.kernel.org/r/20220511115747.238602-1-idosch@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 01cf5a6a26bd..a2ee695a3f17 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -568,10 +568,8 @@ static int mlxsw_sp2_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry) { - struct __ip6_tnl_parm parms6; - - parms6 = mlxsw_sp_ipip_netdev_parms6(ipip_entry->ol_dev); - return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp, &parms6.raddr, + return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp, + &ipip_entry->parms.daddr.addr6, &ipip_entry->dip_kvdl_index); } @@ -579,10 +577,7 @@ static void mlxsw_sp2_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_ipip_entry *ipip_entry) { - struct __ip6_tnl_parm parms6; - - parms6 = mlxsw_sp_ipip_netdev_parms6(ipip_entry->ol_dev); - mlxsw_sp_ipv6_addr_put(mlxsw_sp, &parms6.raddr); + mlxsw_sp_ipv6_addr_put(mlxsw_sp, &ipip_entry->parms.daddr.addr6); } static const struct mlxsw_sp_ipip_ops mlxsw_sp2_ipip_gre6_ops = { From 51f559d66527e238f9a5f82027bff499784d4eac Mon Sep 17 00:00:00 2001 From: Shreyas K K Date: Thu, 12 May 2022 16:31:34 +0530 Subject: [PATCH 769/803] arm64: Enable repeat tlbi workaround on KRYO4XX gold CPUs Add KRYO4XX gold/big cores to the list of CPUs that need the repeat TLBI workaround. Apply this to the affected KRYO4XX cores (rcpe to rfpe). The variant and revision bits are implementation defined and are different from the their Cortex CPU counterparts on which they are based on, i.e., (r0p0 to r3p0) is equivalent to (rcpe to rfpe). Signed-off-by: Shreyas K K Reviewed-by: Sai Prakash Ranjan Link: https://lore.kernel.org/r/20220512110134.12179-1-quic_shrekk@quicinc.com Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 3 +++ arch/arm64/kernel/cpu_errata.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 466cb9e89047..d27db84d585e 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -189,6 +189,9 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Qualcomm Tech. | Kryo4xx Silver | N/A | ARM64_ERRATUM_1024718 | +----------------+-----------------+-----------------+-----------------------------+ +| Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1286807 | ++----------------+-----------------+-----------------+-----------------------------+ + +----------------+-----------------+-----------------+-----------------------------+ | Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 4c9b5b4b7a0b..a0f3d0aaa3c5 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -208,6 +208,8 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { #ifdef CONFIG_ARM64_ERRATUM_1286807 { ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), + /* Kryo4xx Gold (rcpe to rfpe) => (r0p0 to r3p0) */ + ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe), }, #endif {}, From 4031cd95cba70c72e4cadc2d46624bcd31e5a6c0 Mon Sep 17 00:00:00 2001 From: ChiYuan Huang Date: Tue, 10 May 2022 13:13:00 +0800 Subject: [PATCH 770/803] usb: typec: tcpci_mt6360: Update for BMC PHY setting Update MT6360 BMC PHY Tx/Rx setting for the compatibility. Macpaul reported this CtoDP cable attention message cannot be received from MT6360 TCPC. But actually, attention message really sent from UFP_D device. After RD's comment, there may be BMC PHY Tx/Rx setting causes this issue. Below's the detailed TCPM log and DP attention message didn't received from 6360 TCPCI. [ 1206.367775] Identity: 0000:0000.0000 [ 1206.416570] Alternate mode 0: SVID 0xff01, VDO 1: 0x00000405 [ 1206.447378] AMS DFP_TO_UFP_ENTER_MODE start [ 1206.447383] PD TX, header: 0x1d6f [ 1206.449393] PD TX complete, status: 0 [ 1206.454110] PD RX, header: 0x184f [1] [ 1206.456867] Rx VDM cmd 0xff018144 type 1 cmd 4 len 1 [ 1206.456872] AMS DFP_TO_UFP_ENTER_MODE finished [ 1206.456873] cc:=4 [ 1206.473100] AMS STRUCTURED_VDMS start [ 1206.473103] PD TX, header: 0x2f6f [ 1206.475397] PD TX complete, status: 0 [ 1206.480442] PD RX, header: 0x2a4f [1] [ 1206.483145] Rx VDM cmd 0xff018150 type 1 cmd 16 len 2 [ 1206.483150] AMS STRUCTURED_VDMS finished [ 1206.483151] cc:=4 [ 1206.505643] AMS STRUCTURED_VDMS start [ 1206.505646] PD TX, header: 0x216f [ 1206.507933] PD TX complete, status: 0 [ 1206.512664] PD RX, header: 0x1c4f [1] [ 1206.515456] Rx VDM cmd 0xff018151 type 1 cmd 17 len 1 [ 1206.515460] AMS STRUCTURED_VDMS finished [ 1206.515461] cc:=4 Fixes: e1aefcdd394fd ("usb typec: mt6360: Add support for mt6360 Type-C driver") Cc: stable Reported-by: Macpaul Lin Tested-by: Macpaul Lin Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: ChiYuan Huang Signed-off-by: Fabien Parent Link: https://lore.kernel.org/r/1652159580-30959-1-git-send-email-u0084500@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpci_mt6360.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpci_mt6360.c b/drivers/usb/typec/tcpm/tcpci_mt6360.c index f1bd9e09bc87..8a952eaf9016 100644 --- a/drivers/usb/typec/tcpm/tcpci_mt6360.c +++ b/drivers/usb/typec/tcpm/tcpci_mt6360.c @@ -15,6 +15,9 @@ #include "tcpci.h" +#define MT6360_REG_PHYCTRL1 0x80 +#define MT6360_REG_PHYCTRL3 0x82 +#define MT6360_REG_PHYCTRL7 0x86 #define MT6360_REG_VCONNCTRL1 0x8C #define MT6360_REG_MODECTRL2 0x8F #define MT6360_REG_SWRESET 0xA0 @@ -22,6 +25,8 @@ #define MT6360_REG_DRPCTRL1 0xA2 #define MT6360_REG_DRPCTRL2 0xA3 #define MT6360_REG_I2CTORST 0xBF +#define MT6360_REG_PHYCTRL11 0xCA +#define MT6360_REG_RXCTRL1 0xCE #define MT6360_REG_RXCTRL2 0xCF #define MT6360_REG_CTDCTRL2 0xEC @@ -106,6 +111,27 @@ static int mt6360_tcpc_init(struct tcpci *tcpci, struct tcpci_data *tdata) if (ret) return ret; + /* BMC PHY */ + ret = mt6360_tcpc_write16(regmap, MT6360_REG_PHYCTRL1, 0x3A70); + if (ret) + return ret; + + ret = regmap_write(regmap, MT6360_REG_PHYCTRL3, 0x82); + if (ret) + return ret; + + ret = regmap_write(regmap, MT6360_REG_PHYCTRL7, 0x36); + if (ret) + return ret; + + ret = mt6360_tcpc_write16(regmap, MT6360_REG_PHYCTRL11, 0x3C60); + if (ret) + return ret; + + ret = regmap_write(regmap, MT6360_REG_RXCTRL1, 0xE8); + if (ret) + return ret; + /* Set shipping mode off, AUTOIDLE on */ return regmap_write(regmap, MT6360_REG_MODECTRL2, 0x7A); } From 5f0b5f4d50fa0faa8c76ef9d42a42e8d43f98b44 Mon Sep 17 00:00:00 2001 From: Schspa Shi Date: Sun, 8 May 2022 23:02:47 +0800 Subject: [PATCH 771/803] usb: gadget: fix race when gadget driver register via ioctl The usb_gadget_register_driver can be called multi time by to threads via USB_RAW_IOCTL_RUN ioctl syscall, which will lead to multiple registrations. Call trace: driver_register+0x220/0x3a0 drivers/base/driver.c:171 usb_gadget_register_driver_owner+0xfb/0x1e0 drivers/usb/gadget/udc/core.c:1546 raw_ioctl_run drivers/usb/gadget/legacy/raw_gadget.c:513 [inline] raw_ioctl+0x1883/0x2730 drivers/usb/gadget/legacy/raw_gadget.c:1220 ioctl USB_RAW_IOCTL_RUN This routine allows two processes to register the same driver instance via ioctl syscall. which lead to a race condition. Please refer to the following scenarios. T1 T2 ------------------------------------------------------------------ usb_gadget_register_driver_owner driver_register driver_register driver_find driver_find bus_add_driver bus_add_driver priv alloced drv->p = priv; kobject_init_and_add // refcount = 1; //couldn't find an available UDC or it's busy priv alloced drv->priv = priv; kobject_init_and_add ---> refcount = 1 <------ // register success ===================== another ioctl/process ====================== driver_register driver_find k = kset_find_obj() ---> refcount = 2 <------ driver_unregister // drv->p become T2's priv ---> refcount = 1 <------ kobject_put(k) ---> refcount = 0 <------ return priv->driver; --------UAF here---------- There will be UAF in this scenario. We can fix it by adding a new STATE_DEV_REGISTERING device state to avoid double register. Reported-by: syzbot+dc7c3ca638e773db07f6@syzkaller.appspotmail.com Link: https://lore.kernel.org/all/000000000000e66c2805de55b15a@google.com/ Reviewed-by: Andrey Konovalov Signed-off-by: Schspa Shi Link: https://lore.kernel.org/r/20220508150247.38204-1-schspa@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/raw_gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index 8d40a1f2ec57..e9440f7bf019 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -145,6 +145,7 @@ enum dev_state { STATE_DEV_INVALID = 0, STATE_DEV_OPENED, STATE_DEV_INITIALIZED, + STATE_DEV_REGISTERING, STATE_DEV_RUNNING, STATE_DEV_CLOSED, STATE_DEV_FAILED @@ -508,6 +509,7 @@ static int raw_ioctl_run(struct raw_dev *dev, unsigned long value) ret = -EINVAL; goto out_unlock; } + dev->state = STATE_DEV_REGISTERING; spin_unlock_irqrestore(&dev->lock, flags); ret = usb_gadget_probe_driver(&dev->driver); From c237566b78ad8c72bc0431c5d6171db8d12e6f94 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 12 May 2022 14:49:30 +0800 Subject: [PATCH 772/803] usb: xhci-mtk: fix fs isoc's transfer error Due to the scheduler allocates the optimal bandwidth for FS ISOC endpoints, this may be not enough actually and causes data transfer error, so come up with an estimate that is no less than the worst case bandwidth used for any one mframe, but may be an over-estimate. Fixes: 451d3912586a ("usb: xhci-mtk: update fs bus bandwidth by bw_budget_table") Cc: stable@vger.kernel.org Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20220512064931.31670-1-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk-sch.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index f3139ce7b0a9..953d2cd1d4cc 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -464,7 +464,7 @@ static int check_fs_bus_bw(struct mu3h_sch_ep_info *sch_ep, int offset) */ for (j = 0; j < sch_ep->num_budget_microframes; j++) { k = XHCI_MTK_BW_INDEX(base + j); - tmp = tt->fs_bus_bw[k] + sch_ep->bw_budget_table[j]; + tmp = tt->fs_bus_bw[k] + sch_ep->bw_cost_per_microframe; if (tmp > FS_PAYLOAD_MAX) return -ESCH_BW_OVERFLOW; } @@ -538,19 +538,17 @@ static int check_sch_tt(struct mu3h_sch_ep_info *sch_ep, u32 offset) static void update_sch_tt(struct mu3h_sch_ep_info *sch_ep, bool used) { struct mu3h_sch_tt *tt = sch_ep->sch_tt; + int bw_updated; u32 base; - int i, j, k; + int i, j; + + bw_updated = sch_ep->bw_cost_per_microframe * (used ? 1 : -1); for (i = 0; i < sch_ep->num_esit; i++) { base = sch_ep->offset + i * sch_ep->esit; - for (j = 0; j < sch_ep->num_budget_microframes; j++) { - k = XHCI_MTK_BW_INDEX(base + j); - if (used) - tt->fs_bus_bw[k] += sch_ep->bw_budget_table[j]; - else - tt->fs_bus_bw[k] -= sch_ep->bw_budget_table[j]; - } + for (j = 0; j < sch_ep->num_budget_microframes; j++) + tt->fs_bus_bw[XHCI_MTK_BW_INDEX(base + j)] += bw_updated; } if (used) From 1645eee0d7f623660e6ce0f1aef4591788a0c9da Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 12 May 2022 14:49:31 +0800 Subject: [PATCH 773/803] usb: xhci-mtk: remove bandwidth budget table The bandwidth budget table is introduced to trace ideal bandwidth used by each INT/ISOC endpoint, but in fact the endpoint may consume more bandwidth and cause data transfer error, so it's better to leave some margin. Obviously it's difficult to find the best margin for all cases, instead take use of the worst-case scenario. Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20220512064931.31670-2-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk-sch.c | 74 ++++++--------------------------- drivers/usb/host/xhci-mtk.h | 2 - 2 files changed, 12 insertions(+), 64 deletions(-) diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index 953d2cd1d4cc..06a6b19acaae 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -19,11 +19,6 @@ #define HS_BW_BOUNDARY 6144 /* usb2 spec section11.18.1: at most 188 FS bytes per microframe */ #define FS_PAYLOAD_MAX 188 -/* - * max number of microframes for split transfer, - * for fs isoc in : 1 ss + 1 idle + 7 cs - */ -#define TT_MICROFRAMES_MAX 9 #define DBG_BUF_EN 64 @@ -242,28 +237,17 @@ static void drop_tt(struct usb_device *udev) static struct mu3h_sch_ep_info * create_sch_ep(struct xhci_hcd_mtk *mtk, struct usb_device *udev, - struct usb_host_endpoint *ep, struct xhci_ep_ctx *ep_ctx) + struct usb_host_endpoint *ep) { struct mu3h_sch_ep_info *sch_ep; struct mu3h_sch_bw_info *bw_info; struct mu3h_sch_tt *tt = NULL; - u32 len_bw_budget_table; bw_info = get_bw_info(mtk, udev, ep); if (!bw_info) return ERR_PTR(-ENODEV); - if (is_fs_or_ls(udev->speed)) - len_bw_budget_table = TT_MICROFRAMES_MAX; - else if ((udev->speed >= USB_SPEED_SUPER) - && usb_endpoint_xfer_isoc(&ep->desc)) - len_bw_budget_table = get_esit(ep_ctx); - else - len_bw_budget_table = 1; - - sch_ep = kzalloc(struct_size(sch_ep, bw_budget_table, - len_bw_budget_table), - GFP_KERNEL); + sch_ep = kzalloc(sizeof(*sch_ep), GFP_KERNEL); if (!sch_ep) return ERR_PTR(-ENOMEM); @@ -295,8 +279,6 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx, u32 mult; u32 esit_pkts; u32 max_esit_payload; - u32 *bwb_table = sch_ep->bw_budget_table; - int i; ep_type = CTX_TO_EP_TYPE(le32_to_cpu(ep_ctx->ep_info2)); maxpkt = MAX_PACKET_DECODED(le32_to_cpu(ep_ctx->ep_info2)); @@ -332,7 +314,6 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx, */ sch_ep->pkts = max_burst + 1; sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts; - bwb_table[0] = sch_ep->bw_cost_per_microframe; } else if (sch_ep->speed >= USB_SPEED_SUPER) { /* usb3_r1 spec section4.4.7 & 4.4.8 */ sch_ep->cs_count = 0; @@ -349,7 +330,6 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx, if (ep_type == INT_IN_EP || ep_type == INT_OUT_EP) { sch_ep->pkts = esit_pkts; sch_ep->num_budget_microframes = 1; - bwb_table[0] = maxpkt * sch_ep->pkts; } if (ep_type == ISOC_IN_EP || ep_type == ISOC_OUT_EP) { @@ -366,15 +346,8 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx, DIV_ROUND_UP(esit_pkts, sch_ep->pkts); sch_ep->repeat = !!(sch_ep->num_budget_microframes > 1); - sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts; - - for (i = 0; i < sch_ep->num_budget_microframes - 1; i++) - bwb_table[i] = sch_ep->bw_cost_per_microframe; - - /* last one <= bw_cost_per_microframe */ - bwb_table[i] = maxpkt * esit_pkts - - i * sch_ep->bw_cost_per_microframe; } + sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts; } else if (is_fs_or_ls(sch_ep->speed)) { sch_ep->pkts = 1; /* at most one packet for each microframe */ @@ -384,28 +357,7 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx, */ sch_ep->cs_count = DIV_ROUND_UP(maxpkt, FS_PAYLOAD_MAX); sch_ep->num_budget_microframes = sch_ep->cs_count; - sch_ep->bw_cost_per_microframe = - (maxpkt < FS_PAYLOAD_MAX) ? maxpkt : FS_PAYLOAD_MAX; - - /* init budget table */ - if (ep_type == ISOC_OUT_EP) { - for (i = 0; i < sch_ep->num_budget_microframes; i++) - bwb_table[i] = sch_ep->bw_cost_per_microframe; - } else if (ep_type == INT_OUT_EP) { - /* only first one consumes bandwidth, others as zero */ - bwb_table[0] = sch_ep->bw_cost_per_microframe; - } else { /* INT_IN_EP or ISOC_IN_EP */ - bwb_table[0] = 0; /* start split */ - bwb_table[1] = 0; /* idle */ - /* - * due to cs_count will be updated according to cs - * position, assign all remainder budget array - * elements as @bw_cost_per_microframe, but only first - * @num_budget_microframes elements will be used later - */ - for (i = 2; i < TT_MICROFRAMES_MAX; i++) - bwb_table[i] = sch_ep->bw_cost_per_microframe; - } + sch_ep->bw_cost_per_microframe = min_t(u32, maxpkt, FS_PAYLOAD_MAX); } } @@ -422,7 +374,7 @@ static u32 get_max_bw(struct mu3h_sch_bw_info *sch_bw, for (j = 0; j < sch_ep->num_budget_microframes; j++) { k = XHCI_MTK_BW_INDEX(base + j); - bw = sch_bw->bus_bw[k] + sch_ep->bw_budget_table[j]; + bw = sch_bw->bus_bw[k] + sch_ep->bw_cost_per_microframe; if (bw > max_bw) max_bw = bw; } @@ -433,18 +385,16 @@ static u32 get_max_bw(struct mu3h_sch_bw_info *sch_bw, static void update_bus_bw(struct mu3h_sch_bw_info *sch_bw, struct mu3h_sch_ep_info *sch_ep, bool used) { + int bw_updated; u32 base; - int i, j, k; + int i, j; + + bw_updated = sch_ep->bw_cost_per_microframe * (used ? 1 : -1); for (i = 0; i < sch_ep->num_esit; i++) { base = sch_ep->offset + i * sch_ep->esit; - for (j = 0; j < sch_ep->num_budget_microframes; j++) { - k = XHCI_MTK_BW_INDEX(base + j); - if (used) - sch_bw->bus_bw[k] += sch_ep->bw_budget_table[j]; - else - sch_bw->bus_bw[k] -= sch_ep->bw_budget_table[j]; - } + for (j = 0; j < sch_ep->num_budget_microframes; j++) + sch_bw->bus_bw[XHCI_MTK_BW_INDEX(base + j)] += bw_updated; } } @@ -708,7 +658,7 @@ static int add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, xhci_dbg(xhci, "%s %s\n", __func__, decode_ep(ep, udev->speed)); - sch_ep = create_sch_ep(mtk, udev, ep, ep_ctx); + sch_ep = create_sch_ep(mtk, udev, ep); if (IS_ERR_OR_NULL(sch_ep)) return -ENOMEM; diff --git a/drivers/usb/host/xhci-mtk.h b/drivers/usb/host/xhci-mtk.h index ffd4b493b4ba..1174a510dd38 100644 --- a/drivers/usb/host/xhci-mtk.h +++ b/drivers/usb/host/xhci-mtk.h @@ -83,7 +83,6 @@ struct mu3h_sch_bw_info { * times; 1: distribute the (bMaxBurst+1)*(Mult+1) packets * according to @pkts and @repeat. normal mode is used by * default - * @bw_budget_table: table to record bandwidth budget per microframe */ struct mu3h_sch_ep_info { u32 esit; @@ -109,7 +108,6 @@ struct mu3h_sch_ep_info { u32 pkts; u32 cs_count; u32 burst_mode; - u32 bw_budget_table[]; }; #define MU3C_U3_PORT_MAX 4 From b7be130c5d52e5224ac7d89568737b37b4c4b785 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 11 May 2022 19:17:31 -0700 Subject: [PATCH 774/803] net: dsa: bcm_sf2: Fix Wake-on-LAN with mac_link_down() After commit 2d1f90f9ba83 ("net: dsa/bcm_sf2: fix incorrect usage of state->link") the interface suspend path would call our mac_link_down() call back which would forcibly set the link down, thus preventing Wake-on-LAN packets from reaching our management port. Fix this by looking at whether the port is enabled for Wake-on-LAN and not clearing the link status in that case to let packets go through. Fixes: 2d1f90f9ba83 ("net: dsa/bcm_sf2: fix incorrect usage of state->link") Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20220512021731.2494261-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/bcm_sf2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index cf82b1fa9725..87e81c636339 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -809,6 +809,9 @@ static void bcm_sf2_sw_mac_link_down(struct dsa_switch *ds, int port, struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u32 reg, offset; + if (priv->wol_ports_mask & BIT(port)) + return; + if (port != core_readl(priv, CORE_IMP0_PRT_ID)) { if (priv->type == BCM4908_DEVICE_ID || priv->type == BCM7445_DEVICE_ID) From f3c46e41b32b6266cf60b0985c61748f53bf1c61 Mon Sep 17 00:00:00 2001 From: Guangguan Wang Date: Thu, 12 May 2022 11:08:20 +0800 Subject: [PATCH 775/803] net/smc: non blocking recvmsg() return -EAGAIN when no data and signal_pending Non blocking sendmsg will return -EAGAIN when any signal pending and no send space left, while non blocking recvmsg return -EINTR when signal pending and no data received. This may makes confused. As TCP returns -EAGAIN in the conditions described above. Align the behavior of smc with TCP. Fixes: 846e344eb722 ("net/smc: add receive timeout check") Signed-off-by: Guangguan Wang Reviewed-by: Tony Lu Acked-by: Karsten Graul Link: https://lore.kernel.org/r/20220512030820.73848-1-guangguan.wang@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/smc_rx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 51e8eb2933ff..338b9ef806e8 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -355,12 +355,12 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, } break; } + if (!timeo) + return -EAGAIN; if (signal_pending(current)) { read_done = sock_intr_errno(timeo); break; } - if (!timeo) - return -EAGAIN; } if (!smc_rx_data_available(conn)) { From 1fa89ffbc04545b7582518e57f4b63e2a062870f Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 12 May 2022 05:47:09 +0000 Subject: [PATCH 776/803] net: sfc: ef10: fix memory leak in efx_ef10_mtd_probe() In the NIC ->probe() callback, ->mtd_probe() callback is called. If NIC has 2 ports, ->probe() is called twice and ->mtd_probe() too. In the ->mtd_probe(), which is efx_ef10_mtd_probe() it allocates and initializes mtd partiion. But mtd partition for sfc is shared data. So that allocated mtd partition data from last called efx_ef10_mtd_probe() will not be used. Therefore it must be freed. But it doesn't free a not used mtd partition data in efx_ef10_mtd_probe(). kmemleak reports: unreferenced object 0xffff88811ddb0000 (size 63168): comm "systemd-udevd", pid 265, jiffies 4294681048 (age 348.586s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmalloc_order_trace+0x19/0x120 [] __kmalloc+0x20e/0x250 [] efx_ef10_mtd_probe+0x11f/0x270 [sfc] [] efx_pci_probe.cold.17+0x3df/0x53d [sfc] [] local_pci_probe+0xdc/0x170 [] pci_device_probe+0x235/0x680 [] really_probe+0x1c2/0x8f0 [] __driver_probe_device+0x2ab/0x460 [] driver_probe_device+0x4a/0x120 [] __driver_attach+0x16e/0x320 [] bus_for_each_dev+0x110/0x190 [] bus_add_driver+0x39e/0x560 [] driver_register+0x18e/0x310 [] 0xffffffffc02e2055 [] do_one_initcall+0xc3/0x450 [] do_init_module+0x1b4/0x700 Acked-by: Martin Habets Fixes: 8127d661e77f ("sfc: Add support for Solarflare SFC9100 family") Signed-off-by: Taehee Yoo Link: https://lore.kernel.org/r/20220512054709.12513-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/ef10.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 50d535981a35..f8edb3f1b73a 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3579,6 +3579,11 @@ static int efx_ef10_mtd_probe(struct efx_nic *efx) n_parts++; } + if (!n_parts) { + kfree(parts); + return 0; + } + rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts)); fail: if (rc) From 3740651bf7e200109dd42d5b2fb22226b26f960a Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 12 May 2022 12:18:30 +0300 Subject: [PATCH 777/803] tls: Fix context leak on tls_device_down The commit cited below claims to fix a use-after-free condition after tls_device_down. Apparently, the description wasn't fully accurate. The context stayed alive, but ctx->netdev became NULL, and the offload was torn down without a proper fallback, so a bug was present, but a different kind of bug. Due to misunderstanding of the issue, the original patch dropped the refcount_dec_and_test line for the context to avoid the alleged premature deallocation. That line has to be restored, because it matches the refcount_inc_not_zero from the same function, otherwise the contexts that survived tls_device_down are leaked. This patch fixes the described issue by restoring refcount_dec_and_test. After this change, there is no leak anymore, and the fallback to software kTLS still works. Fixes: c55dcdd435aa ("net/tls: Fix use-after-free after the TLS device goes down and up") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/20220512091830.678684-1-maximmi@nvidia.com Signed-off-by: Jakub Kicinski --- net/tls/tls_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index af875ad4a822..3919fe2c58c5 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1347,7 +1347,10 @@ static int tls_device_down(struct net_device *netdev) /* Device contexts for RX and TX will be freed in on sk_destruct * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW. + * Now release the ref taken above. */ + if (refcount_dec_and_test(&ctx->refcount)) + tls_device_free_ctx(ctx); } up_write(&device_offload_lock); From 280abe14b6e0a38de9cc86fe6a019523aadd8f70 Mon Sep 17 00:00:00 2001 From: Adrian-Ken Rueegsegger Date: Mon, 9 May 2022 11:06:37 +0200 Subject: [PATCH 778/803] x86/mm: Fix marking of unused sub-pmd ranges The unused part precedes the new range spanned by the start, end parameters of vmemmap_use_new_sub_pmd(). This means it actually goes from ALIGN_DOWN(start, PMD_SIZE) up to start. Use the correct address when applying the mark using memset. Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges") Signed-off-by: Adrian-Ken Rueegsegger Signed-off-by: Thomas Gleixner Reviewed-by: Oscar Salvador Reviewed-by: David Hildenbrand Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220509090637.24152-2-ken@codelabs.ch --- arch/x86/mm/init_64.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 96d34ebb20a9..e2942335d143 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -902,6 +902,8 @@ static void __meminit vmemmap_use_sub_pmd(unsigned long start, unsigned long end static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) { + const unsigned long page = ALIGN_DOWN(start, PMD_SIZE); + vmemmap_flush_unused_pmd(); /* @@ -914,8 +916,7 @@ static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long * Mark with PAGE_UNUSED the unused parts of the new memmap range */ if (!IS_ALIGNED(start, PMD_SIZE)) - memset((void *)start, PAGE_UNUSED, - start - ALIGN_DOWN(start, PMD_SIZE)); + memset((void *)page, PAGE_UNUSED, start - page); /* * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of From 0286300e60455534b23f4b86ce79247829ceddb8 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe via iommu Date: Mon, 9 May 2022 13:19:19 -0300 Subject: [PATCH 779/803] iommu: iommu_group_claim_dma_owner() must always assign a domain Once the group enters 'owned' mode it can never be assigned back to the default_domain or to a NULL domain. It must always be actively assigned to a current domain. If the caller hasn't provided a domain then the core must provide an explicit DMA blocking domain that has no DMA map. Lazily create a group-global blocking DMA domain when iommu_group_claim_dma_owner is first called and immediately assign the group to it. This ensures that DMA is immediately fully isolated on all IOMMU drivers. If the user attaches/detaches while owned then detach will set the group back to the blocking domain. Slightly reorganize the call chains so that __iommu_group_set_core_domain() is the function that removes any caller configured domain and sets the domains back a core owned domain with an appropriate lifetime. __iommu_group_set_domain() is the worker function that can change the domain assigned to a group to any target domain, including NULL. Add comments clarifying how the NULL vs detach_dev vs default_domain works based on Robin's remarks. This fixes an oops with VFIO and SMMUv3 because VFIO will call iommu_detach_group() and then immediately iommu_domain_free(), but SMMUv3 has no way to know that the domain it is holding a pointer to has been freed. Now the iommu_detach_group() will assign the blocking domain and SMMUv3 will no longer hold a stale domain reference. Fixes: 1ea2a07a532b ("iommu: Add DMA ownership management interfaces") Reported-by: Qian Cai Tested-by: Baolu Lu Tested-by: Nicolin Chen Co-developed-by: Robin Murphy Signed-off-by: Robin Murphy Signed-off-by: Jason Gunthorpe -- Just minor polishing as discussed v3: - Change names to __iommu_group_set_domain() / __iommu_group_set_core_domain() - Clarify comments - Call __iommu_group_set_domain() directly in iommu_group_release_dma_owner() since we know it is always selecting the default_domain - Remove redundant detach_dev ops check in __iommu_detach_device and make the added WARN_ON fail instead - Check for blocking_domain in __iommu_attach_group() so VFIO can actually attach a new group - Update comments and spelling - Fix missed change to new_domain in iommu_group_do_detach_device() v2: https://lore.kernel.org/r/0-v2-f62259511ac0+6-iommu_dma_block_jgg@nvidia.com v1: https://lore.kernel.org/r/0-v1-6e9d2d0a759d+11b-iommu_dma_block_jgg@nvidia.com Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/0-v3-db7f0785022b+149-iommu_dma_block_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 127 ++++++++++++++++++++++++++++++------------ 1 file changed, 91 insertions(+), 36 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 0c42ece25854..0b22e51e90f4 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -44,6 +44,7 @@ struct iommu_group { char *name; int id; struct iommu_domain *default_domain; + struct iommu_domain *blocking_domain; struct iommu_domain *domain; struct list_head entry; unsigned int owner_cnt; @@ -82,8 +83,8 @@ static int __iommu_attach_device(struct iommu_domain *domain, struct device *dev); static int __iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group); -static void __iommu_detach_group(struct iommu_domain *domain, - struct iommu_group *group); +static int __iommu_group_set_domain(struct iommu_group *group, + struct iommu_domain *new_domain); static int iommu_create_device_direct_mappings(struct iommu_group *group, struct device *dev); static struct iommu_group *iommu_group_get_for_dev(struct device *dev); @@ -596,6 +597,8 @@ static void iommu_group_release(struct kobject *kobj) if (group->default_domain) iommu_domain_free(group->default_domain); + if (group->blocking_domain) + iommu_domain_free(group->blocking_domain); kfree(group->name); kfree(group); @@ -1907,6 +1910,24 @@ void iommu_domain_free(struct iommu_domain *domain) } EXPORT_SYMBOL_GPL(iommu_domain_free); +/* + * Put the group's domain back to the appropriate core-owned domain - either the + * standard kernel-mode DMA configuration or an all-DMA-blocked domain. + */ +static void __iommu_group_set_core_domain(struct iommu_group *group) +{ + struct iommu_domain *new_domain; + int ret; + + if (group->owner) + new_domain = group->blocking_domain; + else + new_domain = group->default_domain; + + ret = __iommu_group_set_domain(group, new_domain); + WARN(ret, "iommu driver failed to attach the default/blocking domain"); +} + static int __iommu_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -1963,9 +1984,6 @@ static void __iommu_detach_device(struct iommu_domain *domain, if (iommu_is_attach_deferred(dev)) return; - if (unlikely(domain->ops->detach_dev == NULL)) - return; - domain->ops->detach_dev(domain, dev); trace_detach_device_from_domain(dev); } @@ -1979,12 +1997,10 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev) return; mutex_lock(&group->mutex); - if (iommu_group_device_count(group) != 1) { - WARN_ON(1); + if (WARN_ON(domain != group->domain) || + WARN_ON(iommu_group_device_count(group) != 1)) goto out_unlock; - } - - __iommu_detach_group(domain, group); + __iommu_group_set_core_domain(group); out_unlock: mutex_unlock(&group->mutex); @@ -2040,7 +2056,8 @@ static int __iommu_attach_group(struct iommu_domain *domain, { int ret; - if (group->domain && group->domain != group->default_domain) + if (group->domain && group->domain != group->default_domain && + group->domain != group->blocking_domain) return -EBUSY; ret = __iommu_group_for_each_dev(group, domain, @@ -2072,38 +2089,49 @@ static int iommu_group_do_detach_device(struct device *dev, void *data) return 0; } -static void __iommu_detach_group(struct iommu_domain *domain, - struct iommu_group *group) +static int __iommu_group_set_domain(struct iommu_group *group, + struct iommu_domain *new_domain) { int ret; + if (group->domain == new_domain) + return 0; + /* - * If the group has been claimed already, do not re-attach the default - * domain. + * New drivers should support default domains and so the detach_dev() op + * will never be called. Otherwise the NULL domain represents some + * platform specific behavior. */ - if (!group->default_domain || group->owner) { - __iommu_group_for_each_dev(group, domain, + if (!new_domain) { + if (WARN_ON(!group->domain->ops->detach_dev)) + return -EINVAL; + __iommu_group_for_each_dev(group, group->domain, iommu_group_do_detach_device); group->domain = NULL; - return; + return 0; } - if (group->domain == group->default_domain) - return; - - /* Detach by re-attaching to the default domain */ - ret = __iommu_group_for_each_dev(group, group->default_domain, + /* + * Changing the domain is done by calling attach_dev() on the new + * domain. This switch does not have to be atomic and DMA can be + * discarded during the transition. DMA must only be able to access + * either new_domain or group->domain, never something else. + * + * Note that this is called in error unwind paths, attaching to a + * domain that has already been attached cannot fail. + */ + ret = __iommu_group_for_each_dev(group, new_domain, iommu_group_do_attach_device); - if (ret != 0) - WARN_ON(1); - else - group->domain = group->default_domain; + if (ret) + return ret; + group->domain = new_domain; + return 0; } void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) { mutex_lock(&group->mutex); - __iommu_detach_group(domain, group); + __iommu_group_set_core_domain(group); mutex_unlock(&group->mutex); } EXPORT_SYMBOL_GPL(iommu_detach_group); @@ -3088,6 +3116,29 @@ void iommu_device_unuse_default_domain(struct device *dev) iommu_group_put(group); } +static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) +{ + struct group_device *dev = + list_first_entry(&group->devices, struct group_device, list); + + if (group->blocking_domain) + return 0; + + group->blocking_domain = + __iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED); + if (!group->blocking_domain) { + /* + * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED + * create an empty domain instead. + */ + group->blocking_domain = __iommu_domain_alloc( + dev->dev->bus, IOMMU_DOMAIN_UNMANAGED); + if (!group->blocking_domain) + return -EINVAL; + } + return 0; +} + /** * iommu_group_claim_dma_owner() - Set DMA ownership of a group * @group: The group. @@ -3111,9 +3162,14 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) goto unlock_out; } + ret = __iommu_group_alloc_blocking_domain(group); + if (ret) + goto unlock_out; + + ret = __iommu_group_set_domain(group, group->blocking_domain); + if (ret) + goto unlock_out; group->owner = owner; - if (group->domain) - __iommu_detach_group(group->domain, group); } group->owner_cnt++; @@ -3132,18 +3188,17 @@ EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); */ void iommu_group_release_dma_owner(struct iommu_group *group) { + int ret; + mutex_lock(&group->mutex); if (WARN_ON(!group->owner_cnt || !group->owner)) goto unlock_out; group->owner_cnt = 0; - /* - * The UNMANAGED domain should be detached before all USER - * owners have been released. - */ - if (!WARN_ON(group->domain) && group->default_domain) - __iommu_attach_group(group->default_domain, group); group->owner = NULL; + ret = __iommu_group_set_domain(group, group->default_domain); + WARN(ret, "iommu driver failed to attach the default domain"); + unlock_out: mutex_unlock(&group->mutex); } From de78657e16f41417da9332f09c2d67d100096939 Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Thu, 5 May 2022 21:27:30 +0800 Subject: [PATCH 780/803] iommu/mediatek: Fix NULL pointer dereference when printing dev_name When larbdev is NULL (in the case I hit, the node is incorrectly set iommus = <&iommu NUM>), it will cause device_link_add() fail and kernel crashes when we try to print dev_name(larbdev). Let's fail the probe if a larbdev is NULL to avoid invalid inputs from dts. It should work for normal correct setting and avoid the crash caused by my incorrect setting. Error log: [ 18.189042][ T301] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000050 ... [ 18.344519][ T301] pstate: a0400005 (NzCv daif +PAN -UAO) [ 18.345213][ T301] pc : mtk_iommu_probe_device+0xf8/0x118 [mtk_iommu] [ 18.346050][ T301] lr : mtk_iommu_probe_device+0xd0/0x118 [mtk_iommu] [ 18.346884][ T301] sp : ffffffc00a5635e0 [ 18.347392][ T301] x29: ffffffc00a5635e0 x28: ffffffd44a46c1d8 [ 18.348156][ T301] x27: ffffff80c39a8000 x26: ffffffd44a80cc38 [ 18.348917][ T301] x25: 0000000000000000 x24: ffffffd44a80cc38 [ 18.349677][ T301] x23: ffffffd44e4da4c6 x22: ffffffd44a80cc38 [ 18.350438][ T301] x21: ffffff80cecd1880 x20: 0000000000000000 [ 18.351198][ T301] x19: ffffff80c439f010 x18: ffffffc00a50d0c0 [ 18.351959][ T301] x17: ffffffffffffffff x16: 0000000000000004 [ 18.352719][ T301] x15: 0000000000000004 x14: ffffffd44eb5d420 [ 18.353480][ T301] x13: 0000000000000ad2 x12: 0000000000000003 [ 18.354241][ T301] x11: 00000000fffffad2 x10: c0000000fffffad2 [ 18.355003][ T301] x9 : a0d288d8d7142d00 x8 : a0d288d8d7142d00 [ 18.355763][ T301] x7 : ffffffd44c2bc640 x6 : 0000000000000000 [ 18.356524][ T301] x5 : 0000000000000080 x4 : 0000000000000001 [ 18.357284][ T301] x3 : 0000000000000000 x2 : 0000000000000005 [ 18.358045][ T301] x1 : 0000000000000000 x0 : 0000000000000000 [ 18.360208][ T301] Hardware name: MT6873 (DT) [ 18.360771][ T301] Call trace: [ 18.361168][ T301] dump_backtrace+0xf8/0x1f0 [ 18.361737][ T301] dump_stack_lvl+0xa8/0x11c [ 18.362305][ T301] dump_stack+0x1c/0x2c [ 18.362816][ T301] mrdump_common_die+0x184/0x40c [mrdump] [ 18.363575][ T301] ipanic_die+0x24/0x38 [mrdump] [ 18.364230][ T301] atomic_notifier_call_chain+0x128/0x2b8 [ 18.364937][ T301] die+0x16c/0x568 [ 18.365394][ T301] __do_kernel_fault+0x1e8/0x214 [ 18.365402][ T301] do_page_fault+0xb8/0x678 [ 18.366934][ T301] do_translation_fault+0x48/0x64 [ 18.368645][ T301] do_mem_abort+0x68/0x148 [ 18.368652][ T301] el1_abort+0x40/0x64 [ 18.368660][ T301] el1h_64_sync_handler+0x54/0x88 [ 18.368668][ T301] el1h_64_sync+0x68/0x6c [ 18.368673][ T301] mtk_iommu_probe_device+0xf8/0x118 [mtk_iommu] ... Cc: Robin Murphy Cc: Yong Wu Reported-by: kernel test robot Fixes: 635319a4a744 ("media: iommu/mediatek: Add device_link between the consumer and the larb devices") Signed-off-by: Miles Chen Reviewed-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220505132731.21628-1-miles.chen@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 6 ++++++ drivers/iommu/mtk_iommu_v1.c | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 71b2ace74cd6..bb9dd92c9898 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -790,6 +790,9 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev) * All the ports in each a device should be in the same larbs. */ larbid = MTK_M4U_TO_LARB(fwspec->ids[0]); + if (larbid >= MTK_LARB_NR_MAX) + return ERR_PTR(-EINVAL); + for (i = 1; i < fwspec->num_ids; i++) { larbidx = MTK_M4U_TO_LARB(fwspec->ids[i]); if (larbid != larbidx) { @@ -799,6 +802,9 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev) } } larbdev = data->larb_imu[larbid].dev; + if (!larbdev) + return ERR_PTR(-EINVAL); + link = device_link_add(dev, larbdev, DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS); if (!link) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 62669e60991f..e1cb51b9866c 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -78,6 +78,7 @@ /* MTK generation one iommu HW only support 4K size mapping */ #define MT2701_IOMMU_PAGE_SHIFT 12 #define MT2701_IOMMU_PAGE_SIZE (1UL << MT2701_IOMMU_PAGE_SHIFT) +#define MT2701_LARB_NR_MAX 3 /* * MTK m4u support 4GB iova address space, and only support 4K page @@ -486,6 +487,9 @@ static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev) /* Link the consumer device with the smi-larb device(supplier) */ larbid = mt2701_m4u_to_larb(fwspec->ids[0]); + if (larbid >= MT2701_LARB_NR_MAX) + return ERR_PTR(-EINVAL); + for (idx = 1; idx < fwspec->num_ids; idx++) { larbidx = mt2701_m4u_to_larb(fwspec->ids[idx]); if (larbid != larbidx) { @@ -496,6 +500,9 @@ static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev) } larbdev = data->larb_imu[larbid].dev; + if (!larbdev) + return ERR_PTR(-EINVAL); + link = device_link_add(dev, larbdev, DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS); if (!link) From a3884774d731f03d3a3dd4fb70ec2d9341ceb39d Mon Sep 17 00:00:00 2001 From: Yunfei Wang Date: Sat, 7 May 2022 16:52:03 +0800 Subject: [PATCH 781/803] iommu/dma: Fix iova map result check bug The data type of the return value of the iommu_map_sg_atomic is ssize_t, but the data type of iova size is size_t, e.g. one is int while the other is unsigned int. When iommu_map_sg_atomic return value is compared with iova size, it will force the signed int to be converted to unsigned int, if iova map fails and iommu_map_sg_atomic return error code is less than 0, then (ret < iova_len) is false, which will to cause not do free iova, and the master can still successfully get the iova of map fail, which is not expected. Therefore, we need to check the return value of iommu_map_sg_atomic in two cases according to whether it is less than 0. Fixes: ad8f36e4b6b1 ("iommu: return full error code from iommu_map_sg[_atomic]()") Signed-off-by: Yunfei Wang Cc: # 5.15.* Reviewed-by: Robin Murphy Reviewed-by: Miles Chen Link: https://lore.kernel.org/r/20220507085204.16914-1-yf.wang@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 1ca85d37eeab..8e0ed400a439 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -776,6 +776,7 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap; struct page **pages; dma_addr_t iova; + ssize_t ret; if (static_branch_unlikely(&iommu_deferred_attach_enabled) && iommu_deferred_attach(dev, domain)) @@ -813,8 +814,8 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, arch_dma_prep_coherent(sg_page(sg), sg->length); } - if (iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot) - < size) + ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot); + if (ret < 0 || ret < size) goto out_free_sg; sgt->sgl->dma_address = iova; @@ -1214,7 +1215,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, * implementation - it knows better than we do. */ ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot); - if (ret < iova_len) + if (ret < 0 || ret < iova_len) goto out_free_iova; return __finalise_sg(dev, sg, nents, iova); From b8397a8f4ebc0b84eefd990dc08995ba2ae9015c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 9 May 2022 11:16:08 +0100 Subject: [PATCH 782/803] iommu/dma: Explicitly sort PCI DMA windows Originally, creating the dma_ranges resource list in pre-sorted fashion was the simplest and most efficient way to enforce the order required by iova_reserve_pci_windows(). However since then at least one PCI host driver is now re-sorting the list for its own probe-time processing, which doesn't seem entirely unreasonable, so that basic assumption no longer holds. Make iommu-dma robust and get the sort order it needs by explicitly sorting, which means we can also save the effort at creation time and just build the list in whatever natural order the DT had. Signed-off-by: Robin Murphy Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/35661036a7e4160850895f9b37f35408b6a29f2f.1652091160.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 13 ++++++++++++- drivers/pci/of.c | 8 +------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 8e0ed400a439..f90251572a5d 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -414,6 +415,15 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, return 0; } +static int iommu_dma_ranges_sort(void *priv, const struct list_head *a, + const struct list_head *b) +{ + struct resource_entry *res_a = list_entry(a, typeof(*res_a), node); + struct resource_entry *res_b = list_entry(b, typeof(*res_b), node); + + return res_a->res->start > res_b->res->start; +} + static int iova_reserve_pci_windows(struct pci_dev *dev, struct iova_domain *iovad) { @@ -432,6 +442,7 @@ static int iova_reserve_pci_windows(struct pci_dev *dev, } /* Get reserved DMA windows from host bridge */ + list_sort(NULL, &bridge->dma_ranges, iommu_dma_ranges_sort); resource_list_for_each_entry(window, &bridge->dma_ranges) { end = window->res->start - window->offset; resv_iova: @@ -440,7 +451,7 @@ resv_iova: hi = iova_pfn(iovad, end); reserve_iova(iovad, lo, hi); } else if (end < start) { - /* dma_ranges list should be sorted */ + /* DMA ranges should be non-overlapping */ dev_err(&dev->dev, "Failed to reserve IOVA [%pa-%pa]\n", &start, &end); diff --git a/drivers/pci/of.c b/drivers/pci/of.c index cb2e8351c2cc..8f0ebaf9ae85 100644 --- a/drivers/pci/of.c +++ b/drivers/pci/of.c @@ -369,7 +369,6 @@ static int devm_of_pci_get_host_bridge_resources(struct device *dev, dev_dbg(dev, "Parsing dma-ranges property...\n"); for_each_of_pci_range(&parser, &range) { - struct resource_entry *entry; /* * If we failed translation or got a zero-sized region * then skip this range @@ -393,12 +392,7 @@ static int devm_of_pci_get_host_bridge_resources(struct device *dev, goto failed; } - /* Keep the resource list sorted */ - resource_list_for_each_entry(entry, ib_resources) - if (entry->res->start > res->start) - break; - - pci_add_resource_offset(&entry->node, res, + pci_add_resource_offset(ib_resources, res, res->start - range.pci_addr); } From cd901e9284c7893eebee6857ebe30b691ea91224 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Tue, 10 May 2022 10:34:00 +0800 Subject: [PATCH 783/803] iommu/vt-d: Remove unneeded validity check on dev dev_iommu_priv_get() is being used at the top of this function which dereferences dev. Dev cannot be NULL after this. Remove the validity check on dev and simplify the code. Signed-off-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20220313150337.593650-1-usama.anjum@collabora.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20220510023407.2759143-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index efcecfa5952a..b5caea9a7e78 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2501,7 +2501,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, } } - if (dev && domain_context_mapping(domain, dev)) { + if (domain_context_mapping(domain, dev)) { dev_err(dev, "Domain context map failed\n"); dmar_remove_one_dev_info(dev); return NULL; From e19c3992b9f8bef9c8c4aa59fa30a83e7e35eccc Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 10 May 2022 10:34:01 +0800 Subject: [PATCH 784/803] iommu/vt-d: Change return type of dmar_insert_one_dev_info() The dmar_insert_one_dev_info() returns the pass-in domain on success and NULL on failure. This doesn't make much sense. Change it to an integer. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220416120423.879552-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220510023407.2759143-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b5caea9a7e78..6bb229dbd0d9 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2452,10 +2452,9 @@ static bool dev_is_real_dma_subdevice(struct device *dev) pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev); } -static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, - int bus, int devfn, - struct device *dev, - struct dmar_domain *domain) +static int dmar_insert_one_dev_info(struct intel_iommu *iommu, int bus, + int devfn, struct device *dev, + struct dmar_domain *domain) { struct device_domain_info *info = dev_iommu_priv_get(dev); unsigned long flags; @@ -2468,7 +2467,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, spin_unlock(&iommu->lock); if (ret) { spin_unlock_irqrestore(&device_domain_lock, flags); - return NULL; + return ret; } list_add(&info->link, &domain->devices); spin_unlock_irqrestore(&device_domain_lock, flags); @@ -2479,7 +2478,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (ret) { dev_err(dev, "PASID table allocation failed\n"); dmar_remove_one_dev_info(dev); - return NULL; + return ret; } /* Setup the PASID entry for requests without PASID: */ @@ -2497,17 +2496,18 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (ret) { dev_err(dev, "Setup RID2PASID failed\n"); dmar_remove_one_dev_info(dev); - return NULL; + return ret; } } - if (domain_context_mapping(domain, dev)) { + ret = domain_context_mapping(domain, dev); + if (ret) { dev_err(dev, "Domain context map failed\n"); dmar_remove_one_dev_info(dev); - return NULL; + return ret; } - return domain; + return 0; } static int iommu_domain_identity_map(struct dmar_domain *domain, @@ -2585,7 +2585,6 @@ static int __init si_domain_init(int hw) static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) { - struct dmar_domain *ndomain; struct intel_iommu *iommu; u8 bus, devfn; @@ -2593,11 +2592,7 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) if (!iommu) return -ENODEV; - ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain); - if (ndomain != domain) - return -EBUSY; - - return 0; + return dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain); } static bool device_has_rmrr(struct device *dev) From bac4e778d67a5f0fd8cc49029c1fff127b86c874 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 10 May 2022 10:34:02 +0800 Subject: [PATCH 785/803] iommu/vt-d: Fold dmar_insert_one_dev_info() into its caller Fold dmar_insert_one_dev_info() into domain_add_dev_info() which is its only caller. No intentional functional impact. Suggested-by: Christoph Hellwig Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220416120423.879552-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220510023407.2759143-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 110 +++++++++++++++++------------------- 1 file changed, 51 insertions(+), 59 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6bb229dbd0d9..f200af2233d0 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2452,64 +2452,6 @@ static bool dev_is_real_dma_subdevice(struct device *dev) pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev); } -static int dmar_insert_one_dev_info(struct intel_iommu *iommu, int bus, - int devfn, struct device *dev, - struct dmar_domain *domain) -{ - struct device_domain_info *info = dev_iommu_priv_get(dev); - unsigned long flags; - int ret; - - spin_lock_irqsave(&device_domain_lock, flags); - info->domain = domain; - spin_lock(&iommu->lock); - ret = domain_attach_iommu(domain, iommu); - spin_unlock(&iommu->lock); - if (ret) { - spin_unlock_irqrestore(&device_domain_lock, flags); - return ret; - } - list_add(&info->link, &domain->devices); - spin_unlock_irqrestore(&device_domain_lock, flags); - - /* PASID table is mandatory for a PCI device in scalable mode. */ - if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { - ret = intel_pasid_alloc_table(dev); - if (ret) { - dev_err(dev, "PASID table allocation failed\n"); - dmar_remove_one_dev_info(dev); - return ret; - } - - /* Setup the PASID entry for requests without PASID: */ - spin_lock_irqsave(&iommu->lock, flags); - if (hw_pass_through && domain_type_is_si(domain)) - ret = intel_pasid_setup_pass_through(iommu, domain, - dev, PASID_RID2PASID); - else if (domain_use_first_level(domain)) - ret = domain_setup_first_level(iommu, domain, dev, - PASID_RID2PASID); - else - ret = intel_pasid_setup_second_level(iommu, domain, - dev, PASID_RID2PASID); - spin_unlock_irqrestore(&iommu->lock, flags); - if (ret) { - dev_err(dev, "Setup RID2PASID failed\n"); - dmar_remove_one_dev_info(dev); - return ret; - } - } - - ret = domain_context_mapping(domain, dev); - if (ret) { - dev_err(dev, "Domain context map failed\n"); - dmar_remove_one_dev_info(dev); - return ret; - } - - return 0; -} - static int iommu_domain_identity_map(struct dmar_domain *domain, unsigned long first_vpfn, unsigned long last_vpfn) @@ -2585,14 +2527,64 @@ static int __init si_domain_init(int hw) static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) { + struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu; + unsigned long flags; u8 bus, devfn; + int ret; iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) return -ENODEV; - return dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain); + spin_lock_irqsave(&device_domain_lock, flags); + info->domain = domain; + spin_lock(&iommu->lock); + ret = domain_attach_iommu(domain, iommu); + spin_unlock(&iommu->lock); + if (ret) { + spin_unlock_irqrestore(&device_domain_lock, flags); + return ret; + } + list_add(&info->link, &domain->devices); + spin_unlock_irqrestore(&device_domain_lock, flags); + + /* PASID table is mandatory for a PCI device in scalable mode. */ + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { + ret = intel_pasid_alloc_table(dev); + if (ret) { + dev_err(dev, "PASID table allocation failed\n"); + dmar_remove_one_dev_info(dev); + return ret; + } + + /* Setup the PASID entry for requests without PASID: */ + spin_lock_irqsave(&iommu->lock, flags); + if (hw_pass_through && domain_type_is_si(domain)) + ret = intel_pasid_setup_pass_through(iommu, domain, + dev, PASID_RID2PASID); + else if (domain_use_first_level(domain)) + ret = domain_setup_first_level(iommu, domain, dev, + PASID_RID2PASID); + else + ret = intel_pasid_setup_second_level(iommu, domain, + dev, PASID_RID2PASID); + spin_unlock_irqrestore(&iommu->lock, flags); + if (ret) { + dev_err(dev, "Setup RID2PASID failed\n"); + dmar_remove_one_dev_info(dev); + return ret; + } + } + + ret = domain_context_mapping(domain, dev); + if (ret) { + dev_err(dev, "Domain context map failed\n"); + dmar_remove_one_dev_info(dev); + return ret; + } + + return 0; } static bool device_has_rmrr(struct device *dev) From ea661ad6e1573d5b08c27444ff2ed403bf39ff66 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 10 May 2022 10:34:03 +0800 Subject: [PATCH 786/803] iommu/vt-d: Size Page Request Queue to avoid overflow condition PRQ overflow may cause I/O throughput congestion, resulting in unnecessary degradation of I/O performance. Appropriately increasing the length of PRQ can greatly reduce the occurrence of PRQ overflow. The count of maximum page requests that can be generated in parallel by a PCIe device is statically defined in the Outstanding Page Request Capacity field of the PCIe ATS configure space. The new length of PRQ is calculated by summing up the value of Outstanding Page Request Capacity register across all devices where Page Requests are supported on the real PR-capable platform (Intel Sapphire Rapids). The result is round to the nearest higher power of 2. The PRQ length is also double sized as the VT-d IOMMU driver only updates the Page Request Queue Head Register (PQH_REG) after processing the entire queue. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220421113558.3504874-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220510023407.2759143-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-svm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index b3b125b332aa..207ef06ba3e1 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -9,7 +9,7 @@ #define __INTEL_SVM_H__ /* Page Request Queue depth */ -#define PRQ_ORDER 2 +#define PRQ_ORDER 4 #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) #define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5) From 9d6ab26a75f47332da9949a0112727e10f2d6e51 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 10 May 2022 10:34:04 +0800 Subject: [PATCH 787/803] iommu/vt-d: Block force-snoop domain attaching if no SC support In the attach_dev callback of the default domain ops, if the domain has been set force_snooping, but the iommu hardware of the device does not support SC(Snoop Control) capability, the callback should block it and return a corresponding error code. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220508123525.1973626-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220510023407.2759143-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f200af2233d0..23985cb5c8ee 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4333,6 +4333,9 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, if (!iommu) return -ENODEV; + if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap)) + return -EOPNOTSUPP; + /* check if this iommu agaw is sufficient for max mapped address */ addr_width = agaw_to_width(iommu->agaw); if (addr_width > cap_mgaw(iommu->cap)) From fc0051cb95909ab56bd8c929f24d48c9870c3e3a Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 10 May 2022 10:34:05 +0800 Subject: [PATCH 788/803] iommu/vt-d: Check domain force_snooping against attached devices As domain->force_snooping only impacts the devices attached with the domain, there's no need to check against all IOMMU units. On the other hand, force_snooping could be set on a domain no matter whether it has been attached or not, and once set it is an immutable flag. If no device attached, the operation always succeeds. Then this empty domain can be only attached to a device of which the IOMMU supports snoop control. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220508123525.1973626-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220510023407.2759143-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 53 ++++++++++++++++++++++++++++++++++--- drivers/iommu/intel/pasid.c | 42 +++++++++++++++++++++++++++++ drivers/iommu/intel/pasid.h | 2 ++ include/linux/intel-iommu.h | 1 + 4 files changed, 95 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 23985cb5c8ee..f366e8c49636 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2438,7 +2438,7 @@ static int domain_setup_first_level(struct intel_iommu *iommu, if (level == 5) flags |= PASID_FLAG_FL5LP; - if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED) + if (domain->force_snooping) flags |= PASID_FLAG_PAGE_SNOOP; return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid, @@ -4410,7 +4410,7 @@ static int intel_iommu_map(struct iommu_domain *domain, prot |= DMA_PTE_READ; if (iommu_prot & IOMMU_WRITE) prot |= DMA_PTE_WRITE; - if (dmar_domain->force_snooping) + if (dmar_domain->set_pte_snp) prot |= DMA_PTE_SNP; max_addr = iova + size; @@ -4533,13 +4533,60 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, return phys; } +static bool domain_support_force_snooping(struct dmar_domain *domain) +{ + struct device_domain_info *info; + bool support = true; + + assert_spin_locked(&device_domain_lock); + list_for_each_entry(info, &domain->devices, link) { + if (!ecap_sc_support(info->iommu->ecap)) { + support = false; + break; + } + } + + return support; +} + +static void domain_set_force_snooping(struct dmar_domain *domain) +{ + struct device_domain_info *info; + + assert_spin_locked(&device_domain_lock); + + /* + * Second level page table supports per-PTE snoop control. The + * iommu_map() interface will handle this by setting SNP bit. + */ + if (!domain_use_first_level(domain)) { + domain->set_pte_snp = true; + return; + } + + list_for_each_entry(info, &domain->devices, link) + intel_pasid_setup_page_snoop_control(info->iommu, info->dev, + PASID_RID2PASID); +} + static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); + unsigned long flags; - if (!domain_update_iommu_snooping(NULL)) + if (dmar_domain->force_snooping) + return true; + + spin_lock_irqsave(&device_domain_lock, flags); + if (!domain_support_force_snooping(dmar_domain)) { + spin_unlock_irqrestore(&device_domain_lock, flags); return false; + } + + domain_set_force_snooping(dmar_domain); dmar_domain->force_snooping = true; + spin_unlock_irqrestore(&device_domain_lock, flags); + return true; } diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index f8d215d85695..d19dd66a670c 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -762,3 +762,45 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, return 0; } + +/* + * Set the page snoop control for a pasid entry which has been set up. + */ +void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu, + struct device *dev, u32 pasid) +{ + struct pasid_entry *pte; + u16 did; + + spin_lock(&iommu->lock); + pte = intel_pasid_get_entry(dev, pasid); + if (WARN_ON(!pte || !pasid_pte_is_present(pte))) { + spin_unlock(&iommu->lock); + return; + } + + pasid_set_pgsnp(pte); + did = pasid_get_domain_id(pte); + spin_unlock(&iommu->lock); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + /* + * VT-d spec 3.4 table23 states guides for cache invalidation: + * + * - PASID-selective-within-Domain PASID-cache invalidation + * - PASID-selective PASID-based IOTLB invalidation + * - If (pasid is RID_PASID) + * - Global Device-TLB invalidation to affected functions + * Else + * - PASID-based Device-TLB invalidation (with S=1 and + * Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions + */ + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + qi_flush_piotlb(iommu, did, pasid, 0, -1, 0); + + /* Device IOTLB doesn't need to be flushed in caching mode. */ + if (!cap_caching_mode(iommu->cap)) + devtlb_invalidation_with_pasid(iommu, dev, pasid); +} diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index ab4408c824a5..583ea67fc783 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -123,4 +123,6 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, bool fault_ignore); int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid); void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid); +void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu, + struct device *dev, u32 pasid); #endif /* __INTEL_PASID_H */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 72e5d7900e71..4f29139bbfc3 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -540,6 +540,7 @@ struct dmar_domain { u8 has_iotlb_device: 1; u8 iommu_coherency: 1; /* indicate coherency of iommu access */ u8 force_snooping : 1; /* Create IOPTEs with snoop control */ + u8 set_pte_snp:1; struct list_head devices; /* all devices' list */ struct iova_domain iovad; /* iova's that belong to this domain */ From e80552267b63df211c805e8e8c534bfe5909b7e7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 10 May 2022 10:34:06 +0800 Subject: [PATCH 789/803] iommu/vt-d: Remove domain_update_iommu_snooping() The IOMMU force snooping capability is not required to be consistent among all the IOMMUs anymore. Remove force snooping capability check in the IOMMU hot-add path and domain_update_iommu_snooping() becomes a dead code now. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220508123525.1973626-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220510023407.2759143-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 34 +--------------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f366e8c49636..985a83e24244 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -533,33 +533,6 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) rcu_read_unlock(); } -static bool domain_update_iommu_snooping(struct intel_iommu *skip) -{ - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - bool ret = true; - - rcu_read_lock(); - for_each_active_iommu(iommu, drhd) { - if (iommu != skip) { - /* - * If the hardware is operating in the scalable mode, - * the snooping control is always supported since we - * always set PASID-table-entry.PGSNP bit if the domain - * is managed outside (UNMANAGED). - */ - if (!sm_supported(iommu) && - !ecap_sc_support(iommu->ecap)) { - ret = false; - break; - } - } - } - rcu_read_unlock(); - - return ret; -} - static int domain_update_iommu_superpage(struct dmar_domain *domain, struct intel_iommu *skip) { @@ -3572,12 +3545,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) iommu->name); return -ENXIO; } - if (!ecap_sc_support(iommu->ecap) && - domain_update_iommu_snooping(iommu)) { - pr_warn("%s: Doesn't support snooping.\n", - iommu->name); - return -ENXIO; - } + sp = domain_update_iommu_superpage(NULL, iommu) - 1; if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) { pr_warn("%s: Doesn't support large page.\n", From 0d647b33e74f7cb98b7c74d638922d6c03bfdb94 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 10 May 2022 10:34:07 +0800 Subject: [PATCH 790/803] iommu/vt-d: Remove hard coding PGSNP bit in PASID entries As enforce_cache_coherency has been introduced into the iommu_domain_ops, the kernel component which owns the iommu domain is able to opt-in its requirement for force snooping support. The iommu driver has no need to hard code the page snoop control bit in the PASID table entries anymore. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220508123525.1973626-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20220510023407.2759143-9-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/pasid.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index d19dd66a670c..cb4c1d0cf25c 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -710,9 +710,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, pasid_set_fault_enable(pte); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); - if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED) - pasid_set_pgsnp(pte); - /* * Since it is a second level only translation setup, we should * set SRE bit as well (addresses are expected to be GPAs). From 1d6595b4cd47acfd824550f48f10b54a6f0e93ee Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 2 Mar 2022 10:24:22 -0500 Subject: [PATCH 791/803] drm/vmwgfx: Fix fencing on SVGAv3 Port of the vmwgfx to SVGAv3 lacked support for fencing. SVGAv3 removed FIFO's and replaced them with command buffers and extra registers. The initial version of SVGAv3 lacked support for most advanced features (e.g. 3D) which made fences unnecessary. That is no longer the case, especially as 3D support is being turned on. Switch from FIFO commands and capabilities to command buffers and extra registers to enable fences on SVGAv3. Fixes: 2cd80dbd3551 ("drm/vmwgfx: Add basic support for SVGA3") Signed-off-by: Zack Rusin Reviewed-by: Martin Krastev Reviewed-by: Maaz Mombasawala Link: https://patchwork.freedesktop.org/patch/msgid/20220302152426.885214-5-zack@kde.org --- drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 8 ++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 28 ++++++++++++++++++++------- drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 26 +++++++++++++++++-------- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 8 +++++--- 5 files changed, 53 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c index a3bfbb6c3e14..bf1b394753da 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c @@ -528,7 +528,7 @@ int vmw_cmd_send_fence(struct vmw_private *dev_priv, uint32_t *seqno) *seqno = atomic_add_return(1, &dev_priv->marker_seq); } while (*seqno == 0); - if (!(vmw_fifo_caps(dev_priv) & SVGA_FIFO_CAP_FENCE)) { + if (!vmw_has_fences(dev_priv)) { /* * Don't request hardware to send a fence. The diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index ea3ecdda561d..6de0b9ef5c77 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1679,4 +1679,12 @@ static inline void vmw_irq_status_write(struct vmw_private *vmw, outl(status, vmw->io_start + SVGA_IRQSTATUS_PORT); } +static inline bool vmw_has_fences(struct vmw_private *vmw) +{ + if ((vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS | + SVGA_CAP_CMD_BUFFERS_2)) != 0) + return true; + return (vmw_fifo_caps(vmw) & SVGA_FIFO_CAP_FENCE) != 0; +} + #endif diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 59d6a2dd4c2e..66cc35dc223e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -82,6 +82,22 @@ fman_from_fence(struct vmw_fence_obj *fence) return container_of(fence->base.lock, struct vmw_fence_manager, lock); } +static u32 vmw_fence_goal_read(struct vmw_private *vmw) +{ + if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0) + return vmw_read(vmw, SVGA_REG_FENCE_GOAL); + else + return vmw_fifo_mem_read(vmw, SVGA_FIFO_FENCE_GOAL); +} + +static void vmw_fence_goal_write(struct vmw_private *vmw, u32 value) +{ + if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0) + vmw_write(vmw, SVGA_REG_FENCE_GOAL, value); + else + vmw_fifo_mem_write(vmw, SVGA_FIFO_FENCE_GOAL, value); +} + /* * Note on fencing subsystem usage of irqs: * Typically the vmw_fences_update function is called @@ -392,7 +408,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, if (likely(!fman->seqno_valid)) return false; - goal_seqno = vmw_fifo_mem_read(fman->dev_priv, SVGA_FIFO_FENCE_GOAL); + goal_seqno = vmw_fence_goal_read(fman->dev_priv); if (likely(passed_seqno - goal_seqno >= VMW_FENCE_WRAP)) return false; @@ -400,9 +416,8 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, list_for_each_entry(fence, &fman->fence_list, head) { if (!list_empty(&fence->seq_passed_actions)) { fman->seqno_valid = true; - vmw_fifo_mem_write(fman->dev_priv, - SVGA_FIFO_FENCE_GOAL, - fence->base.seqno); + vmw_fence_goal_write(fman->dev_priv, + fence->base.seqno); break; } } @@ -434,13 +449,12 @@ static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence) if (dma_fence_is_signaled_locked(&fence->base)) return false; - goal_seqno = vmw_fifo_mem_read(fman->dev_priv, SVGA_FIFO_FENCE_GOAL); + goal_seqno = vmw_fence_goal_read(fman->dev_priv); if (likely(fman->seqno_valid && goal_seqno - fence->base.seqno < VMW_FENCE_WRAP)) return false; - vmw_fifo_mem_write(fman->dev_priv, SVGA_FIFO_FENCE_GOAL, - fence->base.seqno); + vmw_fence_goal_write(fman->dev_priv, fence->base.seqno); fman->seqno_valid = true; return true; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c index c5191de365ca..fe4732bf2c9d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c @@ -32,6 +32,14 @@ #define VMW_FENCE_WRAP (1 << 24) +static u32 vmw_irqflag_fence_goal(struct vmw_private *vmw) +{ + if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0) + return SVGA_IRQFLAG_REG_FENCE_GOAL; + else + return SVGA_IRQFLAG_FENCE_GOAL; +} + /** * vmw_thread_fn - Deferred (process context) irq handler * @@ -96,7 +104,7 @@ static irqreturn_t vmw_irq_handler(int irq, void *arg) wake_up_all(&dev_priv->fifo_queue); if ((masked_status & (SVGA_IRQFLAG_ANY_FENCE | - SVGA_IRQFLAG_FENCE_GOAL)) && + vmw_irqflag_fence_goal(dev_priv))) && !test_and_set_bit(VMW_IRQTHREAD_FENCE, dev_priv->irqthread_pending)) ret = IRQ_WAKE_THREAD; @@ -137,8 +145,7 @@ bool vmw_seqno_passed(struct vmw_private *dev_priv, if (likely(dev_priv->last_read_seqno - seqno < VMW_FENCE_WRAP)) return true; - if (!(vmw_fifo_caps(dev_priv) & SVGA_FIFO_CAP_FENCE) && - vmw_fifo_idle(dev_priv, seqno)) + if (!vmw_has_fences(dev_priv) && vmw_fifo_idle(dev_priv, seqno)) return true; /** @@ -160,6 +167,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, unsigned long timeout) { struct vmw_fifo_state *fifo_state = dev_priv->fifo; + bool fifo_down = false; uint32_t count = 0; uint32_t signal_seq; @@ -176,12 +184,14 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, */ if (fifo_idle) { - down_read(&fifo_state->rwsem); if (dev_priv->cman) { ret = vmw_cmdbuf_idle(dev_priv->cman, interruptible, 10*HZ); if (ret) goto out_err; + } else if (fifo_state) { + down_read(&fifo_state->rwsem); + fifo_down = true; } } @@ -218,12 +228,12 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, } } finish_wait(&dev_priv->fence_queue, &__wait); - if (ret == 0 && fifo_idle) + if (ret == 0 && fifo_idle && fifo_state) vmw_fence_write(dev_priv, signal_seq); wake_up_all(&dev_priv->fence_queue); out_err: - if (fifo_idle) + if (fifo_down) up_read(&fifo_state->rwsem); return ret; @@ -266,13 +276,13 @@ void vmw_seqno_waiter_remove(struct vmw_private *dev_priv) void vmw_goal_waiter_add(struct vmw_private *dev_priv) { - vmw_generic_waiter_add(dev_priv, SVGA_IRQFLAG_FENCE_GOAL, + vmw_generic_waiter_add(dev_priv, vmw_irqflag_fence_goal(dev_priv), &dev_priv->goal_queue_waiters); } void vmw_goal_waiter_remove(struct vmw_private *dev_priv) { - vmw_generic_waiter_remove(dev_priv, SVGA_IRQFLAG_FENCE_GOAL, + vmw_generic_waiter_remove(dev_priv, vmw_irqflag_fence_goal(dev_priv), &dev_priv->goal_queue_waiters); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index bbd2f4ec08ec..93431e8f6606 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1344,7 +1344,6 @@ vmw_kms_new_framebuffer(struct vmw_private *dev_priv, ret = vmw_kms_new_framebuffer_surface(dev_priv, surface, &vfb, mode_cmd, is_bo_proxy); - /* * vmw_create_bo_proxy() adds a reference that is no longer * needed @@ -1385,13 +1384,16 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, ret = vmw_user_lookup_handle(dev_priv, file_priv, mode_cmd->handles[0], &surface, &bo); - if (ret) + if (ret) { + DRM_ERROR("Invalid buffer object handle %u (0x%x).\n", + mode_cmd->handles[0], mode_cmd->handles[0]); goto err_out; + } if (!bo && !vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height)) { - DRM_ERROR("Surface size cannot exceed %dx%d", + DRM_ERROR("Surface size cannot exceed %dx%d\n", dev_priv->texture_max_width, dev_priv->texture_max_height); goto err_out; From 3059d9b9f6aa433a55b9d0d21b566396d5497c33 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 2 Mar 2022 10:24:24 -0500 Subject: [PATCH 792/803] drm/vmwgfx: Initialize drm_mode_fb_cmd2 Transition to drm_mode_fb_cmd2 from drm_mode_fb_cmd left the structure unitialized. drm_mode_fb_cmd2 adds a few additional members, e.g. flags and modifiers which were never initialized. Garbage in those members can cause random failures during the bringup of the fbcon. Initializing the structure fixes random blank screens after bootup due to flags/modifiers mismatches during the fbcon bring up. Fixes: dabdcdc9822a ("drm/vmwgfx: Switch to mode_cmd2") Signed-off-by: Zack Rusin Cc: Daniel Vetter Cc: # v4.10+ Reviewed-by: Martin Krastev Reviewed-by: Maaz Mombasawala Link: https://patchwork.freedesktop.org/patch/msgid/20220302152426.885214-7-zack@kde.org --- drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index 8ee34576c7d0..adf17c740656 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -483,7 +483,7 @@ static int vmw_fb_kms_detach(struct vmw_fb_par *par, static int vmw_fb_kms_framebuffer(struct fb_info *info) { - struct drm_mode_fb_cmd2 mode_cmd; + struct drm_mode_fb_cmd2 mode_cmd = {0}; struct vmw_fb_par *par = info->par; struct fb_var_screeninfo *var = &info->var; struct drm_framebuffer *cur_fb; From 21d1d192890ced87f2f04f8f4dea92406e0b162a Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 18 Mar 2022 13:43:31 -0400 Subject: [PATCH 793/803] drm/vmwgfx: Disable command buffers on svga3 without gbobjects With very limited vram on svga3 it's difficult to handle all the surface migrations. Without gbobjects, i.e. the ability to store surfaces in guest mobs, there's no reason to support intermediate svga2 features, especially because we can fall back to fb traces and svga3 will never support those in-between features. On svga3 we wither want to use fb traces or screen targets (i.e. gbobjects), nothing in between. This fixes presentation on a lot of fusion/esxi tech previews where the exposed svga3 caps haven't been finalized yet. Signed-off-by: Zack Rusin Fixes: 2cd80dbd3551 ("drm/vmwgfx: Add basic support for SVGA3") Cc: # v5.14+ Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20220318174332.440068-5-zack@kde.org --- drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c index bf1b394753da..162dfeb1cc5a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c @@ -675,11 +675,14 @@ int vmw_cmd_emit_dummy_query(struct vmw_private *dev_priv, */ bool vmw_cmd_supported(struct vmw_private *vmw) { - if ((vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS | - SVGA_CAP_CMD_BUFFERS_2)) != 0) - return true; + bool has_cmdbufs = + (vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS | + SVGA_CAP_CMD_BUFFERS_2)) != 0; + if (vmw_is_svga_v3(vmw)) + return (has_cmdbufs && + (vmw->capabilities & SVGA_CAP_GBOBJECTS) != 0); /* * We have FIFO cmd's */ - return vmw->fifo_mem != NULL; + return has_cmdbufs || vmw->fifo_mem != NULL; } From d031a8866e709c9d1ee5537a321b6192b4d2dc5b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 14 Apr 2022 17:52:39 +0200 Subject: [PATCH 794/803] gfs2: Fix filesystem block deallocation for short writes When a write cannot be carried out in full, gfs2_iomap_end() releases blocks that have been allocated for this write but haven't been used. To compute the end of the allocation, gfs2_iomap_end() incorrectly rounded the end of the attempted write down to the next block boundary to arrive at the end of the allocation. It would have to round up, but the end of the allocation is also available as iomap->offset + iomap->length, so just use that instead. In addition, use round_up() for computing the start of the unused range. Fixes: 64bc06bb32ee ("gfs2: iomap buffered write support") Signed-off-by: Andreas Gruenbacher --- fs/gfs2/bmap.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 39080b2d6cf8..b6697333bb2b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1153,13 +1153,12 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, if (length != written && (iomap->flags & IOMAP_F_NEW)) { /* Deallocate blocks that were just allocated. */ - loff_t blockmask = i_blocksize(inode) - 1; - loff_t end = (pos + length) & ~blockmask; + loff_t hstart = round_up(pos + written, i_blocksize(inode)); + loff_t hend = iomap->offset + iomap->length; - pos = (pos + written + blockmask) & ~blockmask; - if (pos < end) { - truncate_pagecache_range(inode, pos, end - 1); - punch_hole(ip, pos, end - pos); + if (hstart < hend) { + truncate_pagecache_range(inode, hstart, hend - 1); + punch_hole(ip, hstart, hend - hstart); } } From 42e4c3bdcae7833eeeaed7bf0c000c2de17dd291 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 27 Apr 2022 13:53:42 +0200 Subject: [PATCH 795/803] gfs2: Variable rename Instead of counting the number of bytes read from the filesystem, functions gfs2_file_direct_read and gfs2_file_read_iter count the number of bytes written into the user buffer. Conversely, functions gfs2_file_direct_write and gfs2_file_buffered_write count the number of bytes read from the user buffer. This is nothing but confusing, so change the read functions to count how many bytes they have read, and the write functions to count how many bytes they have written. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 48f01323c37c..4d36c01727ad 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -807,7 +807,7 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, struct file *file = iocb->ki_filp; struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); size_t prev_count = 0, window_size = 0; - size_t written = 0; + size_t read = 0; ssize_t ret; /* @@ -839,11 +839,11 @@ retry_under_glock: pagefault_disable(); to->nofault = true; ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, - IOMAP_DIO_PARTIAL, written); + IOMAP_DIO_PARTIAL, read); to->nofault = false; pagefault_enable(); if (ret > 0) - written = ret; + read = ret; if (should_fault_in_pages(ret, to, &prev_count, &window_size)) { size_t leftover; @@ -863,7 +863,7 @@ out_uninit: gfs2_holder_uninit(gh); if (ret < 0) return ret; - return written; + return read; } static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, @@ -873,7 +873,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, struct inode *inode = file->f_mapping->host; struct gfs2_inode *ip = GFS2_I(inode); size_t prev_count = 0, window_size = 0; - size_t read = 0; + size_t written = 0; ssize_t ret; /* @@ -906,13 +906,13 @@ retry_under_glock: from->nofault = true; ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, - IOMAP_DIO_PARTIAL, read); + IOMAP_DIO_PARTIAL, written); from->nofault = false; if (ret == -ENOTBLK) ret = 0; if (ret > 0) - read = ret; + written = ret; if (should_fault_in_pages(ret, from, &prev_count, &window_size)) { size_t leftover; @@ -933,7 +933,7 @@ out_uninit: gfs2_holder_uninit(gh); if (ret < 0) return ret; - return read; + return written; } static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) @@ -941,7 +941,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct gfs2_inode *ip; struct gfs2_holder gh; size_t prev_count = 0, window_size = 0; - size_t written = 0; + size_t read = 0; ssize_t ret; /* @@ -962,7 +962,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (ret >= 0) { if (!iov_iter_count(to)) return ret; - written = ret; + read = ret; } else if (ret != -EFAULT) { if (ret != -EAGAIN) return ret; @@ -980,7 +980,7 @@ retry_under_glock: ret = generic_file_read_iter(iocb, to); pagefault_enable(); if (ret > 0) - written += ret; + read += ret; if (should_fault_in_pages(ret, to, &prev_count, &window_size)) { size_t leftover; @@ -998,7 +998,7 @@ retry_under_glock: gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); - return written ? written : ret; + return read ? read : ret; } static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, @@ -1012,7 +1012,7 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, struct gfs2_holder *statfs_gh = NULL; size_t prev_count = 0, window_size = 0; size_t orig_count = iov_iter_count(from); - size_t read = 0; + size_t written = 0; ssize_t ret; /* @@ -1050,13 +1050,13 @@ retry_under_glock: current->backing_dev_info = NULL; if (ret > 0) { iocb->ki_pos += ret; - read += ret; + written += ret; } if (inode == sdp->sd_rindex) gfs2_glock_dq_uninit(statfs_gh); - from->count = orig_count - read; + from->count = orig_count - written; if (should_fault_in_pages(ret, from, &prev_count, &window_size)) { size_t leftover; @@ -1077,8 +1077,8 @@ out_uninit: gfs2_holder_uninit(gh); if (statfs_gh) kfree(statfs_gh); - from->count = orig_count - read; - return read ? read : ret; + from->count = orig_count - written; + return written ? written : ret; } /** From 6d22ff471070e21e24667be70764ee5abdfe5608 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 5 May 2022 12:37:49 +0200 Subject: [PATCH 796/803] gfs2: Clean up use of fault_in_iov_iter_{read,write}able No need to store the return value of the fault_in functions in separate variables. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 4d36c01727ad..acc0c1d41564 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -846,12 +846,10 @@ retry_under_glock: read = ret; if (should_fault_in_pages(ret, to, &prev_count, &window_size)) { - size_t leftover; - gfs2_holder_allow_demote(gh); - leftover = fault_in_iov_iter_writeable(to, window_size); + window_size -= fault_in_iov_iter_writeable(to, window_size); gfs2_holder_disallow_demote(gh); - if (leftover != window_size) { + if (window_size) { if (gfs2_holder_queued(gh)) goto retry_under_glock; goto retry; @@ -915,12 +913,10 @@ retry_under_glock: written = ret; if (should_fault_in_pages(ret, from, &prev_count, &window_size)) { - size_t leftover; - gfs2_holder_allow_demote(gh); - leftover = fault_in_iov_iter_readable(from, window_size); + window_size -= fault_in_iov_iter_readable(from, window_size); gfs2_holder_disallow_demote(gh); - if (leftover != window_size) { + if (window_size) { if (gfs2_holder_queued(gh)) goto retry_under_glock; goto retry; @@ -983,12 +979,10 @@ retry_under_glock: read += ret; if (should_fault_in_pages(ret, to, &prev_count, &window_size)) { - size_t leftover; - gfs2_holder_allow_demote(&gh); - leftover = fault_in_iov_iter_writeable(to, window_size); + window_size -= fault_in_iov_iter_writeable(to, window_size); gfs2_holder_disallow_demote(&gh); - if (leftover != window_size) { + if (window_size) { if (gfs2_holder_queued(&gh)) goto retry_under_glock; goto retry; @@ -1058,13 +1052,11 @@ retry_under_glock: from->count = orig_count - written; if (should_fault_in_pages(ret, from, &prev_count, &window_size)) { - size_t leftover; - gfs2_holder_allow_demote(gh); - leftover = fault_in_iov_iter_readable(from, window_size); + window_size -= fault_in_iov_iter_readable(from, window_size); gfs2_holder_disallow_demote(gh); - if (leftover != window_size) { - from->count = min(from->count, window_size - leftover); + if (window_size) { + from->count = min(from->count, window_size); if (gfs2_holder_queued(gh)) goto retry_under_glock; goto retry; From 72382264502d9348ead372f82ecc3044de5c82d2 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 5 May 2022 12:53:26 +0200 Subject: [PATCH 797/803] gfs2: Pull return value test out of should_fault_in_pages Pull the return value test of the previous read or write operation out of should_fault_in_pages(). In a following patch, we'll fault in pages before the I/O and there will be no return value to check. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index acc0c1d41564..ea87bef7314d 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -770,7 +770,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, return ret ? ret : ret1; } -static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i, +static inline bool should_fault_in_pages(struct iov_iter *i, size_t *prev_count, size_t *window_size) { @@ -779,8 +779,6 @@ static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i, if (likely(!count)) return false; - if (ret <= 0 && ret != -EFAULT) - return false; if (!iter_is_iovec(i)) return false; @@ -842,10 +840,12 @@ retry_under_glock: IOMAP_DIO_PARTIAL, read); to->nofault = false; pagefault_enable(); + if (ret <= 0 && ret != -EFAULT) + goto out_unlock; if (ret > 0) read = ret; - if (should_fault_in_pages(ret, to, &prev_count, &window_size)) { + if (should_fault_in_pages(to, &prev_count, &window_size)) { gfs2_holder_allow_demote(gh); window_size -= fault_in_iov_iter_writeable(to, window_size); gfs2_holder_disallow_demote(gh); @@ -855,6 +855,7 @@ retry_under_glock: goto retry; } } +out_unlock: if (gfs2_holder_queued(gh)) gfs2_glock_dq(gh); out_uninit: @@ -899,20 +900,23 @@ retry: goto out_uninit; /* Silently fall back to buffered I/O when writing beyond EOF */ if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode)) - goto out; + goto out_unlock; retry_under_glock: from->nofault = true; ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, IOMAP_DIO_PARTIAL, written); from->nofault = false; - - if (ret == -ENOTBLK) - ret = 0; + if (ret <= 0) { + if (ret == -ENOTBLK) + ret = 0; + if (ret != -EFAULT) + goto out_unlock; + } if (ret > 0) written = ret; - if (should_fault_in_pages(ret, from, &prev_count, &window_size)) { + if (should_fault_in_pages(from, &prev_count, &window_size)) { gfs2_holder_allow_demote(gh); window_size -= fault_in_iov_iter_readable(from, window_size); gfs2_holder_disallow_demote(gh); @@ -922,7 +926,7 @@ retry_under_glock: goto retry; } } -out: +out_unlock: if (gfs2_holder_queued(gh)) gfs2_glock_dq(gh); out_uninit: @@ -975,10 +979,12 @@ retry_under_glock: pagefault_disable(); ret = generic_file_read_iter(iocb, to); pagefault_enable(); + if (ret <= 0 && ret != -EFAULT) + goto out_unlock; if (ret > 0) read += ret; - if (should_fault_in_pages(ret, to, &prev_count, &window_size)) { + if (should_fault_in_pages(to, &prev_count, &window_size)) { gfs2_holder_allow_demote(&gh); window_size -= fault_in_iov_iter_writeable(to, window_size); gfs2_holder_disallow_demote(&gh); @@ -988,6 +994,7 @@ retry_under_glock: goto retry; } } +out_unlock: if (gfs2_holder_queued(&gh)) gfs2_glock_dq(&gh); out_uninit: @@ -1050,8 +1057,11 @@ retry_under_glock: if (inode == sdp->sd_rindex) gfs2_glock_dq_uninit(statfs_gh); + if (ret <= 0 && ret != -EFAULT) + goto out_unlock; + from->count = orig_count - written; - if (should_fault_in_pages(ret, from, &prev_count, &window_size)) { + if (should_fault_in_pages(from, &prev_count, &window_size)) { gfs2_holder_allow_demote(gh); window_size -= fault_in_iov_iter_readable(from, window_size); gfs2_holder_disallow_demote(gh); From 324d116c5a5c8204dc00e63f725a3c5ed09afb53 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 5 May 2022 13:32:23 +0200 Subject: [PATCH 798/803] gfs2: Align read and write chunks to the page cache Align the chunks that reads and writes are carried out in to the page cache rather than the user buffers. This will be more efficient in general, especially for allocating writes. Optimizing the case that the user buffer is gfs2 backed isn't very useful; we only need to make sure we won't deadlock. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index ea87bef7314d..11c46407d4a8 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -771,6 +771,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, } static inline bool should_fault_in_pages(struct iov_iter *i, + struct kiocb *iocb, size_t *prev_count, size_t *window_size) { @@ -783,15 +784,13 @@ static inline bool should_fault_in_pages(struct iov_iter *i, return false; size = PAGE_SIZE; - offs = offset_in_page(i->iov[0].iov_base + i->iov_offset); + offs = offset_in_page(iocb->ki_pos); if (*prev_count != count || !*window_size) { size_t nr_dirtied; - size = ALIGN(offs + count, PAGE_SIZE); - size = min_t(size_t, size, SZ_1M); nr_dirtied = max(current->nr_dirtied_pause - current->nr_dirtied, 8); - size = min(size, nr_dirtied << PAGE_SHIFT); + size = min_t(size_t, SZ_1M, nr_dirtied << PAGE_SHIFT); } *prev_count = count; @@ -845,7 +844,7 @@ retry_under_glock: if (ret > 0) read = ret; - if (should_fault_in_pages(to, &prev_count, &window_size)) { + if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) { gfs2_holder_allow_demote(gh); window_size -= fault_in_iov_iter_writeable(to, window_size); gfs2_holder_disallow_demote(gh); @@ -916,7 +915,7 @@ retry_under_glock: if (ret > 0) written = ret; - if (should_fault_in_pages(from, &prev_count, &window_size)) { + if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { gfs2_holder_allow_demote(gh); window_size -= fault_in_iov_iter_readable(from, window_size); gfs2_holder_disallow_demote(gh); @@ -984,7 +983,7 @@ retry_under_glock: if (ret > 0) read += ret; - if (should_fault_in_pages(to, &prev_count, &window_size)) { + if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) { gfs2_holder_allow_demote(&gh); window_size -= fault_in_iov_iter_writeable(to, window_size); gfs2_holder_disallow_demote(&gh); @@ -1061,7 +1060,7 @@ retry_under_glock: goto out_unlock; from->count = orig_count - written; - if (should_fault_in_pages(from, &prev_count, &window_size)) { + if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { gfs2_holder_allow_demote(gh); window_size -= fault_in_iov_iter_readable(from, window_size); gfs2_holder_disallow_demote(gh); From fa5dfa645d85910d747f4e0c97f19e5e97d1c270 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2022 23:37:30 +0200 Subject: [PATCH 799/803] gfs2: buffered write prefaulting In gfs2_file_buffered_write, to increase the likelihood that all the user memory we're trying to write will be resident in memory, carry out the write in chunks and fault in each chunk of user memory before trying to write it. Otherwise, some workloads will trigger frequent short "internal" writes, causing filesystem blocks to be allocated and then partially deallocated again when writing into holes, which is wasteful and breaks reservations. Neither the chunked writes nor any of the short "internal" writes are user visible. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 11c46407d4a8..5eda1bcc85e3 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -778,7 +778,7 @@ static inline bool should_fault_in_pages(struct iov_iter *i, size_t count = iov_iter_count(i); size_t size, offs; - if (likely(!count)) + if (!count) return false; if (!iter_is_iovec(i)) return false; @@ -1033,7 +1033,20 @@ retry: ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; + if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { retry_under_glock: + gfs2_holder_allow_demote(gh); + window_size -= fault_in_iov_iter_readable(from, window_size); + gfs2_holder_disallow_demote(gh); + if (!window_size) { + ret = -EFAULT; + goto out_unlock; + } + if (!gfs2_holder_queued(gh)) + goto retry; + from->count = min(from->count, window_size); + } + if (inode == sdp->sd_rindex) { struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); @@ -1060,17 +1073,8 @@ retry_under_glock: goto out_unlock; from->count = orig_count - written; - if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { - gfs2_holder_allow_demote(gh); - window_size -= fault_in_iov_iter_readable(from, window_size); - gfs2_holder_disallow_demote(gh); - if (window_size) { - from->count = min(from->count, window_size); - if (gfs2_holder_queued(gh)) - goto retry_under_glock; - goto retry; - } - } + if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) + goto retry_under_glock; out_unlock: if (gfs2_holder_queued(gh)) gfs2_glock_dq(gh); From e1fa9ea85ce89207d2ac0316da35a4a7736801f9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 May 2022 18:27:12 +0200 Subject: [PATCH 800/803] gfs2: Stop using glock holder auto-demotion for now We're having unresolved issues with the glock holder auto-demotion mechanism introduced in commit dc732906c245. This mechanism was assumed to be essential for avoiding frequent short reads and writes until commit 296abc0d91d8 ("gfs2: No short reads or writes upon glock contention"). Since then, when the inode glock is lost, it is simply re-acquired and the operation is resumed. This means that apart from the performance penalty, we might as well drop the inode glock before faulting in pages, and re-acquire it afterwards. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 54 +++++++++++++++++--------------------------------- 1 file changed, 18 insertions(+), 36 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 5eda1bcc85e3..2556ae1f92ea 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -832,7 +832,6 @@ retry: ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; -retry_under_glock: pagefault_disable(); to->nofault = true; ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, @@ -845,14 +844,10 @@ retry_under_glock: read = ret; if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) { - gfs2_holder_allow_demote(gh); + gfs2_glock_dq(gh); window_size -= fault_in_iov_iter_writeable(to, window_size); - gfs2_holder_disallow_demote(gh); - if (window_size) { - if (gfs2_holder_queued(gh)) - goto retry_under_glock; + if (window_size) goto retry; - } } out_unlock: if (gfs2_holder_queued(gh)) @@ -900,7 +895,6 @@ retry: /* Silently fall back to buffered I/O when writing beyond EOF */ if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode)) goto out_unlock; -retry_under_glock: from->nofault = true; ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, @@ -916,14 +910,10 @@ retry_under_glock: written = ret; if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { - gfs2_holder_allow_demote(gh); + gfs2_glock_dq(gh); window_size -= fault_in_iov_iter_readable(from, window_size); - gfs2_holder_disallow_demote(gh); - if (window_size) { - if (gfs2_holder_queued(gh)) - goto retry_under_glock; + if (window_size) goto retry; - } } out_unlock: if (gfs2_holder_queued(gh)) @@ -974,7 +964,6 @@ retry: ret = gfs2_glock_nq(&gh); if (ret) goto out_uninit; -retry_under_glock: pagefault_disable(); ret = generic_file_read_iter(iocb, to); pagefault_enable(); @@ -984,14 +973,10 @@ retry_under_glock: read += ret; if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) { - gfs2_holder_allow_demote(&gh); + gfs2_glock_dq(&gh); window_size -= fault_in_iov_iter_writeable(to, window_size); - gfs2_holder_disallow_demote(&gh); - if (window_size) { - if (gfs2_holder_queued(&gh)) - goto retry_under_glock; + if (window_size) goto retry; - } } out_unlock: if (gfs2_holder_queued(&gh)) @@ -1030,22 +1015,17 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh); retry: + if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { + window_size -= fault_in_iov_iter_readable(from, window_size); + if (!window_size) { + ret = -EFAULT; + goto out_uninit; + } + from->count = min(from->count, window_size); + } ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; - if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { -retry_under_glock: - gfs2_holder_allow_demote(gh); - window_size -= fault_in_iov_iter_readable(from, window_size); - gfs2_holder_disallow_demote(gh); - if (!window_size) { - ret = -EFAULT; - goto out_unlock; - } - if (!gfs2_holder_queued(gh)) - goto retry; - from->count = min(from->count, window_size); - } if (inode == sdp->sd_rindex) { struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); @@ -1073,8 +1053,10 @@ retry_under_glock: goto out_unlock; from->count = orig_count - written; - if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) - goto retry_under_glock; + if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { + gfs2_glock_dq(gh); + goto retry; + } out_unlock: if (gfs2_holder_queued(gh)) gfs2_glock_dq(gh); From 42226c989789d8da4af1de0c31070c96726d990c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 May 2022 18:08:58 -0700 Subject: [PATCH 801/803] Linux 5.18-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2284d1ca2503..5033c0577c6d 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Superb Owl # *DOCUMENTATION* From fa7e9ecc5e1c1a1e8aa7014b2749b22edc801dd2 Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Thu, 19 May 2022 14:29:29 -0400 Subject: [PATCH 802/803] iommu/s390: Tolerate repeat attach_dev calls Since commit 0286300e6045 ("iommu: iommu_group_claim_dma_owner() must always assign a domain") s390-iommu will get called to allocate multiple unmanaged iommu domains for a vfio-pci device -- however the current s390-iommu logic tolerates only one. Recognize that multiple domains can be allocated and handle switching between DMA or different iommu domain tables during attach_dev. Signed-off-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220519182929.581898-1-mjrosato@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 3833e86c6e7b..c898bcbbce11 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -99,7 +99,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, if (!domain_device) return -ENOMEM; - if (zdev->dma_table) { + if (zdev->dma_table && !zdev->s390_domain) { cc = zpci_dma_exit_device(zdev); if (cc) { rc = -EIO; @@ -107,6 +107,9 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, } } + if (zdev->s390_domain) + zpci_unregister_ioat(zdev, 0); + zdev->dma_table = s390_domain->dma_table; cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, virt_to_phys(zdev->dma_table)); @@ -136,7 +139,13 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, return 0; out_restore: - zpci_dma_init_device(zdev); + if (!zdev->s390_domain) { + zpci_dma_init_device(zdev); + } else { + zdev->dma_table = zdev->s390_domain->dma_table; + zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, + virt_to_phys(zdev->dma_table)); + } out_free: kfree(domain_device); @@ -167,7 +176,7 @@ static void s390_iommu_detach_device(struct iommu_domain *domain, } spin_unlock_irqrestore(&s390_domain->list_lock, flags); - if (found) { + if (found && (zdev->s390_domain == s390_domain)) { zdev->s390_domain = NULL; zpci_unregister_ioat(zdev, 0); zpci_dma_init_device(zdev); From 42bb5aa043382f09bef2cc33b8431be867c70f8e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 May 2022 12:22:14 +0200 Subject: [PATCH 803/803] iommu/amd: Increase timeout waiting for GA log enablement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On some systems it can take a long time for the hardware to enable the GA log of the AMD IOMMU. The current wait time is only 0.1ms, but testing showed that it can take up to 14ms for the GA log to enter running state after it has been enabled. Sometimes the long delay happens when booting the system, sometimes only on resume. Adjust the timeout accordingly to not print a warning when hardware takes a longer than usual. There has already been an attempt to fix this with commit 9b45a7738eec ("iommu/amd: Fix loop timeout issue in iommu_ga_log_enable()") But that commit was based on some wrong math and did not fix the issue in all cases. Cc: "D. Ziegfeld" Cc: Jörg-Volker Peetz Fixes: 8bda0cfbdc1a ("iommu/amd: Detect and initialize guest vAPIC log") Signed-off-by: Joerg Roedel Link: https://lore.kernel.org/r/20220520102214.12563-1-joro@8bytes.org --- drivers/iommu/amd/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 648d6b94ba8c..b69f6ab440dc 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -84,7 +84,7 @@ #define ACPI_DEVFLAG_LINT1 0x80 #define ACPI_DEVFLAG_ATSDIS 0x10000000 -#define LOOP_TIMEOUT 100000 +#define LOOP_TIMEOUT 2000000 /* * ACPI table definitions *