From c7fa848ff01dad9ed3146a6b1a7d3622131bcedd Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 3 Mar 2022 15:33:10 +1000 Subject: [PATCH 001/708] KVM: PPC: Book3S HV P9: Fix "lost kick" race When new work is created that requires attention from the hypervisor (e.g., to inject an interrupt into the guest), fast_vcpu_kick is used to pull the target vcpu out of the guest if it may have been running. Therefore the work creation side looks like this: vcpu->arch.doorbell_request = 1; kvmppc_fast_vcpu_kick_hv(vcpu) { smp_mb(); cpu = vcpu->cpu; if (cpu != -1) send_ipi(cpu); } And the guest entry side *should* look like this: vcpu->cpu = smp_processor_id(); smp_mb(); if (vcpu->arch.doorbell_request) { // do something (abort entry or inject doorbell etc) } But currently the store and load are flipped, so it is possible for the entry to see no doorbell pending, and the doorbell creation misses the store to set cpu, resulting lost work (or at least delayed until the next guest exit). Fix this by reordering the entry operations and adding a smp_mb between them. The P8 path appears to have a similar race which is commented but not addressed yet. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220303053315.1056880-2-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 41 +++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c886557638a1..6fa518f6501d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -225,6 +225,13 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) int cpu; struct rcuwait *waitp; + /* + * rcuwait_wake_up contains smp_mb() which orders prior stores that + * create pending work vs below loads of cpu fields. The other side + * is the barrier in vcpu run that orders setting the cpu fields vs + * testing for pending work. + */ + waitp = kvm_arch_vcpu_get_wait(vcpu); if (rcuwait_wake_up(waitp)) ++vcpu->stat.generic.halt_wakeup; @@ -1089,7 +1096,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) break; } tvcpu->arch.prodded = 1; - smp_mb(); + smp_mb(); /* This orders prodded store vs ceded load */ if (tvcpu->arch.ceded) kvmppc_fast_vcpu_kick_hv(tvcpu); break; @@ -3766,6 +3773,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) pvc = core_info.vc[sub]; pvc->pcpu = pcpu + thr; for_each_runnable_thread(i, vcpu, pvc) { + /* + * XXX: is kvmppc_start_thread called too late here? + * It updates vcpu->cpu and vcpu->arch.thread_cpu + * which are used by kvmppc_fast_vcpu_kick_hv(), but + * kick is called after new exceptions become available + * and exceptions are checked earlier than here, by + * kvmppc_core_prepare_to_enter. + */ kvmppc_start_thread(vcpu, pvc); kvmppc_create_dtl_entry(vcpu, pvc); trace_kvm_guest_enter(vcpu); @@ -4487,6 +4502,21 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, if (need_resched() || !kvm->arch.mmu_ready) goto out; + vcpu->cpu = pcpu; + vcpu->arch.thread_cpu = pcpu; + vc->pcpu = pcpu; + local_paca->kvm_hstate.kvm_vcpu = vcpu; + local_paca->kvm_hstate.ptid = 0; + local_paca->kvm_hstate.fake_suspend = 0; + + /* + * Orders set cpu/thread_cpu vs testing for pending interrupts and + * doorbells below. The other side is when these fields are set vs + * kvmppc_fast_vcpu_kick_hv reading the cpu/thread_cpu fields to + * kick a vCPU to notice the pending interrupt. + */ + smp_mb(); + if (!nested) { kvmppc_core_prepare_to_enter(vcpu); if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, @@ -4506,13 +4536,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, tb = mftb(); - vcpu->cpu = pcpu; - vcpu->arch.thread_cpu = pcpu; - vc->pcpu = pcpu; - local_paca->kvm_hstate.kvm_vcpu = vcpu; - local_paca->kvm_hstate.ptid = 0; - local_paca->kvm_hstate.fake_suspend = 0; - __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0); trace_kvm_guest_enter(vcpu); @@ -4614,6 +4637,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; out: + vcpu->cpu = -1; + vcpu->arch.thread_cpu = -1; powerpc_local_irq_pmu_restore(flags); preempt_enable(); goto done; From b5149e229218118c9cd44a4d256f970ddcbf745b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Feb 2022 16:47:25 +1000 Subject: [PATCH 002/708] KVM: PPC: Book3S PR: Disable SCV when AIL could be disabled PR KVM does not support running with AIL enabled, and SCV does is not supported with AIL disabled. Fix this by ensuring the SCV facility is disabled with FSCR while a CPU could be running with AIL=0. The PowerNV host supports disabling AIL on a per-CPU basis, so SCV just needs to be disabled when a vCPU is being run. The pSeries machine can only switch AIL on a system-wide basis, so it must disable SCV support at boot if the configuration can potentially run a PR KVM guest. Also ensure a the FSCR[SCV] bit can not be enabled when emulating mtFSCR for the guest. SCV is not emulated for the PR guest at the moment, this just fixes the host crashes. Alternatives considered and rejected: - SCV support can not be disabled by PR KVM after boot, because it is advertised to userspace with HWCAP. - AIL can not be disabled on a per-CPU basis. At least when running on pseries it is a per-LPAR setting. - Support for real-mode SCV vectors will not be added because they are at 0x17000 so making such a large fixed head space causes immediate value limits to be exceeded, requiring a lot rework and more code. - Disabling SCV for any PR KVM possible kernel will cause a slowdown when not using PR KVM. - A boot time option to disable SCV to use PR KVM is user-hostile. - System call instruction emulation for SCV facility unavailable instructions is too complex and old emulation code was subtly broken and removed. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20220222064727.2314380-2-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 4 ++++ arch/powerpc/kernel/setup_64.c | 28 ++++++++++++++++++++++++++++ arch/powerpc/kvm/Kconfig | 9 +++++++++ arch/powerpc/kvm/book3s_pr.c | 26 +++++++++++++++++--------- 4 files changed, 58 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 55caeee37c08..b66dd6f775a4 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -809,6 +809,10 @@ __start_interrupts: * - MSR_EE|MSR_RI is clear (no reentrant exceptions) * - Standard kernel environment is set up (stack, paca, etc) * + * KVM: + * These interrupts do not elevate HV 0->1, so HV is not involved. PR KVM + * ensures that FSCR[SCV] is disabled whenever it has to force AIL off. + * * Call convention: * * syscall register convention is in Documentation/powerpc/syscall64-abi.rst diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index be8577ac9397..d973ae7558e3 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -197,6 +197,34 @@ static void __init configure_exceptions(void) /* Under a PAPR hypervisor, we need hypercalls */ if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + /* + * - PR KVM does not support AIL mode interrupts in the host + * while a PR guest is running. + * + * - SCV system call interrupt vectors are only implemented for + * AIL mode interrupts. + * + * - On pseries, AIL mode can only be enabled and disabled + * system-wide so when a PR VM is created on a pseries host, + * all CPUs of the host are set to AIL=0 mode. + * + * - Therefore host CPUs must not execute scv while a PR VM + * exists. + * + * - SCV support can not be disabled dynamically because the + * feature is advertised to host userspace. Disabling the + * facility and emulating it would be possible but is not + * implemented. + * + * - So SCV support is blanket disabled if PR KVM could possibly + * run. That is, PR support compiled in, booting on pseries + * with hash MMU. + */ + if (IS_ENABLED(CONFIG_KVM_BOOK3S_PR_POSSIBLE) && !radix_enabled()) { + init_task.thread.fscr &= ~FSCR_SCV; + cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV; + } + /* Enable AIL if possible */ if (!pseries_enable_reloc_on_exc()) { init_task.thread.fscr &= ~FSCR_SCV; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 18e58085447c..ddd88179110a 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -112,12 +112,21 @@ config KVM_BOOK3S_64_PR guest in user mode (problem state) and emulating all privileged instructions and registers. + This is only available for hash MMU mode and only supports + guests that use hash MMU mode. + This is not as fast as using hypervisor mode, but works on machines where hypervisor mode is not available or not usable, and can emulate processors that are different from the host processor, including emulating 32-bit processors on a 64-bit host. + Selecting this option will cause the SCV facility to be + disabled when the kernel is booted on the pseries platform in + hash MMU mode (regardless of PR VMs running). When any PR VMs + are running, "AIL" mode is disabled which may slow interrupts + and system calls on the host. + config KVM_BOOK3S_HV_EXIT_TIMING bool "Detailed timing for hypervisor real-mode code" depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 34a801c3604a..7bf9e6ca5c2d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -137,12 +137,15 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; svcpu->in_use = 0; svcpu_put(svcpu); -#endif /* Disable AIL if supported */ - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_207S)) - mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL); + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL); + if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV)) + mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) & ~FSCR_SCV); + } +#endif vcpu->cpu = smp_processor_id(); #ifdef CONFIG_PPC_BOOK3S_32 @@ -165,6 +168,14 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb)); to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max; svcpu_put(svcpu); + + /* Enable AIL if supported */ + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3); + if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV)) + mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) | FSCR_SCV); + } #endif if (kvmppc_is_split_real(vcpu)) @@ -174,11 +185,6 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); kvmppc_save_tm_pr(vcpu); - /* Enable AIL if supported */ - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_207S)) - mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3); - vcpu->cpu = -1; } @@ -1037,6 +1043,8 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac) void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr) { + if (fscr & FSCR_SCV) + fscr &= ~FSCR_SCV; /* SCV must not be enabled */ if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) { /* TAR got dropped, drop it in shadow too */ kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); From 839d893b4067da14b9c46fda2dfd88b80aeed551 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Feb 2022 16:47:26 +1000 Subject: [PATCH 003/708] KVM: PPC: Book3S PR: Disallow AIL != 0 KVM PR does not implement address translation modes on interrupt, so it must not allow H_SET_MODE to succeed. The behaviour change caused by this mode is architected and not advisory (interrupts *must* behave differently). QEMU does not deal with differences in AIL support in the host. The solution to that is a spapr capability and corresponding KVM CAP, but this patch does not break things more than before (the host behaviour already differs, this change just disallows some modes that are not implemented properly). By happy coincidence, this allows PR Linux guests that are using the SCV facility to boot and run, because Linux disables the use of SCV if AIL can not be set to 3. This does not fix the underlying problem of missing SCV support (an OS could implement real-mode SCV vectors and try to enable the facility). The true fix for that is for KVM PR to emulate scv interrupts from the facility unavailable interrupt. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20220222064727.2314380-3-npiggin@gmail.com --- arch/powerpc/kvm/book3s_pr_papr.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 1f10e7dfcdd0..dc4f51ac84bc 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -281,6 +281,22 @@ static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +static int kvmppc_h_pr_set_mode(struct kvm_vcpu *vcpu) +{ + unsigned long mflags = kvmppc_get_gpr(vcpu, 4); + unsigned long resource = kvmppc_get_gpr(vcpu, 5); + + if (resource == H_SET_MODE_RESOURCE_ADDR_TRANS_MODE) { + /* KVM PR does not provide AIL!=0 to guests */ + if (mflags == 0) + kvmppc_set_gpr(vcpu, 3, H_SUCCESS); + else + kvmppc_set_gpr(vcpu, 3, H_UNSUPPORTED_FLAG_START - 63); + return EMULATE_DONE; + } + return EMULATE_FAIL; +} + #ifdef CONFIG_SPAPR_TCE_IOMMU static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu) { @@ -384,6 +400,8 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) return kvmppc_h_pr_logical_ci_load(vcpu); case H_LOGICAL_CI_STORE: return kvmppc_h_pr_logical_ci_store(vcpu); + case H_SET_MODE: + return kvmppc_h_pr_set_mode(vcpu); case H_XIRR: case H_CPPR: case H_EOI: @@ -421,6 +439,7 @@ int kvmppc_hcall_impl_pr(unsigned long cmd) case H_CEDE: case H_LOGICAL_CI_LOAD: case H_LOGICAL_CI_STORE: + case H_SET_MODE: #ifdef CONFIG_KVM_XICS case H_XIRR: case H_CPPR: @@ -447,6 +466,7 @@ static unsigned int default_hcall_list[] = { H_BULK_REMOVE, H_PUT_TCE, H_CEDE, + H_SET_MODE, #ifdef CONFIG_KVM_XICS H_XIRR, H_CPPR, From f771b55731fc82b1e8e9ef123f6f1b8d8c92bc63 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 7 Mar 2022 13:26:25 +1100 Subject: [PATCH 004/708] KVM: PPC: Use KVM_CAP_PPC_AIL_MODE_3 Use KVM_CAP_PPC_AIL_MODE_3 to advertise the capability to set the AIL resource mode to 3 with the H_SET_MODE hypercall. This capability differs between processor types and KVM types (PR, HV, Nested HV), and affects guest-visible behaviour. QEMU will implement a cap-ail-mode-3 to control this behaviour[1], and use the KVM CAP if available to determine KVM support[2]. [1] https://lists.nongnu.org/archive/html/qemu-ppc/2022-02/msg00437.html [2] https://lists.nongnu.org/archive/html/qemu-ppc/2022-02/msg00439.html Signed-off-by: Nicholas Piggin Reviewed-by: Fabiano Rosas [mpe: Rebase onto 93b71801a827 from kvm-ppc-cap-210 branch, add EXPORT_SYMBOL] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220222064727.2314380-4-npiggin@gmail.com --- arch/powerpc/include/asm/setup.h | 2 ++ arch/powerpc/kvm/powerpc.c | 17 +++++++++++++++++ arch/powerpc/platforms/pseries/setup.c | 13 ++++++++++++- 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index d0d3dd531c7f..a555fb77258a 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -28,11 +28,13 @@ void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 #ifdef CONFIG_PPC_PSERIES +extern bool pseries_reloc_on_exception(void); extern bool pseries_enable_reloc_on_exc(void); extern void pseries_disable_reloc_on_exc(void); extern void pseries_big_endian_exceptions(void); void __init pseries_little_endian_exceptions(void); #else +static inline bool pseries_reloc_on_exception(void) { return false; } static inline bool pseries_enable_reloc_on_exc(void) { return false; } static inline void pseries_disable_reloc_on_exc(void) {} static inline void pseries_big_endian_exceptions(void) {} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9772b176e406..875c30c12db0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -705,6 +705,23 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; #endif + case KVM_CAP_PPC_AIL_MODE_3: + r = 0; + /* + * KVM PR, POWER7, and some POWER9s don't support AIL=3 mode. + * The POWER9s can support it if the guest runs in hash mode, + * but QEMU doesn't necessarily query the capability in time. + */ + if (hv_enabled) { + if (kvmhv_on_pseries()) { + if (pseries_reloc_on_exception()) + r = 1; + } else if (cpu_has_feature(CPU_FTR_ARCH_207S) && + !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) { + r = 1; + } + } + break; default: r = 0; break; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 83a04d967a59..5bdbbe2151b1 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -353,6 +353,14 @@ static void pseries_lpar_idle(void) pseries_idle_epilog(); } +static bool pseries_reloc_on_exception_enabled; + +bool pseries_reloc_on_exception(void) +{ + return pseries_reloc_on_exception_enabled; +} +EXPORT_SYMBOL_GPL(pseries_reloc_on_exception); + /* * Enable relocation on during exceptions. This has partition wide scope and * may take a while to complete, if it takes longer than one second we will @@ -377,6 +385,7 @@ bool pseries_enable_reloc_on_exc(void) " on exceptions: %ld\n", rc); return false; } + pseries_reloc_on_exception_enabled = true; return true; } @@ -404,7 +413,9 @@ void pseries_disable_reloc_on_exc(void) break; mdelay(get_longbusy_msecs(rc)); } - if (rc != H_SUCCESS) + if (rc == H_SUCCESS) + pseries_reloc_on_exception_enabled = false; + else pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n", rc); } From d14eb80e27795b7b20060f7b151cdfe39722a813 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 17 Mar 2022 23:55:37 +0100 Subject: [PATCH 005/708] drm/panel: ili9341: fix optional regulator handling If the optional regulator lookup fails, reset the pointer to NULL. Other functions such as mipi_dbi_poweron_reset_conditional() only do a NULL pointer check and will otherwise dereference the error pointer. Fixes: 5a04227326b04c15 ("drm/panel: Add ilitek ili9341 panel driver") Signed-off-by: Daniel Mack Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220317225537.826302-1-daniel@zonque.org --- drivers/gpu/drm/panel/panel-ilitek-ili9341.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c index 2c3378a259b1..e1542451ef9d 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c @@ -612,8 +612,10 @@ static int ili9341_dbi_probe(struct spi_device *spi, struct gpio_desc *dc, int ret; vcc = devm_regulator_get_optional(dev, "vcc"); - if (IS_ERR(vcc)) + if (IS_ERR(vcc)) { dev_err(dev, "get optional vcc failed\n"); + vcc = NULL; + } dbidev = devm_drm_dev_alloc(dev, &ili9341_dbi_driver, struct mipi_dbi_dev, drm); From c5c948aa894a831f96fccd025e47186b1ee41615 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Tue, 15 Mar 2022 14:53:24 -0400 Subject: [PATCH 006/708] drm/amd: Add USBC connector ID [Why&How] Add a dedicated AMDGPU specific ID for use with newer ASICs that support USB-C output Signed-off-by: Aurabindo Pillai Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/ObjectID.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h index 5b393622f592..a0f0a17e224f 100644 --- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h +++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h @@ -119,6 +119,7 @@ #define CONNECTOR_OBJECT_ID_eDP 0x14 #define CONNECTOR_OBJECT_ID_MXM 0x15 #define CONNECTOR_OBJECT_ID_LVDS_eDP 0x16 +#define CONNECTOR_OBJECT_ID_USBC 0x17 /* deleted */ From 0b9e66762aa0cda2a9c2d5542d64e04dac528fa6 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 7 Mar 2022 02:47:17 -0800 Subject: [PATCH 007/708] btrfs: zoned: traverse devices under chunk_mutex in btrfs_can_activate_zone btrfs_can_activate_zone() can be called with the device_list_mutex already held, which will lead to a deadlock: insert_dev_extents() // Takes device_list_mutex `-> insert_dev_extent() `-> btrfs_insert_empty_item() `-> btrfs_insert_empty_items() `-> btrfs_search_slot() `-> btrfs_cow_block() `-> __btrfs_cow_block() `-> btrfs_alloc_tree_block() `-> btrfs_reserve_extent() `-> find_free_extent() `-> find_free_extent_update_loop() `-> can_allocate_chunk() `-> btrfs_can_activate_zone() // Takes device_list_mutex again Instead of using the RCU on fs_devices->device_list we can use fs_devices->alloc_list, protected by the chunk_mutex to traverse the list of active devices. We are in the chunk allocation thread. The newer chunk allocation happens from the devices in the fs_device->alloc_list protected by the chunk_mutex. btrfs_create_chunk() lockdep_assert_held(&info->chunk_mutex); gather_device_info list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) Also, a device that reappears after the mount won't join the alloc_list yet and, it will be in the dev_list, which we don't want to consider in the context of the chunk alloc. [15.166572] WARNING: possible recursive locking detected [15.167117] 5.17.0-rc6-dennis #79 Not tainted [15.167487] -------------------------------------------- [15.167733] kworker/u8:3/146 is trying to acquire lock: [15.167733] ffff888102962ee0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: find_free_extent+0x15a/0x14f0 [btrfs] [15.167733] [15.167733] but task is already holding lock: [15.167733] ffff888102962ee0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: btrfs_create_pending_block_groups+0x20a/0x560 [btrfs] [15.167733] [15.167733] other info that might help us debug this: [15.167733] Possible unsafe locking scenario: [15.167733] [15.171834] CPU0 [15.171834] ---- [15.171834] lock(&fs_devs->device_list_mutex); [15.171834] lock(&fs_devs->device_list_mutex); [15.171834] [15.171834] *** DEADLOCK *** [15.171834] [15.171834] May be due to missing lock nesting notation [15.171834] [15.171834] 5 locks held by kworker/u8:3/146: [15.171834] #0: ffff888100050938 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work+0x1c3/0x5a0 [15.171834] #1: ffffc9000067be80 ((work_completion)(&fs_info->async_data_reclaim_work)){+.+.}-{0:0}, at: process_one_work+0x1c3/0x5a0 [15.176244] #2: ffff88810521e620 (sb_internal){.+.+}-{0:0}, at: flush_space+0x335/0x600 [btrfs] [15.176244] #3: ffff888102962ee0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: btrfs_create_pending_block_groups+0x20a/0x560 [btrfs] [15.176244] #4: ffff8881152e4b78 (btrfs-dev-00){++++}-{3:3}, at: __btrfs_tree_lock+0x27/0x130 [btrfs] [15.179641] [15.179641] stack backtrace: [15.179641] CPU: 1 PID: 146 Comm: kworker/u8:3 Not tainted 5.17.0-rc6-dennis #79 [15.179641] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1.fc35 04/01/2014 [15.179641] Workqueue: events_unbound btrfs_async_reclaim_data_space [btrfs] [15.179641] Call Trace: [15.179641] [15.179641] dump_stack_lvl+0x45/0x59 [15.179641] __lock_acquire.cold+0x217/0x2b2 [15.179641] lock_acquire+0xbf/0x2b0 [15.183838] ? find_free_extent+0x15a/0x14f0 [btrfs] [15.183838] __mutex_lock+0x8e/0x970 [15.183838] ? find_free_extent+0x15a/0x14f0 [btrfs] [15.183838] ? find_free_extent+0x15a/0x14f0 [btrfs] [15.183838] ? lock_is_held_type+0xd7/0x130 [15.183838] ? find_free_extent+0x15a/0x14f0 [btrfs] [15.183838] find_free_extent+0x15a/0x14f0 [btrfs] [15.183838] ? _raw_spin_unlock+0x24/0x40 [15.183838] ? btrfs_get_alloc_profile+0x106/0x230 [btrfs] [15.187601] btrfs_reserve_extent+0x131/0x260 [btrfs] [15.187601] btrfs_alloc_tree_block+0xb5/0x3b0 [btrfs] [15.187601] __btrfs_cow_block+0x138/0x600 [btrfs] [15.187601] btrfs_cow_block+0x10f/0x230 [btrfs] [15.187601] btrfs_search_slot+0x55f/0xbc0 [btrfs] [15.187601] ? lock_is_held_type+0xd7/0x130 [15.187601] btrfs_insert_empty_items+0x2d/0x60 [btrfs] [15.187601] btrfs_create_pending_block_groups+0x2b3/0x560 [btrfs] [15.187601] __btrfs_end_transaction+0x36/0x2a0 [btrfs] [15.192037] flush_space+0x374/0x600 [btrfs] [15.192037] ? find_held_lock+0x2b/0x80 [15.192037] ? btrfs_async_reclaim_data_space+0x49/0x180 [btrfs] [15.192037] ? lock_release+0x131/0x2b0 [15.192037] btrfs_async_reclaim_data_space+0x70/0x180 [btrfs] [15.192037] process_one_work+0x24c/0x5a0 [15.192037] worker_thread+0x4a/0x3d0 Fixes: a85f05e59bc1 ("btrfs: zoned: avoid chunk allocation if active block group has enough space") CC: stable@vger.kernel.org # 5.16+ Reviewed-by: Anand Jain Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index b7b5fac1c779..61125aec8723 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1976,18 +1976,19 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group) bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags) { + struct btrfs_fs_info *fs_info = fs_devices->fs_info; struct btrfs_device *device; bool ret = false; - if (!btrfs_is_zoned(fs_devices->fs_info)) + if (!btrfs_is_zoned(fs_info)) return true; /* Non-single profiles are not supported yet */ ASSERT((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0); /* Check if there is a device with active zones left */ - mutex_lock(&fs_devices->device_list_mutex); - list_for_each_entry(device, &fs_devices->devices, dev_list) { + mutex_lock(&fs_info->chunk_mutex); + list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { struct btrfs_zoned_device_info *zinfo = device->zone_info; if (!device->bdev) @@ -1999,7 +2000,7 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags) break; } } - mutex_unlock(&fs_devices->device_list_mutex); + mutex_unlock(&fs_info->chunk_mutex); return ret; } From 62ed0bf7315b524973bb5fb9174b60e353289835 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 7 Mar 2022 02:47:18 -0800 Subject: [PATCH 008/708] btrfs: zoned: remove left over ASSERT checking for single profile With commit dcf5652291f6 ("btrfs: zoned: allow DUP on meta-data block groups") we started allowing DUP on metadata block groups, so the ASSERT()s in btrfs_can_activate_zone() and btrfs_zoned_get_device() are no longer valid and in fact even harmful. Fixes: dcf5652291f6 ("btrfs: zoned: allow DUP on meta-data block groups") CC: stable@vger.kernel.org # 5.17 Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 61125aec8723..1b1b310c3c51 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1801,7 +1801,6 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info, map = em->map_lookup; /* We only support single profile for now */ - ASSERT(map->num_stripes == 1); device = map->stripes[0].dev; free_extent_map(em); @@ -1983,9 +1982,6 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags) if (!btrfs_is_zoned(fs_info)) return true; - /* Non-single profiles are not supported yet */ - ASSERT((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0); - /* Check if there is a device with active zones left */ mutex_lock(&fs_info->chunk_mutex); list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { From b642b52d0b50f4d398cb4293f64992d0eed2e2ce Mon Sep 17 00:00:00 2001 From: Ethan Lien Date: Mon, 7 Mar 2022 18:00:04 +0800 Subject: [PATCH 009/708] btrfs: fix qgroup reserve overflow the qgroup limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We use extent_changeset->bytes_changed in qgroup_reserve_data() to record how many bytes we set for EXTENT_QGROUP_RESERVED state. Currently the bytes_changed is set as "unsigned int", and it will overflow if we try to fallocate a range larger than 4GiB. The result is we reserve less bytes and eventually break the qgroup limit. Unlike regular buffered/direct write, which we use one changeset for each ordered extent, which can never be larger than 256M. For fallocate, we use one changeset for the whole range, thus it no longer respects the 256M per extent limit, and caused the problem. The following example test script reproduces the problem: $ cat qgroup-overflow.sh #!/bin/bash DEV=/dev/sdj MNT=/mnt/sdj mkfs.btrfs -f $DEV mount $DEV $MNT # Set qgroup limit to 2GiB. btrfs quota enable $MNT btrfs qgroup limit 2G $MNT # Try to fallocate a 3GiB file. This should fail. echo echo "Try to fallocate a 3GiB file..." fallocate -l 3G $MNT/3G.file # Try to fallocate a 5GiB file. echo echo "Try to fallocate a 5GiB file..." fallocate -l 5G $MNT/5G.file # See we break the qgroup limit. echo sync btrfs qgroup show -r $MNT umount $MNT When running the test: $ ./qgroup-overflow.sh (...) Try to fallocate a 3GiB file... fallocate: fallocate failed: Disk quota exceeded Try to fallocate a 5GiB file... qgroupid         rfer         excl     max_rfer --------         ----         ----     -------- 0/5           5.00GiB      5.00GiB      2.00GiB Since we have no control of how bytes_changed is used, it's better to set it to u64. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Qu Wenruo Signed-off-by: Ethan Lien Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 0399cf8e3c32..151e9da5da2d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -118,7 +118,7 @@ struct btrfs_bio_ctrl { */ struct extent_changeset { /* How many bytes are set/cleared in this operation */ - unsigned int bytes_changed; + u64 bytes_changed; /* Changed ranges */ struct ulist range_changed; From f7eab1ddb9f8bc99206e3efa8d34ca1d2faca209 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 10 Mar 2022 15:46:04 -0800 Subject: [PATCH 010/708] drm/msm/gpu: Rename runtime suspend/resume functions Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20220310234611.424743-2-robdclark@gmail.com --- drivers/gpu/drm/msm/adreno/adreno_device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 89cfd84760d7..8859834b51b8 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -600,7 +600,7 @@ static const struct of_device_id dt_match[] = { }; #ifdef CONFIG_PM -static int adreno_resume(struct device *dev) +static int adreno_runtime_resume(struct device *dev) { struct msm_gpu *gpu = dev_to_gpu(dev); @@ -616,7 +616,7 @@ static int active_submits(struct msm_gpu *gpu) return active_submits; } -static int adreno_suspend(struct device *dev) +static int adreno_runtime_suspend(struct device *dev) { struct msm_gpu *gpu = dev_to_gpu(dev); int remaining; @@ -635,7 +635,7 @@ static int adreno_suspend(struct device *dev) static const struct dev_pm_ops adreno_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) - SET_RUNTIME_PM_OPS(adreno_suspend, adreno_resume, NULL) + SET_RUNTIME_PM_OPS(adreno_runtime_suspend, adreno_runtime_resume, NULL) }; static struct platform_driver adreno_driver = { From 7e4167c9e021afb01fb69abae8642d781c8907b6 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 10 Mar 2022 15:46:05 -0800 Subject: [PATCH 011/708] drm/msm/gpu: Park scheduler threads for system suspend In the system suspend path, we don't want to be racing with the scheduler kthreads pushing additional queued up jobs to the hw queue (ringbuffer). So park them first. While we are at it, move the wait for active jobs to complete into the new system- suspend path. Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20220310234611.424743-3-robdclark@gmail.com --- drivers/gpu/drm/msm/adreno/adreno_device.c | 68 ++++++++++++++++++++-- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 8859834b51b8..0440a98988fc 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -619,22 +619,82 @@ static int active_submits(struct msm_gpu *gpu) static int adreno_runtime_suspend(struct device *dev) { struct msm_gpu *gpu = dev_to_gpu(dev); - int remaining; + + /* + * We should be holding a runpm ref, which will prevent + * runtime suspend. In the system suspend path, we've + * already waited for active jobs to complete. + */ + WARN_ON_ONCE(gpu->active_submits); + + return gpu->funcs->pm_suspend(gpu); +} + +static void suspend_scheduler(struct msm_gpu *gpu) +{ + int i; + + /* + * Shut down the scheduler before we force suspend, so that + * suspend isn't racing with scheduler kthread feeding us + * more work. + * + * Note, we just want to park the thread, and let any jobs + * that are already on the hw queue complete normally, as + * opposed to the drm_sched_stop() path used for handling + * faulting/timed-out jobs. We can't really cancel any jobs + * already on the hw queue without racing with the GPU. + */ + for (i = 0; i < gpu->nr_rings; i++) { + struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched; + kthread_park(sched->thread); + } +} + +static void resume_scheduler(struct msm_gpu *gpu) +{ + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched; + kthread_unpark(sched->thread); + } +} + +static int adreno_system_suspend(struct device *dev) +{ + struct msm_gpu *gpu = dev_to_gpu(dev); + int remaining, ret; + + suspend_scheduler(gpu); remaining = wait_event_timeout(gpu->retire_event, active_submits(gpu) == 0, msecs_to_jiffies(1000)); if (remaining == 0) { dev_err(dev, "Timeout waiting for GPU to suspend\n"); - return -EBUSY; + ret = -EBUSY; + goto out; } - return gpu->funcs->pm_suspend(gpu); + ret = pm_runtime_force_suspend(dev); +out: + if (ret) + resume_scheduler(gpu); + + return ret; } + +static int adreno_system_resume(struct device *dev) +{ + resume_scheduler(dev_to_gpu(dev)); + return pm_runtime_force_resume(dev); +} + #endif static const struct dev_pm_ops adreno_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) + SET_SYSTEM_SLEEP_PM_OPS(adreno_system_suspend, adreno_system_resume) SET_RUNTIME_PM_OPS(adreno_runtime_suspend, adreno_runtime_resume, NULL) }; From 7242795d520d3fb48e005e3c96ba54bb59639d6e Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 10 Mar 2022 15:46:06 -0800 Subject: [PATCH 012/708] drm/msm/gpu: Remove mutex from wait_event condition The mutex wasn't really protecting anything before. Before the previous patch we could still be racing with the scheduler's kthread, as that is not necessarily frozen yet. Now that we've parked the sched threads, the only race is with jobs retiring, and that is harmless, ie. Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20220310234611.424743-4-robdclark@gmail.com --- drivers/gpu/drm/msm/adreno/adreno_device.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 0440a98988fc..661dfa7681fb 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -607,15 +607,6 @@ static int adreno_runtime_resume(struct device *dev) return gpu->funcs->pm_resume(gpu); } -static int active_submits(struct msm_gpu *gpu) -{ - int active_submits; - mutex_lock(&gpu->active_lock); - active_submits = gpu->active_submits; - mutex_unlock(&gpu->active_lock); - return active_submits; -} - static int adreno_runtime_suspend(struct device *dev) { struct msm_gpu *gpu = dev_to_gpu(dev); @@ -669,7 +660,7 @@ static int adreno_system_suspend(struct device *dev) suspend_scheduler(gpu); remaining = wait_event_timeout(gpu->retire_event, - active_submits(gpu) == 0, + gpu->active_submits == 0, msecs_to_jiffies(1000)); if (remaining == 0) { dev_err(dev, "Timeout waiting for GPU to suspend\n"); From ac3e4f42d5ec459f701743debd9c1ad2f2247402 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 17 Mar 2022 11:45:49 -0700 Subject: [PATCH 013/708] drm/msm: Add missing put_task_struct() in debugfs path Fixes: 25faf2f2e065 ("drm/msm: Show process names in gem_describe") Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20220317184550.227991-1-robdclark@gmail.com --- drivers/gpu/drm/msm/msm_gem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 02b9ae65a96a..a4f61972667b 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -926,6 +926,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m, get_pid_task(aspace->pid, PIDTYPE_PID); if (task) { comm = kstrdup(task->comm, GFP_KERNEL); + put_task_struct(task); } else { comm = NULL; } From 05241de1f69eb7f56b0a5e0bec96a7752fad1b2f Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 24 Mar 2022 14:55:36 +0300 Subject: [PATCH 014/708] dt-bindings: display/msm: another fix for the dpu-qcm2290 example Make dpu-qcm2290 example really follow the defined schema: - Drop qcom,mdss compatible. It's only used for MDP5 devices. - Change display controller name to display-controller as specified in the yaml Reported-by: Rob Herring Cc: Loic Poulain Fixes: 164f69d9d45a ("dt-bindings: msm: disp: add yaml schemas for QCM2290 DPU bindings") Signed-off-by: Dmitry Baryshkov Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220324115536.2090818-1-dmitry.baryshkov@linaro.org Signed-off-by: Rob Clark --- .../devicetree/bindings/display/msm/dpu-qcm2290.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/display/msm/dpu-qcm2290.yaml b/Documentation/devicetree/bindings/display/msm/dpu-qcm2290.yaml index d31483a78eab..6fb7e321f011 100644 --- a/Documentation/devicetree/bindings/display/msm/dpu-qcm2290.yaml +++ b/Documentation/devicetree/bindings/display/msm/dpu-qcm2290.yaml @@ -160,7 +160,7 @@ examples: mdss: mdss@5e00000 { #address-cells = <1>; #size-cells = <1>; - compatible = "qcom,qcm2290-mdss", "qcom,mdss"; + compatible = "qcom,qcm2290-mdss"; reg = <0x05e00000 0x1000>; reg-names = "mdss"; power-domains = <&dispcc MDSS_GDSC>; @@ -180,7 +180,7 @@ examples: <&apps_smmu 0x421 0x0>; ranges; - mdss_mdp: mdp@5e01000 { + mdss_mdp: display-controller@5e01000 { compatible = "qcom,qcm2290-dpu"; reg = <0x05e01000 0x8f000>, <0x05eb0000 0x2008>; From bbac58698a55cc0a6f0c0d69a6dcd3f9f3134c11 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 8 Mar 2022 13:36:38 +0800 Subject: [PATCH 015/708] btrfs: remove device item and update super block in the same transaction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [BUG] There is a report that a btrfs has a bad super block num devices. This makes btrfs to reject the fs completely. BTRFS error (device sdd3): super_num_devices 3 mismatch with num_devices 2 found here BTRFS error (device sdd3): failed to read chunk tree: -22 BTRFS error (device sdd3): open_ctree failed [CAUSE] During btrfs device removal, chunk tree and super block num devs are updated in two different transactions: btrfs_rm_device() |- btrfs_rm_dev_item(device) | |- trans = btrfs_start_transaction() | | Now we got transaction X | | | |- btrfs_del_item() | | Now device item is removed from chunk tree | | | |- btrfs_commit_transaction() | Transaction X got committed, super num devs untouched, | but device item removed from chunk tree. | (AKA, super num devs is already incorrect) | |- cur_devices->num_devices--; |- cur_devices->total_devices--; |- btrfs_set_super_num_devices() All those operations are not in transaction X, thus it will only be written back to disk in next transaction. So after the transaction X in btrfs_rm_dev_item() committed, but before transaction X+1 (which can be minutes away), a power loss happen, then we got the super num mismatch. [FIX] Instead of starting and committing a transaction inside btrfs_rm_dev_item(), start a transaction in side btrfs_rm_device() and pass it to btrfs_rm_dev_item(). And only commit the transaction after everything is done. Reported-by: Luca Béla Palkovics Link: https://lore.kernel.org/linux-btrfs/CA+8xDSpvdm_U0QLBAnrH=zqDq_cWCOH5TiV46CKmp3igr44okQ@mail.gmail.com/ CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 65 ++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 37 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1be7cb2f955f..2cfbc74a3b4e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1896,23 +1896,18 @@ static void update_dev_time(const char *device_path) path_put(&path); } -static int btrfs_rm_dev_item(struct btrfs_device *device) +static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans, + struct btrfs_device *device) { struct btrfs_root *root = device->fs_info->chunk_root; int ret; struct btrfs_path *path; struct btrfs_key key; - struct btrfs_trans_handle *trans; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) { - btrfs_free_path(path); - return PTR_ERR(trans); - } key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.type = BTRFS_DEV_ITEM_KEY; key.offset = device->devid; @@ -1923,21 +1918,12 @@ static int btrfs_rm_dev_item(struct btrfs_device *device) if (ret) { if (ret > 0) ret = -ENOENT; - btrfs_abort_transaction(trans, ret); - btrfs_end_transaction(trans); goto out; } ret = btrfs_del_item(trans, root, path); - if (ret) { - btrfs_abort_transaction(trans, ret); - btrfs_end_transaction(trans); - } - out: btrfs_free_path(path); - if (!ret) - ret = btrfs_commit_transaction(trans); return ret; } @@ -2078,6 +2064,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, struct btrfs_dev_lookup_args *args, struct block_device **bdev, fmode_t *mode) { + struct btrfs_trans_handle *trans; struct btrfs_device *device; struct btrfs_fs_devices *cur_devices; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; @@ -2098,7 +2085,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1); if (ret) - goto out; + return ret; device = btrfs_find_device(fs_info->fs_devices, args); if (!device) { @@ -2106,27 +2093,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND; else ret = -ENOENT; - goto out; + return ret; } if (btrfs_pinned_by_swapfile(fs_info, device)) { btrfs_warn_in_rcu(fs_info, "cannot remove device %s (devid %llu) due to active swapfile", rcu_str_deref(device->name), device->devid); - ret = -ETXTBSY; - goto out; + return -ETXTBSY; } - if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { - ret = BTRFS_ERROR_DEV_TGT_REPLACE; - goto out; - } + if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) + return BTRFS_ERROR_DEV_TGT_REPLACE; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && - fs_info->fs_devices->rw_devices == 1) { - ret = BTRFS_ERROR_DEV_ONLY_WRITABLE; - goto out; - } + fs_info->fs_devices->rw_devices == 1) + return BTRFS_ERROR_DEV_ONLY_WRITABLE; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { mutex_lock(&fs_info->chunk_mutex); @@ -2139,14 +2121,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, if (ret) goto error_undo; - /* - * TODO: the superblock still includes this device in its num_devices - * counter although write_all_supers() is not locked out. This - * could give a filesystem state which requires a degraded mount. - */ - ret = btrfs_rm_dev_item(device); - if (ret) + trans = btrfs_start_transaction(fs_info->chunk_root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); goto error_undo; + } + + ret = btrfs_rm_dev_item(trans, device); + if (ret) { + /* Any error in dev item removal is critical */ + btrfs_crit(fs_info, + "failed to remove device item for devid %llu: %d", + device->devid, ret); + btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); + return ret; + } clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); btrfs_scrub_cancel_dev(device); @@ -2229,7 +2219,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, free_fs_devices(cur_devices); } -out: + ret = btrfs_commit_transaction(trans); + return ret; error_undo: @@ -2240,7 +2231,7 @@ error_undo: device->fs_devices->rw_devices++; mutex_unlock(&fs_info->chunk_mutex); } - goto out; + return ret; } void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev) From 05fd9564e9faf0f23b4676385e27d9405cef6637 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 14 Mar 2022 10:55:32 -0700 Subject: [PATCH 016/708] btrfs: fix fallocate to use file_modified to update permissions consistently Since the initial introduction of (posix) fallocate back at the turn of the century, it has been possible to use this syscall to change the user-visible contents of files. This can happen by extending the file size during a preallocation, or through any of the newer modes (punch, zero range). Because the call can be used to change file contents, we should treat it like we do any other modification to a file -- update the mtime, and drop set[ug]id privileges/capabilities. The VFS function file_modified() does all this for us if pass it a locked inode, so let's make fallocate drop permissions correctly. Reviewed-by: Filipe Manana Signed-off-by: Darrick J. Wong Signed-off-by: David Sterba --- fs/btrfs/file.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9f455c96c974..380054c94e4b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2957,8 +2957,9 @@ out: return ret; } -static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) +static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len) { + struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_state *cached_state = NULL; @@ -2990,6 +2991,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) goto out_only_mutex; } + ret = file_modified(file); + if (ret) + goto out_only_mutex; + lockstart = round_up(offset, btrfs_inode_sectorsize(BTRFS_I(inode))); lockend = round_down(offset + len, btrfs_inode_sectorsize(BTRFS_I(inode))) - 1; @@ -3430,7 +3435,7 @@ static long btrfs_fallocate(struct file *file, int mode, return -EOPNOTSUPP; if (mode & FALLOC_FL_PUNCH_HOLE) - return btrfs_punch_hole(inode, offset, len); + return btrfs_punch_hole(file, offset, len); /* * Only trigger disk allocation, don't trigger qgroup reserve @@ -3452,6 +3457,10 @@ static long btrfs_fallocate(struct file *file, int mode, goto out; } + ret = file_modified(file); + if (ret) + goto out; + /* * TODO: Move these two operations after we have checked * accurate reserved space, or fallocate can still fail but From 75a36a7d3ea904cef2e5b56af0c58cc60dcf947a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 15 Mar 2022 19:28:05 +0800 Subject: [PATCH 017/708] btrfs: avoid defragging extents whose next extents are not targets [BUG] There is a report that autodefrag is defragging single sector, which is completely waste of IO, and no help for defragging: btrfs-cleaner-808 defrag_one_locked_range: root=256 ino=651122 start=0 len=4096 [CAUSE] In defrag_collect_targets(), we check if the current range (A) can be merged with next one (B). If mergeable, we will add range A into target for defrag. However there is a catch for autodefrag, when checking mergeability against range B, we intentionally pass 0 as @newer_than, hoping to get a higher chance to merge with the next extent. But in the next iteration, range B will looked up by defrag_lookup_extent(), with non-zero @newer_than. And if range B is not really newer, it will rejected directly, causing only range A being defragged, while we expect to defrag both range A and B. [FIX] Since the root cause is the difference in check condition of defrag_check_next_extent() and defrag_collect_targets(), we fix it by: 1. Pass @newer_than to defrag_check_next_extent() 2. Pass @extent_thresh to defrag_check_next_extent() This makes the check between defrag_collect_targets() and defrag_check_next_extent() more consistent. While there is still some minor difference, the remaining checks are focus on runtime flags like writeback/delalloc, which are mostly transient and safe to be checked only in defrag_collect_targets(). Link: https://github.com/btrfs/linux/issues/423#issuecomment-1066981856 CC: stable@vger.kernel.org # 5.16+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 238cee5b5254..f46e71061942 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1239,7 +1239,7 @@ static u32 get_extent_max_capacity(const struct extent_map *em) } static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em, - bool locked) + u32 extent_thresh, u64 newer_than, bool locked) { struct extent_map *next; bool ret = false; @@ -1249,11 +1249,12 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em, return false; /* - * We want to check if the next extent can be merged with the current - * one, which can be an extent created in a past generation, so we pass - * a minimum generation of 0 to defrag_lookup_extent(). + * Here we need to pass @newer_then when checking the next extent, or + * we will hit a case we mark current extent for defrag, but the next + * one will not be a target. + * This will just cause extra IO without really reducing the fragments. */ - next = defrag_lookup_extent(inode, em->start + em->len, 0, locked); + next = defrag_lookup_extent(inode, em->start + em->len, newer_than, locked); /* No more em or hole */ if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) goto out; @@ -1265,6 +1266,13 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em, */ if (next->len >= get_extent_max_capacity(em)) goto out; + /* Skip older extent */ + if (next->generation < newer_than) + goto out; + /* Also check extent size */ + if (next->len >= extent_thresh) + goto out; + ret = true; out: free_extent_map(next); @@ -1470,7 +1478,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode, goto next; next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em, - locked); + extent_thresh, newer_than, locked); if (!next_mergeable) { struct defrag_target_range *last; From a7d16d9a07bbcb7dcd5214a1bea75c808830bc0d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 23 Mar 2022 11:30:36 -0400 Subject: [PATCH 018/708] btrfs: do not warn for free space inode in cow_file_range This is a long time leftover from when I originally added the free space inode, the point was to catch cases where we weren't honoring the NOCOW flag. However there exists a race with relocation, if we allocate our free space inode in a block group that is about to be relocated, we could trigger the COW path before the relocation has the opportunity to find the extents and delete the free space cache. In production where we have auto-relocation enabled we're seeing this WARN_ON_ONCE() around 5k times in a 2 week period, so not super common but enough that it's at the top of our metrics. We're properly handling the error here, and with us phasing out v1 space cache anyway just drop the WARN_ON_ONCE. Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2e7143ff5523..b976f757571f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1128,7 +1128,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode, int ret = 0; if (btrfs_is_free_space_inode(inode)) { - WARN_ON_ONCE(1); ret = -EINVAL; goto out_unlock; } From 60021bd754c6ca0addc6817994f20290a321d8d6 Mon Sep 17 00:00:00 2001 From: Kaiwen Hu Date: Wed, 23 Mar 2022 15:10:32 +0800 Subject: [PATCH 019/708] btrfs: prevent subvol with swapfile from being deleted A subvolume with an active swapfile must not be deleted otherwise it would not be possible to deactivate it. After the subvolume is deleted, we cannot swapoff the swapfile in this deleted subvolume because the path is unreachable. The swapfile is still active and holding references, the filesystem cannot be unmounted. The test looks like this: mkfs.btrfs -f $dev > /dev/null mount $dev $mnt btrfs sub create $mnt/subvol touch $mnt/subvol/swapfile chmod 600 $mnt/subvol/swapfile chattr +C $mnt/subvol/swapfile dd if=/dev/zero of=$mnt/subvol/swapfile bs=1K count=4096 mkswap $mnt/subvol/swapfile swapon $mnt/subvol/swapfile btrfs sub delete $mnt/subvol swapoff $mnt/subvol/swapfile # failed: No such file or directory swapoff --all unmount $mnt # target is busy. To prevent above issue, we simply check that whether the subvolume contains any active swapfile, and stop the deleting process. This behavior is like snapshot ioctl dealing with a swapfile. CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Robbie Ko Reviewed-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: Kaiwen Hu Signed-off-by: David Sterba --- fs/btrfs/inode.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b976f757571f..5aab6af88349 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4487,6 +4487,13 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) dest->root_key.objectid); return -EPERM; } + if (atomic_read(&dest->nr_swapfiles)) { + spin_unlock(&dest->root_item_lock); + btrfs_warn(fs_info, + "attempt to delete subvolume %llu with active swapfile", + root->root_key.objectid); + return -EPERM; + } root_flags = btrfs_root_flags(&dest->root_item); btrfs_set_root_flags(&dest->root_item, root_flags | BTRFS_ROOT_SUBVOL_DEAD); @@ -11110,8 +11117,23 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, * set. We use this counter to prevent snapshots. We must increment it * before walking the extents because we don't want a concurrent * snapshot to run after we've already checked the extents. + * + * It is possible that subvolume is marked for deletion but still not + * removed yet. To prevent this race, we check the root status before + * activating the swapfile. */ + spin_lock(&root->root_item_lock); + if (btrfs_root_dead(root)) { + spin_unlock(&root->root_item_lock); + + btrfs_exclop_finish(fs_info); + btrfs_warn(fs_info, + "cannot activate swapfile because subvolume %llu is being deleted", + root->root_key.objectid); + return -EPERM; + } atomic_inc(&root->nr_swapfiles); + spin_unlock(&root->root_item_lock); isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize); From 24b488061b97a6c6ff82c433e6843eaf54f41f3c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 24 Mar 2022 12:10:55 +0100 Subject: [PATCH 020/708] MAINTAINERS: update Lorenzo's email address Using my kernel.org email. Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/e98fcf759f8c23a9736f1c4d20ca0437e4b145de.1648120046.git.lorenzo@kernel.org --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 91c04cb65247..e406a6db67d0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12222,7 +12222,7 @@ F: drivers/mmc/host/mtk-sd.c MEDIATEK MT76 WIRELESS LAN DRIVER M: Felix Fietkau -M: Lorenzo Bianconi +M: Lorenzo Bianconi M: Ryder Lee R: Shayne Chen R: Sean Wang From caaf2ae712b7cc3c7717898fe267dbf882a502ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 24 Jan 2022 14:03:24 +0100 Subject: [PATCH 021/708] dma-buf: Add dma_fence_array_for_each (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper to iterate over all fences in a dma_fence_array object. v2 (Jason Ekstrand) - Return NULL from dma_fence_array_first if head == NULL. This matches the iterator behavior of dma_fence_chain_for_each in that it iterates zero times if head == NULL. - Return NULL from dma_fence_array_next if index > array->num_fences. Signed-off-by: Jason Ekstrand Reviewed-by: Jason Ekstrand Reviewed-by: Christian König Cc: Daniel Vetter Cc: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20210610210925.642582-2-jason@jlekstrand.net Signed-off-by: Christian König --- drivers/dma-buf/dma-fence-array.c | 27 +++++++++++++++++++++++++++ include/linux/dma-fence-array.h | 17 +++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index cb1bacb5a42b..52b85d292383 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -219,3 +219,30 @@ bool dma_fence_match_context(struct dma_fence *fence, u64 context) return true; } EXPORT_SYMBOL(dma_fence_match_context); + +struct dma_fence *dma_fence_array_first(struct dma_fence *head) +{ + struct dma_fence_array *array; + + if (!head) + return NULL; + + array = to_dma_fence_array(head); + if (!array) + return head; + + return array->fences[0]; +} +EXPORT_SYMBOL(dma_fence_array_first); + +struct dma_fence *dma_fence_array_next(struct dma_fence *head, + unsigned int index) +{ + struct dma_fence_array *array = to_dma_fence_array(head); + + if (!array || index >= array->num_fences) + return NULL; + + return array->fences[index]; +} +EXPORT_SYMBOL(dma_fence_array_next); diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index fec374f69e12..e34dcb0bb462 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -61,6 +61,19 @@ to_dma_fence_array(struct dma_fence *fence) return container_of(fence, struct dma_fence_array, base); } +/** + * dma_fence_array_for_each - iterate over all fences in array + * @fence: current fence + * @index: index into the array + * @head: potential dma_fence_array object + * + * Test if @array is a dma_fence_array object and if yes iterate over all fences + * in the array. If not just iterate over the fence in @array itself. + */ +#define dma_fence_array_for_each(fence, index, head) \ + for (index = 0, fence = dma_fence_array_first(head); fence; \ + ++(index), fence = dma_fence_array_next(head, index)) + struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, u64 context, unsigned seqno, @@ -68,4 +81,8 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, bool dma_fence_match_context(struct dma_fence *fence, u64 context); +struct dma_fence *dma_fence_array_first(struct dma_fence *head); +struct dma_fence *dma_fence_array_next(struct dma_fence *head, + unsigned int index); + #endif /* __LINUX_DMA_FENCE_ARRAY_H */ From 64a8f92fd783e750cdb81af75942dcd53bbf61bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 11 Mar 2022 10:27:53 +0100 Subject: [PATCH 022/708] dma-buf: add dma_fence_unwrap v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a general purpose helper to deep dive into dma_fence_chain/dma_fence_array structures and iterate over all the fences in them. This is useful when we need to flatten out all fences in those structures. v2: some selftests cleanup, improved function naming and documentation Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220311110244.1245-1-christian.koenig@amd.com --- Documentation/driver-api/dma-buf.rst | 6 + drivers/dma-buf/Makefile | 1 + drivers/dma-buf/selftests.h | 1 + drivers/dma-buf/st-dma-fence-unwrap.c | 261 ++++++++++++++++++++++++++ include/linux/dma-fence-array.h | 2 + include/linux/dma-fence-chain.h | 2 + include/linux/dma-fence-unwrap.h | 95 ++++++++++ 7 files changed, 368 insertions(+) create mode 100644 drivers/dma-buf/st-dma-fence-unwrap.c create mode 100644 include/linux/dma-fence-unwrap.h diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst index 55006678394a..36a76cbe9095 100644 --- a/Documentation/driver-api/dma-buf.rst +++ b/Documentation/driver-api/dma-buf.rst @@ -185,6 +185,12 @@ DMA Fence Chain .. kernel-doc:: include/linux/dma-fence-chain.h :internal: +DMA Fence unwrap +~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/linux/dma-fence-unwrap.h + :internal: + DMA Fence uABI/Sync File ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile index 511805dbeb75..4c9eb53ba3f8 100644 --- a/drivers/dma-buf/Makefile +++ b/drivers/dma-buf/Makefile @@ -12,6 +12,7 @@ dmabuf_selftests-y := \ selftest.o \ st-dma-fence.o \ st-dma-fence-chain.o \ + st-dma-fence-unwrap.o \ st-dma-resv.o obj-$(CONFIG_DMABUF_SELFTESTS) += dmabuf_selftests.o diff --git a/drivers/dma-buf/selftests.h b/drivers/dma-buf/selftests.h index 97d73aaa31da..851965867d9c 100644 --- a/drivers/dma-buf/selftests.h +++ b/drivers/dma-buf/selftests.h @@ -12,4 +12,5 @@ selftest(sanitycheck, __sanitycheck__) /* keep first (igt selfcheck) */ selftest(dma_fence, dma_fence) selftest(dma_fence_chain, dma_fence_chain) +selftest(dma_fence_unwrap, dma_fence_unwrap) selftest(dma_resv, dma_resv) diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c new file mode 100644 index 000000000000..039f016b57be --- /dev/null +++ b/drivers/dma-buf/st-dma-fence-unwrap.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: MIT + +/* + * Copyright (C) 2022 Advanced Micro Devices, Inc. + */ + +#include +#if 0 +#include +#include +#include +#include +#include +#include +#include +#endif + +#include "selftest.h" + +#define CHAIN_SZ (4 << 10) + +static inline struct mock_fence { + struct dma_fence base; + spinlock_t lock; +} *to_mock_fence(struct dma_fence *f) { + return container_of(f, struct mock_fence, base); +} + +static const char *mock_name(struct dma_fence *f) +{ + return "mock"; +} + +static const struct dma_fence_ops mock_ops = { + .get_driver_name = mock_name, + .get_timeline_name = mock_name, +}; + +static struct dma_fence *mock_fence(void) +{ + struct mock_fence *f; + + f = kmalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return NULL; + + spin_lock_init(&f->lock); + dma_fence_init(&f->base, &mock_ops, &f->lock, 0, 0); + + return &f->base; +} + +static struct dma_fence *mock_array(unsigned int num_fences, ...) +{ + struct dma_fence_array *array; + struct dma_fence **fences; + va_list valist; + int i; + + fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL); + if (!fences) + return NULL; + + va_start(valist, num_fences); + for (i = 0; i < num_fences; ++i) + fences[i] = va_arg(valist, typeof(*fences)); + va_end(valist); + + array = dma_fence_array_create(num_fences, fences, + dma_fence_context_alloc(1), + 1, false); + if (!array) + goto cleanup; + return &array->base; + +cleanup: + for (i = 0; i < num_fences; ++i) + dma_fence_put(fences[i]); + kfree(fences); + return NULL; +} + +static struct dma_fence *mock_chain(struct dma_fence *prev, + struct dma_fence *fence) +{ + struct dma_fence_chain *f; + + f = dma_fence_chain_alloc(); + if (!f) { + dma_fence_put(prev); + dma_fence_put(fence); + return NULL; + } + + dma_fence_chain_init(f, prev, fence, 1); + return &f->base; +} + +static int sanitycheck(void *arg) +{ + struct dma_fence *f, *chain, *array; + int err = 0; + + f = mock_fence(); + if (!f) + return -ENOMEM; + + array = mock_array(1, f); + if (!array) + return -ENOMEM; + + chain = mock_chain(NULL, array); + if (!chain) + return -ENOMEM; + + dma_fence_signal(f); + dma_fence_put(chain); + return err; +} + +static int unwrap_array(void *arg) +{ + struct dma_fence *fence, *f1, *f2, *array; + struct dma_fence_unwrap iter; + int err = 0; + + f1 = mock_fence(); + if (!f1) + return -ENOMEM; + + f2 = mock_fence(); + if (!f2) { + dma_fence_put(f1); + return -ENOMEM; + } + + array = mock_array(2, f1, f2); + if (!array) + return -ENOMEM; + + dma_fence_unwrap_for_each(fence, &iter, array) { + if (fence == f1) { + f1 = NULL; + } else if (fence == f2) { + f2 = NULL; + } else { + pr_err("Unexpected fence!\n"); + err = -EINVAL; + } + } + + if (f1 || f2) { + pr_err("Not all fences seen!\n"); + err = -EINVAL; + } + + dma_fence_signal(f1); + dma_fence_signal(f2); + dma_fence_put(array); + return 0; +} + +static int unwrap_chain(void *arg) +{ + struct dma_fence *fence, *f1, *f2, *chain; + struct dma_fence_unwrap iter; + int err = 0; + + f1 = mock_fence(); + if (!f1) + return -ENOMEM; + + f2 = mock_fence(); + if (!f2) { + dma_fence_put(f1); + return -ENOMEM; + } + + chain = mock_chain(f1, f2); + if (!chain) + return -ENOMEM; + + dma_fence_unwrap_for_each(fence, &iter, chain) { + if (fence == f1) { + f1 = NULL; + } else if (fence == f2) { + f2 = NULL; + } else { + pr_err("Unexpected fence!\n"); + err = -EINVAL; + } + } + + if (f1 || f2) { + pr_err("Not all fences seen!\n"); + err = -EINVAL; + } + + dma_fence_signal(f1); + dma_fence_signal(f2); + dma_fence_put(chain); + return 0; +} + +static int unwrap_chain_array(void *arg) +{ + struct dma_fence *fence, *f1, *f2, *array, *chain; + struct dma_fence_unwrap iter; + int err = 0; + + f1 = mock_fence(); + if (!f1) + return -ENOMEM; + + f2 = mock_fence(); + if (!f2) { + dma_fence_put(f1); + return -ENOMEM; + } + + array = mock_array(2, f1, f2); + if (!array) + return -ENOMEM; + + chain = mock_chain(NULL, array); + if (!chain) + return -ENOMEM; + + dma_fence_unwrap_for_each(fence, &iter, chain) { + if (fence == f1) { + f1 = NULL; + } else if (fence == f2) { + f2 = NULL; + } else { + pr_err("Unexpected fence!\n"); + err = -EINVAL; + } + } + + if (f1 || f2) { + pr_err("Not all fences seen!\n"); + err = -EINVAL; + } + + dma_fence_signal(f1); + dma_fence_signal(f2); + dma_fence_put(chain); + return 0; +} + +int dma_fence_unwrap(void) +{ + static const struct subtest tests[] = { + SUBTEST(sanitycheck), + SUBTEST(unwrap_array), + SUBTEST(unwrap_chain), + SUBTEST(unwrap_chain_array), + }; + + return subtests(tests, NULL); +} diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index e34dcb0bb462..ec7f25def392 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -69,6 +69,8 @@ to_dma_fence_array(struct dma_fence *fence) * * Test if @array is a dma_fence_array object and if yes iterate over all fences * in the array. If not just iterate over the fence in @array itself. + * + * For a deep dive iterator see dma_fence_unwrap_for_each(). */ #define dma_fence_array_for_each(fence, index, head) \ for (index = 0, fence = dma_fence_array_first(head); fence; \ diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index 10d51bcdf7b7..4bdf0b96da28 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -112,6 +112,8 @@ static inline void dma_fence_chain_free(struct dma_fence_chain *chain) * * Iterate over all fences in the chain. We keep a reference to the current * fence while inside the loop which must be dropped when breaking out. + * + * For a deep dive iterator see dma_fence_unwrap_for_each(). */ #define dma_fence_chain_for_each(iter, head) \ for (iter = dma_fence_get(head); iter; \ diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h new file mode 100644 index 000000000000..77e335a1bcac --- /dev/null +++ b/include/linux/dma-fence-unwrap.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * fence-chain: chain fences together in a timeline + * + * Copyright (C) 2022 Advanced Micro Devices, Inc. + * Authors: + * Christian König + */ + +#ifndef __LINUX_DMA_FENCE_UNWRAP_H +#define __LINUX_DMA_FENCE_UNWRAP_H + +#include +#include + +/** + * struct dma_fence_unwrap - cursor into the container structure + * + * Should be used with dma_fence_unwrap_for_each() iterator macro. + */ +struct dma_fence_unwrap { + /** + * @chain: potential dma_fence_chain, but can be other fence as well + */ + struct dma_fence *chain; + /** + * @array: potential dma_fence_array, but can be other fence as well + */ + struct dma_fence *array; + /** + * @index: last returned index if @array is really a dma_fence_array + */ + unsigned int index; +}; + +/* Internal helper to start new array iteration, don't use directly */ +static inline struct dma_fence * +__dma_fence_unwrap_array(struct dma_fence_unwrap * cursor) +{ + cursor->array = dma_fence_chain_contained(cursor->chain); + cursor->index = 0; + return dma_fence_array_first(cursor->array); +} + +/** + * dma_fence_unwrap_first - return the first fence from fence containers + * @head: the entrypoint into the containers + * @cursor: current position inside the containers + * + * Unwraps potential dma_fence_chain/dma_fence_array containers and return the + * first fence. + */ +static inline struct dma_fence * +dma_fence_unwrap_first(struct dma_fence *head, struct dma_fence_unwrap *cursor) +{ + cursor->chain = dma_fence_get(head); + return __dma_fence_unwrap_array(cursor); +} + +/** + * dma_fence_unwrap_next - return the next fence from a fence containers + * @cursor: current position inside the containers + * + * Continue unwrapping the dma_fence_chain/dma_fence_array containers and return + * the next fence from them. + */ +static inline struct dma_fence * +dma_fence_unwrap_next(struct dma_fence_unwrap *cursor) +{ + struct dma_fence *tmp; + + ++cursor->index; + tmp = dma_fence_array_next(cursor->array, cursor->index); + if (tmp) + return tmp; + + cursor->chain = dma_fence_chain_walk(cursor->chain); + return __dma_fence_unwrap_array(cursor); +} + +/** + * dma_fence_unwrap_for_each - iterate over all fences in containers + * @fence: current fence + * @cursor: current position inside the containers + * @head: starting point for the iterator + * + * Unwrap dma_fence_chain and dma_fence_array containers and deep dive into all + * potential fences in them. If @head is just a normal fence only that one is + * returned. + */ +#define dma_fence_unwrap_for_each(fence, cursor, head) \ + for (fence = dma_fence_unwrap_first(head, cursor); fence; \ + fence = dma_fence_unwrap_next(cursor)) + +#endif From 519f490db07e1a539490612f376487f61e48e39c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 11 Mar 2022 10:32:26 +0100 Subject: [PATCH 023/708] dma-buf/sync-file: fix warning about fence containers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dma_fence_chain containers can show up in sync_files as well resulting in warnings that those can't be added to dma_fence_array containers when merging multiple sync_files together. Solve this by using the dma_fence_unwrap iterator to deep dive into the contained fences and then add those flatten out into a dma_fence_array. Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220311110244.1245-2-christian.koenig@amd.com --- drivers/dma-buf/sync_file.c | 141 +++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 68 deletions(-) diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 394e6e1e9686..b8dea4ec123b 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -5,6 +5,7 @@ * Copyright (C) 2012 Google, Inc. */ +#include #include #include #include @@ -172,20 +173,6 @@ static int sync_file_set_fence(struct sync_file *sync_file, return 0; } -static struct dma_fence **get_fences(struct sync_file *sync_file, - int *num_fences) -{ - if (dma_fence_is_array(sync_file->fence)) { - struct dma_fence_array *array = to_dma_fence_array(sync_file->fence); - - *num_fences = array->num_fences; - return array->fences; - } - - *num_fences = 1; - return &sync_file->fence; -} - static void add_fence(struct dma_fence **fences, int *i, struct dma_fence *fence) { @@ -210,86 +197,97 @@ static void add_fence(struct dma_fence **fences, static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, struct sync_file *b) { + struct dma_fence *a_fence, *b_fence, **fences; + struct dma_fence_unwrap a_iter, b_iter; + unsigned int index, num_fences; struct sync_file *sync_file; - struct dma_fence **fences = NULL, **nfences, **a_fences, **b_fences; - int i = 0, i_a, i_b, num_fences, a_num_fences, b_num_fences; sync_file = sync_file_alloc(); if (!sync_file) return NULL; - a_fences = get_fences(a, &a_num_fences); - b_fences = get_fences(b, &b_num_fences); - if (a_num_fences > INT_MAX - b_num_fences) - goto err; + num_fences = 0; + dma_fence_unwrap_for_each(a_fence, &a_iter, a->fence) + ++num_fences; + dma_fence_unwrap_for_each(b_fence, &b_iter, b->fence) + ++num_fences; - num_fences = a_num_fences + b_num_fences; + if (num_fences > INT_MAX) + goto err_free_sync_file; fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL); if (!fences) - goto err; + goto err_free_sync_file; /* - * Assume sync_file a and b are both ordered and have no - * duplicates with the same context. + * We can't guarantee that fences in both a and b are ordered, but it is + * still quite likely. * - * If a sync_file can only be created with sync_file_merge - * and sync_file_create, this is a reasonable assumption. + * So attempt to order the fences as we pass over them and merge fences + * with the same context. */ - for (i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { - struct dma_fence *pt_a = a_fences[i_a]; - struct dma_fence *pt_b = b_fences[i_b]; - if (pt_a->context < pt_b->context) { - add_fence(fences, &i, pt_a); + index = 0; + for (a_fence = dma_fence_unwrap_first(a->fence, &a_iter), + b_fence = dma_fence_unwrap_first(b->fence, &b_iter); + a_fence || b_fence; ) { - i_a++; - } else if (pt_a->context > pt_b->context) { - add_fence(fences, &i, pt_b); + if (!b_fence) { + add_fence(fences, &index, a_fence); + a_fence = dma_fence_unwrap_next(&a_iter); + + } else if (!a_fence) { + add_fence(fences, &index, b_fence); + b_fence = dma_fence_unwrap_next(&b_iter); + + } else if (a_fence->context < b_fence->context) { + add_fence(fences, &index, a_fence); + a_fence = dma_fence_unwrap_next(&a_iter); + + } else if (b_fence->context < a_fence->context) { + add_fence(fences, &index, b_fence); + b_fence = dma_fence_unwrap_next(&b_iter); + + } else if (__dma_fence_is_later(a_fence->seqno, b_fence->seqno, + a_fence->ops)) { + add_fence(fences, &index, a_fence); + a_fence = dma_fence_unwrap_next(&a_iter); + b_fence = dma_fence_unwrap_next(&b_iter); - i_b++; } else { - if (__dma_fence_is_later(pt_a->seqno, pt_b->seqno, - pt_a->ops)) - add_fence(fences, &i, pt_a); - else - add_fence(fences, &i, pt_b); - - i_a++; - i_b++; + add_fence(fences, &index, b_fence); + a_fence = dma_fence_unwrap_next(&a_iter); + b_fence = dma_fence_unwrap_next(&b_iter); } } - for (; i_a < a_num_fences; i_a++) - add_fence(fences, &i, a_fences[i_a]); + if (index == 0) + add_fence(fences, &index, dma_fence_get_stub()); - for (; i_b < b_num_fences; i_b++) - add_fence(fences, &i, b_fences[i_b]); + if (num_fences > index) { + struct dma_fence **tmp; - if (i == 0) - fences[i++] = dma_fence_get(a_fences[0]); - - if (num_fences > i) { - nfences = krealloc_array(fences, i, sizeof(*fences), GFP_KERNEL); - if (!nfences) - goto err; - - fences = nfences; + /* Keep going even when reducing the size failed */ + tmp = krealloc_array(fences, index, sizeof(*fences), + GFP_KERNEL); + if (tmp) + fences = tmp; } - if (sync_file_set_fence(sync_file, fences, i) < 0) - goto err; + if (sync_file_set_fence(sync_file, fences, index) < 0) + goto err_put_fences; strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name)); return sync_file; -err: - while (i) - dma_fence_put(fences[--i]); +err_put_fences: + while (index) + dma_fence_put(fences[--index]); kfree(fences); + +err_free_sync_file: fput(sync_file->file); return NULL; - } static int sync_file_release(struct inode *inode, struct file *file) @@ -398,11 +396,13 @@ static int sync_fill_fence_info(struct dma_fence *fence, static long sync_file_ioctl_fence_info(struct sync_file *sync_file, unsigned long arg) { - struct sync_file_info info; struct sync_fence_info *fence_info = NULL; - struct dma_fence **fences; + struct dma_fence_unwrap iter; + struct sync_file_info info; + unsigned int num_fences; + struct dma_fence *fence; + int ret; __u32 size; - int num_fences, ret, i; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) return -EFAULT; @@ -410,7 +410,9 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, if (info.flags || info.pad) return -EINVAL; - fences = get_fences(sync_file, &num_fences); + num_fences = 0; + dma_fence_unwrap_for_each(fence, &iter, sync_file->fence) + ++num_fences; /* * Passing num_fences = 0 means that userspace doesn't want to @@ -433,8 +435,11 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, if (!fence_info) return -ENOMEM; - for (i = 0; i < num_fences; i++) { - int status = sync_fill_fence_info(fences[i], &fence_info[i]); + num_fences = 0; + dma_fence_unwrap_for_each(fence, &iter, sync_file->fence) { + int status; + + status = sync_fill_fence_info(fence, &fence_info[num_fences++]); info.status = info.status <= 0 ? info.status : status; } From 32f90e65251981f061eec883b0fe9e75d74e9665 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Tue, 15 Mar 2022 14:46:28 +0800 Subject: [PATCH 024/708] drm/amdgpu: prevent memory wipe in suspend/shutdown stage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On GPUs with RAS enabled, below call trace is observed when suspending or shutting down device. The cause is we have enabled memory wipe flag for BOs on such GPUs by default, and such BOs will go to memory wipe by amdgpu_fill_buffer, however, because ring is off already, it fails to clean up the memory and throw this error message. So add a suspend/shutdown check before wipping memory. [drm:amdgpu_fill_buffer [amdgpu]] *ERROR* Trying to clear memory with ring turned off. v2: fix coding style issue Fixes: fc6ea4bee13071 ("drm/amdgpu: Wipe all VRAM on free when RAS is enabled") Signed-off-by: Guchun Chen Reviewed-by: Christian König Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 25731719c627..940752488330 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1284,6 +1284,7 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem, */ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct dma_fence *fence = NULL; struct amdgpu_bo *abo; int r; @@ -1303,7 +1304,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo); if (bo->resource->mem_type != TTM_PL_VRAM || - !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) + !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) || + adev->in_suspend || adev->shutdown) return; if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) From 2d505453f38e18d42ba7d5428aaa17aaa7752c65 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Tue, 15 Mar 2022 14:59:28 +0800 Subject: [PATCH 025/708] drm/amdgpu: conduct a proper cleanup of PDB bo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use amdgpu_bo_free_kernel instead of amdgpu_bo_unref to perform a proper cleanup of PDB bo. v2: update subject to be more accurate Signed-off-by: Guchun Chen Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 431742eb7811..08ceabd6c853 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1721,7 +1721,7 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); amdgpu_gart_table_vram_free(adev); - amdgpu_bo_unref(&adev->gmc.pdb0_bo); + amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0); amdgpu_bo_fini(adev); return 0; From 60d61f4ed6ead43ad2de31ebb8d1d27c57290529 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 15 Mar 2022 18:19:06 +0800 Subject: [PATCH 026/708] drm/amdgpu/pm: fix the Stable pstate Test in amdgpu_test If GFX DPM is disbaled, Stable pstate Test in amdgpu_test fails. Check GFX DPM statue before change clock level Log: [ 46.595274] [drm] Initialized amdgpu 3.46.0 20150101 for 0000:02:00.0 on minor 0 [ 46.599929] fbcon: amdgpudrmfb (fb0) is primary device [ 46.785753] Console: switching to colour frame buffer device 240x67 [ 46.811765] amdgpu 0000:02:00.0: [drm] fb0: amdgpudrmfb frame buffer device [ 131.398407] amdgpu 0000:02:00.0: amdgpu: Failed to set performance level! Signed-off-by: Yifan Zhang Acked-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c index 7bfac029e513..b81711c4ff33 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c @@ -991,7 +991,7 @@ static int smu_v13_0_5_set_performance_level(struct smu_context *smu, return -EINVAL; } - if (sclk_min && sclk_max) { + if (sclk_min && sclk_max && smu_v13_0_5_clk_dpm_is_enabled(smu, SMU_SCLK)) { ret = smu_v13_0_5_set_soft_freq_limited_range(smu, SMU_SCLK, sclk_min, From 1647b54ed55d4d48c7199d439f8834626576cbe9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Mar 2022 11:41:48 +0300 Subject: [PATCH 027/708] drm/amdgpu: fix off by one in amdgpu_gfx_kiq_acquire() This post-op should be a pre-op so that we do not pass -1 as the bit number to test_bit(). The current code will loop downwards from 63 to -1. After changing to a pre-op, it loops from 63 to 0. Fixes: 71c37505e7ea ("drm/amdgpu/gfx: move more common KIQ code to amdgpu_gfx.c") Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 8fe939976224..28a736c507bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -266,7 +266,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; - while (queue_bit-- >= 0) { + while (--queue_bit >= 0) { if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) continue; From b818a5d374542ccec73dcfe578a081574029820e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Mar 2022 18:02:11 -0500 Subject: [PATCH 028/708] drm/amdgpu/gmc: use PCI BARs for APUs in passthrough MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the GPU is passed through to a guest VM, use the PCI BAR for CPU FB access rather than the physical address of carve out. The physical address is not valid in a guest. v2: Fix HDP handing as suggested by Michel Reviewed-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3987ecb24ef4..49f734137f15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5733,7 +5733,7 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) return; #endif if (adev->gmc.xgmi.connected_to_cpu) @@ -5749,7 +5749,7 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) return; #endif if (adev->gmc.xgmi.connected_to_cpu) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 3c1d440824a7..5228421b0f72 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -814,7 +814,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) { + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) { adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev); adev->gmc.aper_size = adev->gmc.real_vram_size; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 344d819b4c1b..979da6f510e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -381,8 +381,9 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU && - adev->gmc.real_vram_size > adev->gmc.aper_size) { + if ((adev->flags & AMD_IS_APU) && + adev->gmc.real_vram_size > adev->gmc.aper_size && + !amdgpu_passthrough(adev)) { adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; adev->gmc.aper_size = adev->gmc.real_vram_size; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index ca9841d5669f..1932a3e4af7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -581,7 +581,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) { + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) { adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; adev->gmc.aper_size = adev->gmc.real_vram_size; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 08ceabd6c853..6009fbfdcc19 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1456,7 +1456,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) */ /* check whether both host-gpu and gpu-gpu xgmi links exist */ - if ((adev->flags & AMD_IS_APU) || + if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) || (adev->gmc.xgmi.supported && adev->gmc.xgmi.connected_to_cpu)) { adev->gmc.aper_base = From 0d8e4eb337644cab528ff3844675d58496ec22db Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Tue, 8 Mar 2022 11:26:41 +0800 Subject: [PATCH 029/708] drm/amdgpu: add workarounds for VCN TMZ issue on CHIP_RAVEN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is a hardware issue that VCN can't handle a GTT backing stored TMZ buffer on CHIP_RAVEN series ASIC. Move such a TMZ buffer to VRAM domain before command submission as a workaround. v2: - Use patch_cs_in_place callback. v3: - Bail out early if unsecure IBs. Suggested-by: Christian König Signed-off-by: Lang Yu Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 71 +++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index dff54190b96c..f0fbcda76f5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -24,6 +24,7 @@ #include #include "amdgpu.h" +#include "amdgpu_cs.h" #include "amdgpu_vcn.h" #include "amdgpu_pm.h" #include "soc15.h" @@ -1900,6 +1901,75 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .set_powergating_state = vcn_v1_0_set_powergating_state, }; +/* + * It is a hardware issue that VCN can't handle a GTT TMZ buffer on + * CHIP_RAVEN series ASIC. Move such a GTT TMZ buffer to VRAM domain + * before command submission as a workaround. + */ +static int vcn_v1_0_validate_bo(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + uint64_t addr) +{ + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_bo_va_mapping *mapping; + struct amdgpu_bo *bo; + int r; + + addr &= AMDGPU_GMC_HOLE_MASK; + if (addr & 0x7) { + DRM_ERROR("VCN messages must be 8 byte aligned!\n"); + return -EINVAL; + } + + mapping = amdgpu_vm_bo_lookup_mapping(vm, addr/AMDGPU_GPU_PAGE_SIZE); + if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo) + return -EINVAL; + + bo = mapping->bo_va->base.bo; + if (!(bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED)) + return 0; + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) { + DRM_ERROR("Failed to validate the VCN message BO (%d)!\n", r); + return r; + } + + return r; +} + +static int vcn_v1_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + uint32_t msg_lo = 0, msg_hi = 0; + int i, r; + + if (!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) + return 0; + + for (i = 0; i < ib->length_dw; i += 2) { + uint32_t reg = amdgpu_ib_get_value(ib, i); + uint32_t val = amdgpu_ib_get_value(ib, i + 1); + + if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) { + msg_lo = val; + } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) { + msg_hi = val; + } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0)) { + r = vcn_v1_0_validate_bo(p, job, + ((u64)msg_hi) << 32 | msg_lo); + if (r) + return r; + } + } + + return 0; +} + static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, @@ -1910,6 +1980,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .get_rptr = vcn_v1_0_dec_ring_get_rptr, .get_wptr = vcn_v1_0_dec_ring_get_wptr, .set_wptr = vcn_v1_0_dec_ring_set_wptr, + .patch_cs_in_place = vcn_v1_0_ring_patch_cs_in_place, .emit_frame_size = 6 + 6 + /* hdp invalidate / flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + From 93dde6ccd66d29502506e95f568fd0f49d575d27 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Mon, 21 Mar 2022 12:50:36 +0800 Subject: [PATCH 030/708] drm/amdgpu/pm: add asic smu support check It must check asic whether support smu before call smu powerplay function, otherwise it may cause null point on no support smu asic. Signed-off-by: Stanley.Yang Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 89fbee568be4..c73fb73e9628 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -500,6 +500,9 @@ int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size) struct smu_context *smu = adev->powerplay.pp_handle; int ret = 0; + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + mutex_lock(&adev->pm.mutex); ret = smu_send_hbm_bad_pages_num(smu, size); mutex_unlock(&adev->pm.mutex); @@ -512,6 +515,9 @@ int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t si struct smu_context *smu = adev->powerplay.pp_handle; int ret = 0; + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + mutex_lock(&adev->pm.mutex); ret = smu_send_hbm_bad_channel_flag(smu, size); mutex_unlock(&adev->pm.mutex); From 3107e1a7ae088ee94323fe9ab05dbefd65b3077f Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 18 Mar 2022 11:10:34 -0400 Subject: [PATCH 031/708] drm/amd/display: Fix p-state allow debug index on dcn31 [Why] It changed since dcn30 but the hubbub31 constructor hasn't been modified to reflect this. [How] Update the value in the constructor to 0x6 so we're checking the right bits for p-state allow. It worked before by accident, but can falsely assert 0 depending on HW state transitions. The most frequent of which appears to be when all pipes turn off during IGT tests. Cc: Harry Wentland Fixes: e7031d8258f1b4 ("drm/amd/display: Add pstate verification and recovery for DCN31") Signed-off-by: Nicholas Kazlauskas Reviewed-by: Eric Yang Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c index 3e6d6ebd199e..51c5f3685470 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c @@ -1042,5 +1042,7 @@ void hubbub31_construct(struct dcn20_hubbub *hubbub31, hubbub31->detile_buf_size = det_size_kb * 1024; hubbub31->pixel_chunk_size = pixel_chunk_size_kb * 1024; hubbub31->crb_size_segs = config_return_buffer_size_kb / DCN31_CRB_SEGMENT_SIZE_KB; + + hubbub31->debug_test_index_pstate = 0x6; } From 5e8a71cf13bc9184fee915b2220be71b4c6cac74 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Mon, 7 Mar 2022 18:31:29 -0500 Subject: [PATCH 032/708] drm/amd/display: fix audio format not updated after edid updated [why] for the case edid change only changed audio format. driver still need to update stream. Reviewed-by: Alvin Lee Reviewed-by: Aric Cyr Acked-by: Alex Hung Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 7af153434e9e..d251c3f3a714 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1685,8 +1685,8 @@ bool dc_is_stream_unchanged( if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param) return false; - // Only Have Audio left to check whether it is same or not. This is a corner case for Tiled sinks - if (old_stream->audio_info.mode_count != stream->audio_info.mode_count) + /*compare audio info*/ + if (memcmp(&old_stream->audio_info, &stream->audio_info, sizeof(stream->audio_info)) != 0) return false; return true; From 6bf528ec91fb96e186461215c8f76265c5a35250 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 28 Feb 2022 19:01:59 -0500 Subject: [PATCH 033/708] drm/amd/display: Reduce stack size Linux kernel enabled more compilation restrictions related to the stack size, which caused compilation failures in our code. This commit reduces the allocation size by allocating the required memory dynamically. Reviewed-by: Harry Wentland Reviewed-by: Aric Cyr Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index c3e141c19a77..ad757b59e00e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2056,7 +2056,7 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size, { struct dc_context *dc_ctx = dc->ctx; int i, master = -1, embedded = -1; - struct dc_crtc_timing hw_crtc_timing[MAX_PIPES] = {0}; + struct dc_crtc_timing *hw_crtc_timing; uint64_t phase[MAX_PIPES]; uint64_t modulo[MAX_PIPES]; unsigned int pclk; @@ -2067,6 +2067,10 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size, uint32_t dp_ref_clk_100hz = dc->res_pool->dp_clock_source->ctx->dc->clk_mgr->dprefclk_khz*10; + hw_crtc_timing = kcalloc(MAX_PIPES, sizeof(*hw_crtc_timing), GFP_KERNEL); + if (!hw_crtc_timing) + return master; + if (dc->config.vblank_alignment_dto_params && dc->res_pool->dp_clock_source->funcs->override_dp_pix_clk) { embedded_h_total = @@ -2130,6 +2134,8 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size, } } + + kfree(hw_crtc_timing); return master; } From 7d56a154e22ffb3613fdebf83ec34d5225a22993 Mon Sep 17 00:00:00 2001 From: Chiawen Huang Date: Thu, 10 Mar 2022 00:07:59 +0800 Subject: [PATCH 034/708] drm/amd/display: FEC check in timing validation [Why] disable/enable leads FEC mismatch between hw/sw FEC state. [How] check FEC status to fastboot on/off. Reviewed-by: Anthony Koo Acked-by: Alex Hung Signed-off-by: Chiawen Huang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index f6e19efea756..75f9c97bebb0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1496,6 +1496,10 @@ bool dc_validate_boot_timing(const struct dc *dc, if (!link->link_enc->funcs->is_dig_enabled(link->link_enc)) return false; + /* Check for FEC status*/ + if (link->link_enc->funcs->fec_is_active(link->link_enc)) + return false; + enc_inst = link->link_enc->funcs->get_dig_frontend(link->link_enc); if (enc_inst == ENGINE_ID_UNKNOWN) From bd219322dbb41cc7c753da3a6936ce09d502f113 Mon Sep 17 00:00:00 2001 From: Oliver Logush Date: Tue, 8 Mar 2022 10:34:04 -0500 Subject: [PATCH 035/708] drm/amd/display: Add fSMC_MSG_SetDtbClk support [why] Needed to support dcn315 Reviewed-by: Charlene Liu Acked-by: Alex Hung Signed-off-by: Oliver Logush Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/clk_mgr/dcn315/dcn315_smu.c | 19 +++++++++++++++---- .../display/dc/clk_mgr/dcn315/dcn315_smu.h | 4 +++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c index 880ffea2afc6..2600313fea57 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c @@ -80,8 +80,8 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D #define VBIOSSMC_MSG_SetDppclkFreq 0x06 ///< Set DPP clock frequency in MHZ #define VBIOSSMC_MSG_SetHardMinDcfclkByFreq 0x07 ///< Set DCF clock frequency hard min in MHZ #define VBIOSSMC_MSG_SetMinDeepSleepDcfclk 0x08 ///< Set DCF clock minimum frequency in deep sleep in MHZ -#define VBIOSSMC_MSG_SetPhyclkVoltageByFreq 0x09 ///< Set display phy clock frequency in MHZ in case VMIN does not support phy frequency -#define VBIOSSMC_MSG_GetFclkFrequency 0x0A ///< Get FCLK frequency, return frequemcy in MHZ +#define VBIOSSMC_MSG_GetDtbclkFreq 0x09 ///< Get display dtb clock frequency in MHZ in case VMIN does not support phy frequency +#define VBIOSSMC_MSG_SetDtbClk 0x0A ///< Set dtb clock frequency, return frequemcy in MHZ #define VBIOSSMC_MSG_SetDisplayCount 0x0B ///< Inform PMFW of number of display connected #define VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown 0x0C ///< To ask PMFW turn off TMDP 48MHz refclk during display off to save power #define VBIOSSMC_MSG_UpdatePmeRestore 0x0D ///< To ask PMFW to write into Azalia for PME wake up event @@ -324,15 +324,26 @@ int dcn315_smu_get_dpref_clk(struct clk_mgr_internal *clk_mgr) return (dprefclk_get_mhz * 1000); } -int dcn315_smu_get_smu_fclk(struct clk_mgr_internal *clk_mgr) +int dcn315_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr) { int fclk_get_mhz = -1; if (clk_mgr->smu_present) { fclk_get_mhz = dcn315_smu_send_msg_with_param( clk_mgr, - VBIOSSMC_MSG_GetFclkFrequency, + VBIOSSMC_MSG_GetDtbclkFreq, 0); } return (fclk_get_mhz * 1000); } + +void dcn315_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable) +{ + if (!clk_mgr->smu_present) + return; + + dcn315_smu_send_msg_with_param( + clk_mgr, + VBIOSSMC_MSG_SetDtbClk, + enable); +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h index 66fa42f8dd18..5aa3275ac7d8 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h @@ -37,6 +37,7 @@ #define NUM_SOC_VOLTAGE_LEVELS 4 #define NUM_DF_PSTATE_LEVELS 4 + typedef struct { uint16_t MinClock; // This is either DCFCLK or SOCCLK (in MHz) uint16_t MaxClock; // This is either DCFCLK or SOCCLK (in MHz) @@ -124,5 +125,6 @@ void dcn315_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr); void dcn315_smu_request_voltage_via_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phyclk_khz); void dcn315_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr); int dcn315_smu_get_dpref_clk(struct clk_mgr_internal *clk_mgr); -int dcn315_smu_get_smu_fclk(struct clk_mgr_internal *clk_mgr); +int dcn315_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr); +void dcn315_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable); #endif /* DAL_DC_315_SMU_H_ */ From c9fbf6435162ed5fb7201d1d4adf6585c6a8c327 Mon Sep 17 00:00:00 2001 From: "Leo (Hanghong) Ma" Date: Fri, 11 Mar 2022 11:35:29 -0500 Subject: [PATCH 036/708] drm/amd/display: Update VTEM Infopacket definition [Why & How] The latest HDMI SPEC has updated the VTEM packet structure, so change the VTEM Infopacket defined in the driver side to align with the SPEC. Reviewed-by: Chris Park Acked-by: Alex Hung Signed-off-by: Leo (Hanghong) Ma Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/modules/info_packet/info_packet.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index b691aa45e84f..79bc207415bc 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -100,7 +100,8 @@ enum vsc_packet_revision { //PB7 = MD0 #define MASK_VTEM_MD0__VRR_EN 0x01 #define MASK_VTEM_MD0__M_CONST 0x02 -#define MASK_VTEM_MD0__RESERVED2 0x0C +#define MASK_VTEM_MD0__QMS_EN 0x04 +#define MASK_VTEM_MD0__RESERVED2 0x08 #define MASK_VTEM_MD0__FVA_FACTOR_M1 0xF0 //MD1 @@ -109,7 +110,7 @@ enum vsc_packet_revision { //MD2 #define MASK_VTEM_MD2__BASE_REFRESH_RATE_98 0x03 #define MASK_VTEM_MD2__RB 0x04 -#define MASK_VTEM_MD2__RESERVED3 0xF8 +#define MASK_VTEM_MD2__NEXT_TFR 0xF8 //MD3 #define MASK_VTEM_MD3__BASE_REFRESH_RATE_07 0xFF From 02fc996d5098f4c3f65bdf6cdb6b28e3f29ba789 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 21 Mar 2022 16:25:24 +0800 Subject: [PATCH 037/708] drm/amdgpu/vcn: Fix the register setting for vcn1 Correct the code error for setting register UVD_GFX10_ADDR_CONFIG. Need to use inst_idx, or it only will set VCN0. Signed-off-by: Emily Deng Reviewed-by: James Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index c87263ed20ec..b16c56aa2d22 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -575,8 +575,8 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect); /* VCN global tiling registers */ - WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( - UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + UVD, inst_idx, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); } static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst) From e5fc78252ccd8dfc260f87d83905e9dffff6d975 Mon Sep 17 00:00:00 2001 From: Gabe Teeger Date: Mon, 14 Mar 2022 12:07:14 -0400 Subject: [PATCH 038/708] drm/amd/display: Add support for zstate during extended vblank [why] When we enter FREESYNC_STATE_VIDEO, we want to use the extra vblank portion to enter zstate if possible. [how] When we enter freesync, a full update is triggered and the new vtotal with extra lines is passed to dml in a stream update. The time gained from extra vblank lines is calculated in microseconds. We allow zstate entry if the time gained is greater than 5 ms, which is the current policy. Furthermore, an optimized value for min_dst_y_next_start is calculated and written to its register. When exiting freesync, another full update is triggered and default values are restored. Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Gabe Teeger Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 19 +++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 6 +++++- drivers/gpu/drm/amd/display/dc/dc_stream.h | 2 ++ .../drm/amd/display/dc/dcn20/dcn20_hwseq.c | 12 ++++++++++++ .../gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c | 8 ++++++++ .../drm/amd/display/dc/dcn31/dcn31_resource.c | 1 + .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 18 +++++++++++++++--- .../dc/dml/dcn31/display_rq_dlg_calc_31.c | 13 +++++++++++++ .../amd/display/dc/dml/display_mode_structs.h | 2 ++ drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 3 +++ 10 files changed, 80 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 75f9c97bebb0..f2ad8f58e69c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2393,6 +2393,8 @@ static enum surface_update_type check_update_surfaces_for_stream( if (stream_update->mst_bw_update) su_flags->bits.mst_bw = 1; + if (stream_update->crtc_timing_adjust && dc_extended_blank_supported(dc)) + su_flags->bits.crtc_timing_adjust = 1; if (su_flags->raw != 0) overall_type = UPDATE_TYPE_FULL; @@ -2654,6 +2656,9 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->vrr_infopacket) stream->vrr_infopacket = *update->vrr_infopacket; + if (update->crtc_timing_adjust) + stream->adjust = *update->crtc_timing_adjust; + if (update->dpms_off) stream->dpms_off = *update->dpms_off; @@ -4055,3 +4060,17 @@ void dc_notify_vsync_int_state(struct dc *dc, struct dc_stream_state *stream, bo if (pipe->stream_res.abm && pipe->stream_res.abm->funcs->set_abm_pause) pipe->stream_res.abm->funcs->set_abm_pause(pipe->stream_res.abm, !enable, i, pipe->stream_res.tg->inst); } +/* + * dc_extended_blank_supported: Decide whether extended blank is supported + * + * Extended blank is a freesync optimization feature to be enabled in the future. + * During the extra vblank period gained from freesync, we have the ability to enter z9/z10. + * + * @param [in] dc: Current DC state + * @return: Indicate whether extended blank is supported (true or false) + */ +bool dc_extended_blank_supported(struct dc *dc) +{ + return dc->debug.extended_blank_optimization && !dc->debug.disable_z10 + && dc->caps.zstate_support && dc->caps.is_apu; +} diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 4ffab7bb1098..9f4d926d54e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -188,6 +188,7 @@ struct dc_caps { bool psp_setup_panel_mode; bool extended_aux_timeout_support; bool dmcub_support; + bool zstate_support; uint32_t num_of_internal_disp; enum dp_protocol_version max_dp_protocol_version; unsigned int mall_size_per_mem_channel; @@ -703,13 +704,14 @@ struct dc_debug_options { bool enable_driver_sequence_debug; enum det_size crb_alloc_policy; int crb_alloc_policy_min_disp_count; -#if defined(CONFIG_DRM_AMD_DC_DCN) bool disable_z10; +#if defined(CONFIG_DRM_AMD_DC_DCN) bool enable_z9_disable_interface; bool enable_sw_cntl_psr; union dpia_debug_options dpia_debug; #endif bool apply_vendor_specific_lttpr_wa; + bool extended_blank_optimization; bool ignore_dpref_ss; uint8_t psr_power_use_phy_fsm; }; @@ -1369,6 +1371,8 @@ struct dc_sink_init_data { bool converter_disable_audio; }; +bool dc_extended_blank_supported(struct dc *dc); + struct dc_sink *dc_sink_create(const struct dc_sink_init_data *init_params); /* Newer interfaces */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 99a750f561f8..c4168c11257c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -131,6 +131,7 @@ union stream_update_flags { uint32_t wb_update:1; uint32_t dsc_changed : 1; uint32_t mst_bw : 1; + uint32_t crtc_timing_adjust : 1; } bits; uint32_t raw; @@ -289,6 +290,7 @@ struct dc_stream_update { struct dc_3dlut *lut3d_func; struct test_pattern *pending_test_pattern; + struct dc_crtc_timing_adjust *crtc_timing_adjust; }; bool dc_is_stream_unchanged( diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index ab910deed481..4290eaf11a04 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1857,6 +1857,7 @@ void dcn20_optimize_bandwidth( struct dc_state *context) { struct hubbub *hubbub = dc->res_pool->hubbub; + int i; /* program dchubbub watermarks */ hubbub->funcs->program_watermarks(hubbub, @@ -1873,6 +1874,17 @@ void dcn20_optimize_bandwidth( dc->clk_mgr, context, true); + if (dc_extended_blank_supported(dc) && context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW) { + for (i = 0; i < dc->res_pool->pipe_count; ++i) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream && pipe_ctx->plane_res.hubp->funcs->program_extended_blank + && pipe_ctx->stream->adjust.v_total_min == pipe_ctx->stream->adjust.v_total_max + && pipe_ctx->stream->adjust.v_total_max > pipe_ctx->stream->timing.v_total) + pipe_ctx->plane_res.hubp->funcs->program_extended_blank(pipe_ctx->plane_res.hubp, + pipe_ctx->dlg_regs.optimized_min_dst_y_next_start); + } + } /* increase compbuf size */ if (hubbub->funcs->program_compbuf_size) hubbub->funcs->program_compbuf_size(hubbub, context->bw_ctx.bw.dcn.compbuf_size_kb, true); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c index 53b792b997b7..8ae6117953ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c @@ -54,6 +54,13 @@ void hubp31_soft_reset(struct hubp *hubp, bool reset) REG_UPDATE(DCHUBP_CNTL, HUBP_SOFT_RESET, reset); } +void hubp31_program_extended_blank(struct hubp *hubp, unsigned int min_dst_y_next_start_optimized) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_SET(BLANK_OFFSET_1, 0, MIN_DST_Y_NEXT_START, min_dst_y_next_start_optimized); +} + static struct hubp_funcs dcn31_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, @@ -80,6 +87,7 @@ static struct hubp_funcs dcn31_hubp_funcs = { .set_unbounded_requesting = hubp31_set_unbounded_requesting, .hubp_soft_reset = hubp31_soft_reset, .hubp_in_blank = hubp1_in_blank, + .program_extended_blank = hubp31_program_extended_blank, }; bool hubp31_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 89b7b6b7254a..338235bcef4a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -2232,6 +2232,7 @@ static bool dcn31_resource_construct( dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; dc->caps.is_apu = true; + dc->caps.zstate_support = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 2f6122153bdb..f93af45aeab4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -722,8 +722,10 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc { int plane_count; int i; + unsigned int optimized_min_dst_y_next_start_us; plane_count = 0; + optimized_min_dst_y_next_start_us = 0; for (i = 0; i < dc->res_pool->pipe_count; i++) { if (context->res_ctx.pipe_ctx[i].plane_state) plane_count++; @@ -744,11 +746,22 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc struct dc_link *link = context->streams[0]->sink->link; struct dc_stream_status *stream_status = &context->stream_status[0]; + if (dc_extended_blank_supported(dc)) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].stream == context->streams[0] + && context->res_ctx.pipe_ctx[i].stream->adjust.v_total_min == context->res_ctx.pipe_ctx[i].stream->adjust.v_total_max + && context->res_ctx.pipe_ctx[i].stream->adjust.v_total_min > context->res_ctx.pipe_ctx[i].stream->timing.v_total) { + optimized_min_dst_y_next_start_us = + context->res_ctx.pipe_ctx[i].dlg_regs.optimized_min_dst_y_next_start_us; + break; + } + } + } /* zstate only supported on PWRSEQ0 and when there's <2 planes*/ if (link->link_index != 0 || stream_status->plane_count > 1) return DCN_ZSTATE_SUPPORT_DISALLOW; - if (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) + if (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || optimized_min_dst_y_next_start_us > 5000) return DCN_ZSTATE_SUPPORT_ALLOW; else if (link->psr_settings.psr_version == DC_PSR_VERSION_1 && !dc->debug.disable_psr) return DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; @@ -786,8 +799,6 @@ void dcn20_calculate_dlg_params( != dm_dram_clock_change_unsupported; context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; - context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context); - context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) @@ -843,6 +854,7 @@ void dcn20_calculate_dlg_params( &pipes[pipe_idx].pipe); pipe_idx++; } + context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context); } static void swizzle_to_dml_params( diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c index e0fecf127bd5..53d760e169e6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c @@ -1055,6 +1055,7 @@ static void dml_rq_dlg_get_dlg_params( float vba__refcyc_per_req_delivery_pre_l = get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA float vba__refcyc_per_req_delivery_l = get_refcyc_per_req_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + int blank_lines; memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); @@ -1080,6 +1081,18 @@ static void dml_rq_dlg_get_dlg_params( dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start; disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2)); + blank_lines = (dst->vblank_end + dst->vtotal_min - dst->vblank_start - dst->vstartup_start - 1); + if (blank_lines < 0) + blank_lines = 0; + if (blank_lines != 0) { + disp_dlg_regs->optimized_min_dst_y_next_start_us = + ((unsigned int) blank_lines * dst->hactive) / (unsigned int) dst->pixel_rate_mhz; + disp_dlg_regs->optimized_min_dst_y_next_start = + (unsigned int)(((double) (dlg_vblank_start + blank_lines)) * dml_pow(2, 2)); + } else { + // use unoptimized value + disp_dlg_regs->optimized_min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start; + } ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18)); dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank); diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 59f0a61c33cf..2df660cd8801 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -446,6 +446,8 @@ struct _vcs_dpi_display_dlg_regs_st { unsigned int refcyc_h_blank_end; unsigned int dlg_vblank_end; unsigned int min_dst_y_next_start; + unsigned int optimized_min_dst_y_next_start; + unsigned int optimized_min_dst_y_next_start_us; unsigned int refcyc_per_htotal; unsigned int refcyc_x_after_scaler; unsigned int dst_y_after_scaler; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index e45b7993c5c5..ad69d78c4ac3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -195,6 +195,9 @@ struct hubp_funcs { void (*hubp_set_flip_int)(struct hubp *hubp); + void (*program_extended_blank)(struct hubp *hubp, + unsigned int min_dst_y_next_start_optimized); + void (*hubp_wait_pipe_read_start)(struct hubp *hubp); }; From a572f7055067d95455850fd242d8b54ff5786cac Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Tue, 22 Feb 2022 15:53:32 -0500 Subject: [PATCH 039/708] drm/amd/display: remove destructive verify link for TMDS [why and how] TMDS does not need destructive verify link Reviewed-by: Aric Cyr Acked-by: Alan Liu Acked-by: Alex Hung Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index cb87dd643180..bbaa5abdf888 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -983,8 +983,7 @@ static bool should_verify_link_capability_destructively(struct dc_link *link, destrictive = false; } } - } else if (dc_is_hdmi_signal(link->local_sink->sink_signal)) - destrictive = true; + } return destrictive; } From b7dfbd2e601f3fee545bc158feceba4f340fe7cf Mon Sep 17 00:00:00 2001 From: Tushar Patel Date: Thu, 17 Mar 2022 15:31:22 -0400 Subject: [PATCH 040/708] drm/amdkfd: Fix Incorrect VMIDs passed to HWS Compute-only GPUs have more than 8 VMIDs allocated to KFD. Fix this by passing correct number of VMIDs to HWS v2: squash in warning fix (Alex) Signed-off-by: Tushar Patel Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index bb1c025d9001..b03663f42cc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -680,7 +680,7 @@ MODULE_PARM_DESC(sched_policy, * Maximum number of processes that HWS can schedule concurrently. The maximum is the * number of VMIDs assigned to the HWS, which is also the default. */ -int hws_max_conc_proc = 8; +int hws_max_conc_proc = -1; module_param(hws_max_conc_proc, int, 0444); MODULE_PARM_DESC(hws_max_conc_proc, "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 339e12c94cff..0887e26ce23b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -483,15 +483,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, } /* Verify module parameters regarding mapped process number*/ - if ((hws_max_conc_proc < 0) - || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { - dev_err(kfd_device, - "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n", - hws_max_conc_proc, kfd->vm_info.vmid_num_kfd, - kfd->vm_info.vmid_num_kfd); + if (hws_max_conc_proc >= 0) + kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd); + else kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; - } else - kfd->max_proc_per_quantum = hws_max_conc_proc; /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * From c5650327aba02d15cbd6a1846dcde9231493d549 Mon Sep 17 00:00:00 2001 From: Divya Shikre Date: Tue, 22 Mar 2022 15:00:12 -0400 Subject: [PATCH 041/708] drm/amdkfd: Check use_xgmi_p2p before reporting hive_id Recently introduced commit 158a05a0b885 ("drm/amdgpu: Add use_xgmi_p2p module parameter") did not update XGMI iolinks when use_xgmi_p2p is disabled. Add fix to not create XGMI iolinks in KFD topology when this parameter is disabled. Fixes: 158a05a0b885 ("drm/amdgpu: Add use_xgmi_p2p module parameter") Signed-off-by: Divya Shikre Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 0887e26ce23b..62aa6c9d5123 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -531,7 +531,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto kfd_doorbell_error; } - kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; + if (amdgpu_use_xgmi_p2p) + kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; kfd->noretry = kfd->adev->gmc.noretry; From 6ea239adc2a712eb318f04f5c29b018ba65ea38a Mon Sep 17 00:00:00 2001 From: Tianci Yin Date: Wed, 23 Mar 2022 23:54:58 +0800 Subject: [PATCH 042/708] drm/amdgpu/vcn: improve vcn dpg stop procedure Prior to disabling dpg, VCN need unpausing dpg mode, or VCN will hang in S3 resuming. Reviewed-by: James Zhu Signed-off-by: Tianci Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index b16c56aa2d22..0d590183328f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1480,8 +1480,11 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; + vcn_v3_0_pause_dpg_mode(adev, 0, &state); + /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); From ebbb7bb9e80305820dc2328a371c1b35679f2667 Mon Sep 17 00:00:00 2001 From: QintaoShen Date: Thu, 24 Mar 2022 16:26:23 +0800 Subject: [PATCH 043/708] drm/amdkfd: Check for potential null return of kmalloc_array() As the kmalloc_array() may return null, the 'event_waiters[i].wait' would lead to null-pointer dereference. Therefore, it is better to check the return value of kmalloc_array() to avoid this confusion. Signed-off-by: QintaoShen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index deecccebe5b6..64f4a51cc880 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -749,6 +749,8 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) event_waiters = kmalloc_array(num_events, sizeof(struct kfd_event_waiter), GFP_KERNEL); + if (!event_waiters) + return NULL; for (i = 0; (event_waiters) && (i < num_events) ; i++) { init_wait(&event_waiters[i].wait); From 5f3854f1f4e211f494018160b348a1c16e58013f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 24 Mar 2022 18:04:00 -0400 Subject: [PATCH 044/708] drm/amdgpu: add more cases to noretry=1 Port current list from amd-staging-drm-next. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index ca2cfb65f976..f7216afe8a6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -564,6 +564,9 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) case IP_VERSION(9, 4, 0): case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): + case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 3): + case IP_VERSION(9, 3, 0): /* * noretry = 0 will cause kfd page fault tests fail * for some ASICs, so set default to 1 for these ASICs. From 7057c81773ac32fd0dba00e2bb869928f008d3e2 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 1 Mar 2022 22:31:40 +0800 Subject: [PATCH 045/708] drm/amdgpu: set noretry=1 for gc 10.3.6 this patch to set noretry=1 for gc 10.3.6. Signed-off-by: Yifan Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index f7216afe8a6a..7021e8f390bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -567,6 +567,7 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 3): case IP_VERSION(9, 3, 0): + case IP_VERSION(10, 3, 6): /* * noretry = 0 will cause kfd page fault tests fail * for some ASICs, so set default to 1 for these ASICs. From 609910db56e72e87755d9745442bfdfa009dc61b Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 2 Mar 2022 15:18:18 -0500 Subject: [PATCH 046/708] drm/amdgpu: set noretry=1 for GFX 10.3.4 Retry faults are not supported on GFX 10.3.4. Signed-off-by: Felix Kuehling Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 7021e8f390bd..e1635a3f2553 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -561,12 +561,13 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 0, 1): + case IP_VERSION(9, 3, 0): case IP_VERSION(9, 4, 0): case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): - case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 3): - case IP_VERSION(9, 3, 0): + case IP_VERSION(10, 3, 4): + case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 6): /* * noretry = 0 will cause kfd page fault tests fail From 0dc386add50b07e1cf9341b4e6e4fea77295c98a Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Tue, 22 Mar 2022 16:25:29 +0800 Subject: [PATCH 047/708] drm/amdgpu: set noretry for gfx 10.3.7 Disable xnack on the gfx10.3.7 for the KFD test. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index e1635a3f2553..a66a0881a934 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -569,6 +569,7 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): /* * noretry = 0 will cause kfd page fault tests fail * for some ASICs, so set default to 1 for these ASICs. From 15f9cd4334c83716fa32647652a609e3ba6c998d Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 24 Mar 2022 16:14:16 +0800 Subject: [PATCH 048/708] drm/amdgpu/gfx10: enable gfx1037 clock counter retrieval function Enable gfx1037 clock counter retrieval function for KFDPerfCountersTest.ClockCountersBasicTest. Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f4c6accd3226..a98b78e0b507 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7689,6 +7689,7 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): preempt_disable(); clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh); clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh); From 5f7b839d47dbc74cf4a07beeab5191f93678673e Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Mon, 28 Mar 2022 10:48:59 +0800 Subject: [PATCH 049/708] SUNRPC: Return true/false (not 1/0) from bool functions Return boolean values ("true" or "false") instead of 1 or 0 from bool functions. This fixes the following warnings from coccicheck: ./fs/nfsd/nfs2acl.c:289:9-10: WARNING: return of 0/1 in function 'nfsaclsvc_encode_accessres' with return type bool ./fs/nfsd/nfs2acl.c:252:9-10: WARNING: return of 0/1 in function 'nfsaclsvc_encode_getaclres' with return type bool Signed-off-by: Haowen Bai Signed-off-by: Chuck Lever --- fs/nfsd/nfs2acl.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 367551bddfc6..b5760801d377 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -249,34 +249,34 @@ nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) int w; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; if (dentry == NULL || d_really_is_negative(dentry)) - return 1; + return true; inode = d_inode(dentry); if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->mask) < 0) - return 0; + return false; rqstp->rq_res.page_len = w = nfsacl_size( (resp->mask & NFS_ACL) ? resp->acl_access : NULL, (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); while (w > 0) { if (!*(rqstp->rq_next_page++)) - return 1; + return true; w -= PAGE_SIZE; } if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access, resp->mask & NFS_ACL, 0)) - return 0; + return false; if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default, resp->mask & NFS_DFACL, NFS_ACL_DEFAULT)) - return 0; + return false; - return 1; + return true; } /* ACCESS */ @@ -286,17 +286,17 @@ nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr) struct nfsd3_accessres *resp = rqstp->rq_resp; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->access) < 0) - return 0; + return false; break; } - return 1; + return true; } /* From af41d2866f7d75bbb38d487f6ec7770425d70e45 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Mar 2022 09:32:26 +0200 Subject: [PATCH 050/708] powerpc/64: Fix build failure with allyesconfig in book3s_64_entry.S Using conditional branches between two files is hasardous, they may get linked too far from each other. arch/powerpc/kvm/book3s_64_entry.o:(.text+0x3ec): relocation truncated to fit: R_PPC64_REL14 (stub) against symbol `system_reset_common' defined in .text section in arch/powerpc/kernel/head_64.o Reorganise the code to use non conditional branches. Fixes: 89d35b239101 ("KVM: PPC: Book3S HV P9: Implement the rest of the P9 path in C") Signed-off-by: Christophe Leroy [mpe: Avoid odd-looking bne ., use named local labels] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/89cf27bf43ee07a0b2879b9e8e2f5cd6386a3645.1648366338.git.christophe.leroy@csgroup.eu --- arch/powerpc/kvm/book3s_64_entry.S | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S index 05e003eb5d90..e42d1c609e47 100644 --- a/arch/powerpc/kvm/book3s_64_entry.S +++ b/arch/powerpc/kvm/book3s_64_entry.S @@ -414,10 +414,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) */ ld r10,HSTATE_SCRATCH0(r13) cmpwi r10,BOOK3S_INTERRUPT_MACHINE_CHECK - beq machine_check_common + beq .Lcall_machine_check_common cmpwi r10,BOOK3S_INTERRUPT_SYSTEM_RESET - beq system_reset_common + beq .Lcall_system_reset_common b . + +.Lcall_machine_check_common: + b machine_check_common + +.Lcall_system_reset_common: + b system_reset_common #endif From 21d139d73f776aed1e86f3175a1e9fb8a10930c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 29 Mar 2022 08:45:04 +0200 Subject: [PATCH 051/708] dma-buf/sync-file: fix logic error in new fence merge code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the array is empty because everything is signaled we can't use add_fence() to add something because that would filter the signaled fence again. Signed-off-by: Christian König Fixes: 519f490db07e ("dma-buf/sync-file: fix warning about fence containers") Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220329070001.134180-1-christian.koenig@amd.com --- drivers/dma-buf/sync_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index b8dea4ec123b..514d213261df 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -262,7 +262,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, } if (index == 0) - add_fence(fences, &index, dma_fence_get_stub()); + fences[index++] = dma_fence_get_stub(); if (num_fences > index) { struct dma_fence **tmp; From c42ee39c1e78224d3a81bdbe0600abe4581226ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 25 Mar 2022 16:38:54 +0100 Subject: [PATCH 052/708] dma-buf: handle empty dma_fence_arrays gracefully MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A bug inside the new sync-file merge code created empty dma_fence_array instances. Warn about that and handle those without crashing. Signed-off-by: Christian König Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220329070001.134180-2-christian.koenig@amd.com --- drivers/dma-buf/dma-fence-array.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 52b85d292383..5c8a7084577b 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -159,6 +159,8 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence_array *array; size_t size = sizeof(*array); + WARN_ON(!num_fences || !fences); + /* Allocate the callback structures behind the array. */ size += num_fences * sizeof(struct dma_fence_array_cb); array = kzalloc(size, GFP_KERNEL); @@ -231,6 +233,9 @@ struct dma_fence *dma_fence_array_first(struct dma_fence *head) if (!array) return head; + if (!array->num_fences) + return NULL; + return array->fences[0]; } EXPORT_SYMBOL(dma_fence_array_first); From 9f8b577f7b43b2170628d6c537252785dcc2dcea Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Tue, 1 Mar 2022 15:11:35 +0100 Subject: [PATCH 053/708] Drivers: hv: vmbus: Deactivate sysctl_record_panic_msg by default in isolated guests hv_panic_page might contain guest-sensitive information, do not dump it over to Hyper-V by default in isolated guests. While at it, update some comments in hyperv_{panic,die}_event(). Reported-by: Dexuan Cui Signed-off-by: Andrea Parri (Microsoft) Reviewed-by: Dexuan Cui Link: https://lore.kernel.org/r/20220301141135.2232-1-parri.andrea@gmail.com Signed-off-by: Wei Liu --- drivers/hv/vmbus_drv.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 12a2b37e87f3..a963b970ffb2 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -77,8 +77,8 @@ static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, /* * Hyper-V should be notified only once about a panic. If we will be - * doing hyperv_report_panic_msg() later with kmsg data, don't do - * the notification here. + * doing hv_kmsg_dump() with kmsg data later, don't do the notification + * here. */ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE && hyperv_report_reg()) { @@ -100,8 +100,8 @@ static int hyperv_die_event(struct notifier_block *nb, unsigned long val, /* * Hyper-V should be notified only once about a panic. If we will be - * doing hyperv_report_panic_msg() later with kmsg data, don't do - * the notification here. + * doing hv_kmsg_dump() with kmsg data later, don't do the notification + * here. */ if (hyperv_report_reg()) hyperv_report_panic(regs, val, true); @@ -1546,14 +1546,20 @@ static int vmbus_bus_init(void) if (ret) goto err_connect; + if (hv_is_isolation_supported()) + sysctl_record_panic_msg = 0; + /* * Only register if the crash MSRs are available */ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { u64 hyperv_crash_ctl; /* - * Sysctl registration is not fatal, since by default - * reporting is enabled. + * Panic message recording (sysctl_record_panic_msg) + * is enabled by default in non-isolated guests and + * disabled by default in isolated guests; the panic + * message recording won't be available in isolated + * guests should the following registration fail. */ hv_ctl_table_hdr = register_sysctl_table(hv_root_table); if (!hv_ctl_table_hdr) From 3a5469582c241abca22500f36a9cb8e9331969cf Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Tue, 15 Mar 2022 15:10:53 +0100 Subject: [PATCH 054/708] Drivers: hv: vmbus: Fix initialization of device object in vmbus_device_register() Initialize the device's dma_{mask,parms} pointers and the device's dma_mask value before invoking device_register(). Address the following trace with 5.17-rc7: [ 49.646839] WARNING: CPU: 0 PID: 189 at include/linux/dma-mapping.h:543 netvsc_probe+0x37a/0x3a0 [hv_netvsc] [ 49.646928] Call Trace: [ 49.646930] [ 49.646935] vmbus_probe+0x40/0x60 [hv_vmbus] [ 49.646942] really_probe+0x1ce/0x3b0 [ 49.646948] __driver_probe_device+0x109/0x180 [ 49.646952] driver_probe_device+0x23/0xa0 [ 49.646955] __device_attach_driver+0x76/0xe0 [ 49.646958] ? driver_allows_async_probing+0x50/0x50 [ 49.646961] bus_for_each_drv+0x84/0xd0 [ 49.646964] __device_attach+0xed/0x170 [ 49.646967] device_initial_probe+0x13/0x20 [ 49.646970] bus_probe_device+0x8f/0xa0 [ 49.646973] device_add+0x41a/0x8e0 [ 49.646975] ? hrtimer_init+0x28/0x80 [ 49.646981] device_register+0x1b/0x20 [ 49.646983] vmbus_device_register+0x5e/0xf0 [hv_vmbus] [ 49.646991] vmbus_add_channel_work+0x12d/0x190 [hv_vmbus] [ 49.646999] process_one_work+0x21d/0x3f0 [ 49.647002] worker_thread+0x4a/0x3b0 [ 49.647005] ? process_one_work+0x3f0/0x3f0 [ 49.647007] kthread+0xff/0x130 [ 49.647011] ? kthread_complete_and_exit+0x20/0x20 [ 49.647015] ret_from_fork+0x22/0x30 [ 49.647020] [ 49.647021] ---[ end trace 0000000000000000 ]--- Fixes: 743b237c3a7b0 ("scsi: storvsc: Add Isolation VM support for storvsc driver") Signed-off-by: Andrea Parri (Microsoft) Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20220315141053.3223-1-parri.andrea@gmail.com Signed-off-by: Wei Liu --- drivers/hv/vmbus_drv.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index a963b970ffb2..df22fbdc2ae2 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2103,6 +2103,10 @@ int vmbus_device_register(struct hv_device *child_device_obj) child_device_obj->device.parent = &hv_acpi_dev->dev; child_device_obj->device.release = vmbus_device_release; + child_device_obj->device.dma_parms = &child_device_obj->dma_parms; + child_device_obj->device.dma_mask = &child_device_obj->dma_mask; + dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64)); + /* * Register with the LDM. This will kick off the driver/device * binding...which will eventually call vmbus_match() and vmbus_probe() @@ -2128,9 +2132,6 @@ int vmbus_device_register(struct hv_device *child_device_obj) } hv_debug_add_dev_dir(child_device_obj); - child_device_obj->device.dma_parms = &child_device_obj->dma_parms; - child_device_obj->device.dma_mask = &child_device_obj->dma_mask; - dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64)); return 0; err_kset_unregister: From 792f232d57ff28bbd5f9c4abe0466b23d5879dc8 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Tue, 15 Mar 2022 17:35:35 -0300 Subject: [PATCH 055/708] Drivers: hv: vmbus: Fix potential crash on module unload The vmbus driver relies on the panic notifier infrastructure to perform some operations when a panic event is detected. Since vmbus can be built as module, it is required that the driver handles both registering and unregistering such panic notifier callback. After commit 74347a99e73a ("x86/Hyper-V: Unload vmbus channel in hv panic callback") though, the panic notifier registration is done unconditionally in the module initialization routine whereas the unregistering procedure is conditionally guarded and executes only if HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE capability is set. This patch fixes that by unconditionally unregistering the panic notifier in the module's exit routine as well. Fixes: 74347a99e73a ("x86/Hyper-V: Unload vmbus channel in hv panic callback") Signed-off-by: Guilherme G. Piccoli Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20220315203535.682306-1-gpiccoli@igalia.com Signed-off-by: Wei Liu --- drivers/hv/vmbus_drv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index df22fbdc2ae2..6c057c76c2ca 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2787,10 +2787,15 @@ static void __exit vmbus_exit(void) if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { kmsg_dump_unregister(&hv_kmsg_dumper); unregister_die_notifier(&hyperv_die_block); - atomic_notifier_chain_unregister(&panic_notifier_list, - &hyperv_panic_block); } + /* + * The panic notifier is always registered, hence we should + * also unconditionally unregister it here as well. + */ + atomic_notifier_chain_unregister(&panic_notifier_list, + &hyperv_panic_block); + free_page((unsigned long)hv_panic_page); unregister_sysctl_table(hv_ctl_table_hdr); hv_ctl_table_hdr = NULL; From 37200078ed6aa2ac3c88a01a64996133dccfdd34 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Thu, 24 Mar 2022 09:14:51 -0700 Subject: [PATCH 056/708] Drivers: hv: vmbus: Propagate VMbus coherence to each VMbus device VMbus synthetic devices are not represented in the ACPI DSDT -- only the top level VMbus device is represented. As a result, on ARM64 coherence information in the _CCA method is not specified for synthetic devices, so they default to not hardware coherent. Drivers for some of these synthetic devices have been recently updated to use the standard DMA APIs, and they are incurring extra overhead of unneeded software coherence management. Fix this by propagating coherence information from the VMbus node in ACPI to the individual synthetic devices. There's no effect on x86/x64 where devices are always hardware coherent. Signed-off-by: Michael Kelley Acked-by: Robin Murphy Link: https://lore.kernel.org/r/1648138492-2191-2-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/hv_common.c | 11 +++++++++++ drivers/hv/vmbus_drv.c | 31 +++++++++++++++++++++++++++++++ include/asm-generic/mshyperv.h | 1 + 3 files changed, 43 insertions(+) diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 181d16bbf49d..820e81406251 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -216,6 +217,16 @@ bool hv_query_ext_cap(u64 cap_query) } EXPORT_SYMBOL_GPL(hv_query_ext_cap); +void hv_setup_dma_ops(struct device *dev, bool coherent) +{ + /* + * Hyper-V does not offer a vIOMMU in the guest + * VM, so pass 0/NULL for the IOMMU settings + */ + arch_setup_dma_ops(dev, 0, 0, NULL, coherent); +} +EXPORT_SYMBOL_GPL(hv_setup_dma_ops); + bool hv_is_hibernation_supported(void) { return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 6c057c76c2ca..3cd0d3a44fa2 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -920,6 +920,21 @@ static int vmbus_probe(struct device *child_device) return ret; } +/* + * vmbus_dma_configure -- Configure DMA coherence for VMbus device + */ +static int vmbus_dma_configure(struct device *child_device) +{ + /* + * On ARM64, propagate the DMA coherence setting from the top level + * VMbus ACPI device to the child VMbus device being added here. + * On x86/x64 coherence is assumed and these calls have no effect. + */ + hv_setup_dma_ops(child_device, + device_get_dma_attr(&hv_acpi_dev->dev) == DEV_DMA_COHERENT); + return 0; +} + /* * vmbus_remove - Remove a vmbus device */ @@ -1040,6 +1055,7 @@ static struct bus_type hv_bus = { .remove = vmbus_remove, .probe = vmbus_probe, .uevent = vmbus_uevent, + .dma_configure = vmbus_dma_configure, .dev_groups = vmbus_dev_groups, .drv_groups = vmbus_drv_groups, .bus_groups = vmbus_bus_groups, @@ -2435,6 +2451,21 @@ static int vmbus_acpi_add(struct acpi_device *device) hv_acpi_dev = device; + /* + * Older versions of Hyper-V for ARM64 fail to include the _CCA + * method on the top level VMbus device in the DSDT. But devices + * are hardware coherent in all current Hyper-V use cases, so fix + * up the ACPI device to behave as if _CCA is present and indicates + * hardware coherence. + */ + ACPI_COMPANION_SET(&device->dev, device); + if (IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED) && + device_get_dma_attr(&device->dev) == DEV_DMA_NOT_SUPPORTED) { + pr_info("No ACPI _CCA found; assuming coherent device I/O\n"); + device->flags.cca_seen = true; + device->flags.coherent_dma = true; + } + result = acpi_walk_resources(device->handle, METHOD_NAME__CRS, vmbus_walk_resources, NULL); diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index c08758b6b364..c05d2ce9b6cd 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -269,6 +269,7 @@ bool hv_isolation_type_snp(void); u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size); void hyperv_cleanup(void); bool hv_query_ext_cap(u64 cap_query); +void hv_setup_dma_ops(struct device *dev, bool coherent); void *hv_map_memory(void *addr, unsigned long size); void hv_unmap_memory(void *addr); #else /* CONFIG_HYPERV */ From 8d21732475c637c7efcdb91dc927a4c594e97898 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Thu, 24 Mar 2022 09:14:52 -0700 Subject: [PATCH 057/708] PCI: hv: Propagate coherence from VMbus device to PCI device PCI pass-thru devices in a Hyper-V VM are represented as a VMBus device and as a PCI device. The coherence of the VMbus device is set based on the VMbus node in ACPI, but the PCI device has no ACPI node and defaults to not hardware coherent. This results in extra software coherence management overhead on ARM64 when devices are hardware coherent. Fix this by setting up the PCI host bus so that normal PCI mechanisms will propagate the coherence of the VMbus device to the PCI device. There's no effect on x86/x64 where devices are always hardware coherent. Signed-off-by: Michael Kelley Acked-by: Boqun Feng Acked-by: Robin Murphy Link: https://lore.kernel.org/r/1648138492-2191-3-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- drivers/pci/controller/pci-hyperv.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index ae0bc2fee4ca..88b3b56d0522 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -3404,6 +3404,15 @@ static int hv_pci_probe(struct hv_device *hdev, hbus->bridge->domain_nr = dom; #ifdef CONFIG_X86 hbus->sysdata.domain = dom; +#elif defined(CONFIG_ARM64) + /* + * Set the PCI bus parent to be the corresponding VMbus + * device. Then the VMbus device will be assigned as the + * ACPI companion in pcibios_root_bridge_prepare() and + * pci_dma_configure() will propagate device coherence + * information to devices created on the bus. + */ + hbus->sysdata.parent = hdev->device.parent; #endif hbus->hdev = hdev; From b6cae15b5710c8097aad26a2e5e752c323ee5348 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 27 Mar 2022 08:25:10 -0700 Subject: [PATCH 058/708] Drivers: hv: vmbus: Prevent load re-ordering when reading ring buffer When reading a packet from a host-to-guest ring buffer, there is no memory barrier between reading the write index (to see if there is a packet to read) and reading the contents of the packet. The Hyper-V host uses store-release when updating the write index to ensure that writes of the packet data are completed first. On the guest side, the processor can reorder and read the packet data before the write index, and sometimes get stale packet data. Getting such stale packet data has been observed in a reproducible case in a VM on ARM64. Fix this by using virt_load_acquire() to read the write index, ensuring that reads of the packet data cannot be reordered before it. Preventing such reordering is logically correct, and with this change, getting stale data can no longer be reproduced. Signed-off-by: Michael Kelley Reviewed-by: Andrea Parri (Microsoft) Link: https://lore.kernel.org/r/1648394710-33480-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/ring_buffer.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 71efacb90965..3d215d9dec43 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -439,7 +439,16 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi) { u32 priv_read_loc = rbi->priv_read_index; - u32 write_loc = READ_ONCE(rbi->ring_buffer->write_index); + u32 write_loc; + + /* + * The Hyper-V host writes the packet data, then uses + * store_release() to update the write_index. Use load_acquire() + * here to prevent loads of the packet data from being re-ordered + * before the read of the write_index and potentially getting + * stale data. + */ + write_loc = virt_load_acquire(&rbi->ring_buffer->write_index); if (write_loc >= priv_read_loc) return write_loc - priv_read_loc; From 31818213170caa51d116eb5dc1167b88523b4fe1 Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Sun, 27 Mar 2022 23:36:25 +0100 Subject: [PATCH 059/708] netfilter: bitwise: fix reduce comparisons The `nft_bitwise_reduce` and `nft_bitwise_fast_reduce` functions should compare the bitwise operation in `expr` with the tracked operation associated with the destination register of `expr`. However, instead of being called on `expr` and `track->regs[priv->dreg].selector`, `nft_expr_priv` is called on `expr` twice, so both reduce functions return true even when the operations differ. Fixes: be5650f8f47e ("netfilter: nft_bitwise: track register operations") Signed-off-by: Jeremy Sowden Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_bitwise.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 38caa66632b4..f590ee1c8a1b 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -290,7 +290,7 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track, if (!track->regs[priv->sreg].selector) return false; - bitwise = nft_expr_priv(expr); + bitwise = nft_expr_priv(track->regs[priv->dreg].selector); if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector && track->regs[priv->sreg].num_reg == 0 && track->regs[priv->dreg].bitwise && @@ -442,7 +442,7 @@ static bool nft_bitwise_fast_reduce(struct nft_regs_track *track, if (!track->regs[priv->sreg].selector) return false; - bitwise = nft_expr_priv(expr); + bitwise = nft_expr_priv(track->regs[priv->dreg].selector); if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector && track->regs[priv->dreg].bitwise && track->regs[priv->dreg].bitwise->ops == expr->ops && From ab0fc21bc7105b54bafd85bd8b82742f9e68898a Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Tue, 29 Mar 2022 19:32:07 +0800 Subject: [PATCH 060/708] Revert "NFSv4: Handle the special Linux file open access mode" This reverts commit 44942b4e457beda00981f616402a1a791e8c616e. After secondly opening a file with O_ACCMODE|O_DIRECT flags, nfs4_valid_open_stateid() will dereference NULL nfs4_state when lseek(). Reproducer: 1. mount -t nfs -o vers=4.2 $server_ip:/ /mnt/ 2. fd = open("/mnt/file", O_ACCMODE|O_DIRECT|O_CREAT) 3. close(fd) 4. fd = open("/mnt/file", O_ACCMODE|O_DIRECT) 5. lseek(fd) Reported-by: Lyu Tao Signed-off-by: ChenXiaoSong Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 - fs/nfs/nfs4file.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e51d86707fca..e72900c059ee 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1180,7 +1180,6 @@ int nfs_open(struct inode *inode, struct file *filp) nfs_fscache_open_file(inode, filp); return 0; } -EXPORT_SYMBOL_GPL(nfs_open); /* * This function is called whenever some part of NFS notices that diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index d258933cf8c8..f336d0a4190e 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -51,7 +51,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) return err; if ((openflags & O_ACCMODE) == 3) - return nfs_open(inode, filp); + openflags--; /* We can't create new files here */ openflags &= ~(O_CREAT|O_EXCL); From b243874f6f9568b2daf1a00e9222cacdc15e159c Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Tue, 29 Mar 2022 19:32:08 +0800 Subject: [PATCH 061/708] NFSv4: fix open failure with O_ACCMODE flag open() with O_ACCMODE|O_DIRECT flags secondly will fail. Reproducer: 1. mount -t nfs -o vers=4.2 $server_ip:/ /mnt/ 2. fd = open("/mnt/file", O_ACCMODE|O_DIRECT|O_CREAT) 3. close(fd) 4. fd = open("/mnt/file", O_ACCMODE|O_DIRECT) Server nfsd4_decode_share_access() will fail with error nfserr_bad_xdr when client use incorrect share access mode of 0. Fix this by using NFS4_SHARE_ACCESS_BOTH share access mode in client, just like firstly opening. Fixes: ce4ef7c0a8a05 ("NFS: Split out NFS v4 file operations") Signed-off-by: ChenXiaoSong Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 10 ---------- fs/nfs/internal.h | 10 ++++++++++ fs/nfs/nfs4file.c | 6 ++++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bac4cf1a308e..0365063b85a2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1991,16 +1991,6 @@ const struct dentry_operations nfs4_dentry_operations = { }; EXPORT_SYMBOL_GPL(nfs4_dentry_operations); -static fmode_t flags_to_mode(int flags) -{ - fmode_t res = (__force fmode_t)flags & FMODE_EXEC; - if ((flags & O_ACCMODE) != O_WRONLY) - res |= FMODE_READ; - if ((flags & O_ACCMODE) != O_RDONLY) - res |= FMODE_WRITE; - return res; -} - static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp) { return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 57b0497105c8..7eefa16ed381 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -42,6 +42,16 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry) return true; } +static inline fmode_t flags_to_mode(int flags) +{ + fmode_t res = (__force fmode_t)flags & FMODE_EXEC; + if ((flags & O_ACCMODE) != O_WRONLY) + res |= FMODE_READ; + if ((flags & O_ACCMODE) != O_RDONLY) + res |= FMODE_WRITE; + return res; +} + /* * Note: RFC 1813 doesn't limit the number of auth flavors that * a server can return, so make something up. diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index f336d0a4190e..7b861e4f0533 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -32,6 +32,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) struct dentry *parent = NULL; struct inode *dir; unsigned openflags = filp->f_flags; + fmode_t f_mode; struct iattr attr; int err; @@ -50,8 +51,9 @@ nfs4_file_open(struct inode *inode, struct file *filp) if (err) return err; + f_mode = filp->f_mode; if ((openflags & O_ACCMODE) == 3) - openflags--; + f_mode |= flags_to_mode(openflags); /* We can't create new files here */ openflags &= ~(O_CREAT|O_EXCL); @@ -59,7 +61,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) parent = dget_parent(dentry); dir = d_inode(parent); - ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp); + ctx = alloc_nfs_open_context(file_dentry(filp), f_mode, filp); err = PTR_ERR(ctx); if (IS_ERR(ctx)) goto out; From eb07d5a4da041fd2e30e386e5fd12d23bb31cf9e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 30 Mar 2022 11:48:37 +1100 Subject: [PATCH 062/708] SUNRPC: handle malloc failure in ->request_prepare If ->request_prepare() detects an error, it sets ->rq_task->tk_status. This is easy for callers to ignore. The only caller is xprt_request_enqueue_receive() and it does ignore the error, as does call_encode() which calls it. This can result in a request being queued to receive a reply without an allocated receive buffer. So instead of setting rq_task->tk_status, return an error, and store in ->tk_status only in call_encode(); The call to xprt_request_enqueue_receive() is now earlier in call_encode(), where the error can still be handled. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 5 ++--- net/sunrpc/clnt.c | 6 +++--- net/sunrpc/xprt.c | 23 +++++++++++++++-------- net/sunrpc/xprtsock.c | 4 ++-- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index eef5e87c03b4..f171f8c09e13 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -144,7 +144,7 @@ struct rpc_xprt_ops { unsigned short (*get_srcport)(struct rpc_xprt *xprt); int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(struct rpc_task *task); - void (*prepare_request)(struct rpc_rqst *req); + int (*prepare_request)(struct rpc_rqst *req); int (*send_request)(struct rpc_rqst *req); void (*wait_for_reply_request)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); @@ -357,10 +357,9 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req); -void xprt_request_prepare(struct rpc_rqst *req); bool xprt_prepare_transmit(struct rpc_task *task); void xprt_request_enqueue_transmit(struct rpc_task *task); -void xprt_request_enqueue_receive(struct rpc_task *task); +int xprt_request_enqueue_receive(struct rpc_task *task); void xprt_request_wait_receive(struct rpc_task *task); void xprt_request_dequeue_xprt(struct rpc_task *task); bool xprt_request_need_retransmit(struct rpc_task *task); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8bf2af8546d2..3c7407104d54 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1858,6 +1858,9 @@ call_encode(struct rpc_task *task) xprt_request_dequeue_xprt(task); /* Encode here so that rpcsec_gss can use correct sequence number. */ rpc_xdr_encode(task); + /* Add task to reply queue before transmission to avoid races */ + if (task->tk_status == 0 && rpc_reply_expected(task)) + task->tk_status = xprt_request_enqueue_receive(task); /* Did the encode result in an error condition? */ if (task->tk_status != 0) { /* Was the error nonfatal? */ @@ -1881,9 +1884,6 @@ call_encode(struct rpc_task *task) return; } - /* Add task to reply queue before transmission to avoid races */ - if (rpc_reply_expected(task)) - xprt_request_enqueue_receive(task); xprt_request_enqueue_transmit(task); out: task->tk_action = call_transmit; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 880bfe8dc7f6..73344ffb2692 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -69,10 +69,11 @@ /* * Local functions */ -static void xprt_init(struct rpc_xprt *xprt, struct net *net); +static void xprt_init(struct rpc_xprt *xprt, struct net *net); static __be32 xprt_alloc_xid(struct rpc_xprt *xprt); -static void xprt_destroy(struct rpc_xprt *xprt); -static void xprt_request_init(struct rpc_task *task); +static void xprt_destroy(struct rpc_xprt *xprt); +static void xprt_request_init(struct rpc_task *task); +static int xprt_request_prepare(struct rpc_rqst *req); static DEFINE_SPINLOCK(xprt_list_lock); static LIST_HEAD(xprt_list); @@ -1143,16 +1144,19 @@ xprt_request_need_enqueue_receive(struct rpc_task *task, struct rpc_rqst *req) * @task: RPC task * */ -void +int xprt_request_enqueue_receive(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; + int ret; if (!xprt_request_need_enqueue_receive(task, req)) - return; + return 0; - xprt_request_prepare(task->tk_rqstp); + ret = xprt_request_prepare(task->tk_rqstp); + if (ret) + return ret; spin_lock(&xprt->queue_lock); /* Update the softirq receive buffer */ @@ -1166,6 +1170,7 @@ xprt_request_enqueue_receive(struct rpc_task *task) /* Turn off autodisconnect */ del_singleshot_timer_sync(&xprt->timer); + return 0; } /** @@ -1452,14 +1457,16 @@ xprt_request_dequeue_xprt(struct rpc_task *task) * * Calls into the transport layer to do whatever is needed to prepare * the request for transmission or receive. + * Returns error, or zero. */ -void +static int xprt_request_prepare(struct rpc_rqst *req) { struct rpc_xprt *xprt = req->rq_xprt; if (xprt->ops->prepare_request) - xprt->ops->prepare_request(req); + return xprt->ops->prepare_request(req); + return 0; } /** diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 78af7518f263..9b75891b3cc0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -822,11 +822,11 @@ static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait) return ret; } -static void +static int xs_stream_prepare_request(struct rpc_rqst *req) { xdr_free_bvec(&req->rq_rcv_buf); - req->rq_task->tk_status = xdr_alloc_bvec( + return xdr_alloc_bvec( &req->rq_rcv_buf, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); } From 87d663d40801dffc99a5ad3b0188ad3e2b4d1557 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 22 Mar 2022 14:57:02 +0900 Subject: [PATCH 063/708] scsi: mpt3sas: Fix use after free in _scsih_expander_node_remove() The function mpt3sas_transport_port_remove() called in _scsih_expander_node_remove() frees the port field of the sas_expander structure, leading to the following use-after-free splat from KASAN when the ioc_info() call following that function is executed (e.g. when doing rmmod of the driver module): [ 3479.371167] ================================================================== [ 3479.378496] BUG: KASAN: use-after-free in _scsih_expander_node_remove+0x710/0x750 [mpt3sas] [ 3479.386936] Read of size 1 at addr ffff8881c037691c by task rmmod/1531 [ 3479.393524] [ 3479.395035] CPU: 18 PID: 1531 Comm: rmmod Not tainted 5.17.0-rc8+ #1436 [ 3479.401712] Hardware name: Supermicro Super Server/H12SSL-NT, BIOS 2.1 06/02/2021 [ 3479.409263] Call Trace: [ 3479.411743] [ 3479.413875] dump_stack_lvl+0x45/0x59 [ 3479.417582] print_address_description.constprop.0+0x1f/0x120 [ 3479.423389] ? _scsih_expander_node_remove+0x710/0x750 [mpt3sas] [ 3479.429469] kasan_report.cold+0x83/0xdf [ 3479.433438] ? _scsih_expander_node_remove+0x710/0x750 [mpt3sas] [ 3479.439514] _scsih_expander_node_remove+0x710/0x750 [mpt3sas] [ 3479.445411] ? _raw_spin_unlock_irqrestore+0x2d/0x40 [ 3479.452032] scsih_remove+0x525/0xc90 [mpt3sas] [ 3479.458212] ? mpt3sas_expander_remove+0x1d0/0x1d0 [mpt3sas] [ 3479.465529] ? down_write+0xde/0x150 [ 3479.470746] ? up_write+0x14d/0x460 [ 3479.475840] ? kernfs_find_ns+0x137/0x310 [ 3479.481438] pci_device_remove+0x65/0x110 [ 3479.487013] __device_release_driver+0x316/0x680 [ 3479.493180] driver_detach+0x1ec/0x2d0 [ 3479.498499] bus_remove_driver+0xe7/0x2d0 [ 3479.504081] pci_unregister_driver+0x26/0x250 [ 3479.510033] _mpt3sas_exit+0x2b/0x6cf [mpt3sas] [ 3479.516144] __x64_sys_delete_module+0x2fd/0x510 [ 3479.522315] ? free_module+0xaa0/0xaa0 [ 3479.527593] ? __cond_resched+0x1c/0x90 [ 3479.532951] ? lockdep_hardirqs_on_prepare+0x273/0x3e0 [ 3479.539607] ? syscall_enter_from_user_mode+0x21/0x70 [ 3479.546161] ? trace_hardirqs_on+0x1c/0x110 [ 3479.551828] do_syscall_64+0x35/0x80 [ 3479.556884] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 3479.563402] RIP: 0033:0x7f1fc482483b ... [ 3479.943087] ================================================================== Fix this by introducing the local variable port_id to store the port ID value before executing mpt3sas_transport_port_remove(). This local variable is then used in the call to ioc_info() instead of dereferencing the freed port structure. Link: https://lore.kernel.org/r/20220322055702.95276-1-damien.lemoal@opensource.wdc.com Fixes: 7d310f241001 ("scsi: mpt3sas: Get device objects using sas_address & portID") Cc: stable@vger.kernel.org Acked-by: Sreekanth Reddy Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 00792767c620..7e476f50935b 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -11035,6 +11035,7 @@ _scsih_expander_node_remove(struct MPT3SAS_ADAPTER *ioc, { struct _sas_port *mpt3sas_port, *next; unsigned long flags; + int port_id; /* remove sibling ports attached to this expander */ list_for_each_entry_safe(mpt3sas_port, next, @@ -11055,6 +11056,8 @@ _scsih_expander_node_remove(struct MPT3SAS_ADAPTER *ioc, mpt3sas_port->hba_port); } + port_id = sas_expander->port->port_id; + mpt3sas_transport_port_remove(ioc, sas_expander->sas_address, sas_expander->sas_address_parent, sas_expander->port); @@ -11062,7 +11065,7 @@ _scsih_expander_node_remove(struct MPT3SAS_ADAPTER *ioc, "expander_remove: handle(0x%04x), sas_addr(0x%016llx), port:%d\n", sas_expander->handle, (unsigned long long) sas_expander->sas_address, - sas_expander->port->port_id); + port_id); spin_lock_irqsave(&ioc->sas_node_lock, flags); list_del(&sas_expander->list); From ebfe3e0c5e805da3dd692bb120cd6269b7c19b80 Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Thu, 17 Mar 2022 11:01:16 -0400 Subject: [PATCH 064/708] scsi: libiscsi: Remove unnecessary memset() in iscsi_conn_setup() iscsi_cls_conn is alloced by kzalloc(), the whole iscsi_cls_conn is zero filled already including the dd_data. So it is unnecessary to call memset again. Link: https://lore.kernel.org/r/20220317150116.194140-1-haowenchao@huawei.com Reviewed-by: Wu Bo Reviewed-by: Lee Duncan Reviewed-by: Mike Christie Signed-off-by: Wenchao Hao Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index d09926e6c8a8..cf4211c6500d 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -3045,7 +3045,6 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, if (!cls_conn) return NULL; conn = cls_conn->dd_data; - memset(conn, 0, sizeof(*conn) + dd_size); conn->dd_data = cls_conn->dd_data + sizeof(*conn); conn->session = session; From a6968f7a367f128d120447360734344d5a3d5336 Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Fri, 11 Mar 2022 21:22:05 +0800 Subject: [PATCH 065/708] scsi: target: tcmu: Fix possible page UAF tcmu_try_get_data_page() looks up pages under cmdr_lock, but it does not take refcount properly and just returns page pointer. When tcmu_try_get_data_page() returns, the returned page may have been freed by tcmu_blocks_release(). We need to get_page() under cmdr_lock to avoid concurrent tcmu_blocks_release(). Link: https://lore.kernel.org/r/20220311132206.24515-1-xiaoguang.wang@linux.alibaba.com Reviewed-by: Bodo Stroesser Signed-off-by: Xiaoguang Wang Signed-off-by: Martin K. Petersen --- drivers/target/target_core_user.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 95d4ca50a605..fd7267baa707 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1821,6 +1821,7 @@ static struct page *tcmu_try_get_data_page(struct tcmu_dev *udev, uint32_t dpi) mutex_lock(&udev->cmdr_lock); page = xa_load(&udev->data_pages, dpi); if (likely(page)) { + get_page(page); mutex_unlock(&udev->cmdr_lock); return page; } @@ -1877,6 +1878,7 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf) /* For the vmalloc()ed cmd area pages */ addr = (void *)(unsigned long)info->mem[mi].addr + offset; page = vmalloc_to_page(addr); + get_page(page); } else { uint32_t dpi; @@ -1887,7 +1889,6 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; } - get_page(page); vmf->page = page; return 0; } From 35ed9613d83f3c1f011877d591fd7d36f2666106 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 16 Mar 2022 20:27:34 -0700 Subject: [PATCH 066/708] scsi: lpfc: Improve PCI EEH Error and Recovery Handling Following EEH errors, the driver can crash or hang when deleting the localport or when attempting to unload. The EEH handlers in the driver did not notify the NVMe-FC transport before tearing the driver down. This was delayed until the resume steps. This worked for SCSI because lpfc_block_scsi() would notify the scsi_fc_transport that the target was not available but it would not clean up all the references to the ndlp. The SLI3 prep for dev reset handler did the lpfc_offline_prep() and lpfc_offline() calls to get the port stopped before restarting. The SLI4 version of the prep for dev reset just destroyed the queues and did not stop NVMe from continuing. Also because the port was not really stopped the localport destroy would hang because the transport was still waiting for I/O. Additionally, a devloss tmo can fire and post events to a stopped worker thread creating another hang condition. lpfc_sli4_prep_dev_for_reset() is modified to call lpfc_offline_prep() and lpfc_offline() rather than just lpfc_scsi_dev_block() to ensure both SCSI and NVMe transports are notified to block I/O to the driver. Logic is added to devloss handler and worker thread to clean up ndlp references and quiesce appropriately. Link: https://lore.kernel.org/r/20220317032737.45308-2-jsmart2021@gmail.com Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 7 +- drivers/scsi/lpfc/lpfc_crtn.h | 3 + drivers/scsi/lpfc/lpfc_hbadisc.c | 119 +++++++++++++++++++++++++------ drivers/scsi/lpfc/lpfc_init.c | 60 ++++++++++------ drivers/scsi/lpfc/lpfc_nvme.c | 11 ++- drivers/scsi/lpfc/lpfc_sli.c | 15 ++-- 6 files changed, 157 insertions(+), 58 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 86653aa9b389..8405fd0bbc59 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -896,6 +896,11 @@ enum lpfc_irq_chann_mode { NHT_MODE, }; +enum lpfc_hba_bit_flags { + FABRIC_COMANDS_BLOCKED, + HBA_PCI_ERR, +}; + struct lpfc_hba { /* SCSI interface function jump table entries */ struct lpfc_io_buf * (*lpfc_get_scsi_buf) @@ -1042,7 +1047,6 @@ struct lpfc_hba { * Firmware supports Forced Link Speed * capability */ -#define HBA_PCI_ERR 0x80000 /* The PCI slot is offline */ #define HBA_FLOGI_ISSUED 0x100000 /* FLOGI was issued */ #define HBA_SHORT_CMF 0x200000 /* shorter CMF timer routine */ #define HBA_CGN_DAY_WRAP 0x400000 /* HBA Congestion info day wraps */ @@ -1349,7 +1353,6 @@ struct lpfc_hba { atomic_t fabric_iocb_count; struct timer_list fabric_block_timer; unsigned long bit_flags; -#define FABRIC_COMANDS_BLOCKED 0 atomic_t num_rsrc_err; atomic_t num_cmd_success; unsigned long last_rsrc_error_time; diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 96408cd6c4c8..9897a1aa387b 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -670,3 +670,6 @@ struct lpfc_vmid *lpfc_get_vmid_from_hashtable(struct lpfc_vport *vport, uint32_t hash, uint8_t *buf); void lpfc_vmid_vport_cleanup(struct lpfc_vport *vport); int lpfc_issue_els_qfpa(struct lpfc_vport *vport); + +void lpfc_sli_rpi_release(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp); diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 0144da30e3db..6983c70f2fc6 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -109,8 +109,8 @@ lpfc_rport_invalid(struct fc_rport *rport) ndlp = rdata->pnode; if (!rdata->pnode) { - pr_err("**** %s: NULL ndlp on rport x%px SID x%x\n", - __func__, rport, rport->scsi_target_id); + pr_info("**** %s: NULL ndlp on rport x%px SID x%x\n", + __func__, rport, rport->scsi_target_id); return -EINVAL; } @@ -169,9 +169,10 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, "3181 dev_loss_callbk x%06x, rport x%px flg x%x " - "load_flag x%x refcnt %d\n", + "load_flag x%x refcnt %d state %d xpt x%x\n", ndlp->nlp_DID, ndlp->rport, ndlp->nlp_flag, - vport->load_flag, kref_read(&ndlp->kref)); + vport->load_flag, kref_read(&ndlp->kref), + ndlp->nlp_state, ndlp->fc4_xpt_flags); /* Don't schedule a worker thread event if the vport is going down. * The teardown process cleans up the node via lpfc_drop_node. @@ -181,6 +182,11 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) ndlp->rport = NULL; ndlp->fc4_xpt_flags &= ~SCSI_XPT_REGD; + /* clear the NLP_XPT_REGD if the node is not registered + * with nvme-fc + */ + if (ndlp->fc4_xpt_flags == NLP_XPT_REGD) + ndlp->fc4_xpt_flags &= ~NLP_XPT_REGD; /* Remove the node reference from remote_port_add now. * The driver will not call remote_port_delete. @@ -225,18 +231,36 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) ndlp->rport = NULL; spin_unlock_irqrestore(&ndlp->lock, iflags); - /* We need to hold the node by incrementing the reference - * count until this queued work is done - */ - evtp->evt_arg1 = lpfc_nlp_get(ndlp); + if (phba->worker_thread) { + /* We need to hold the node by incrementing the reference + * count until this queued work is done + */ + evtp->evt_arg1 = lpfc_nlp_get(ndlp); + + spin_lock_irqsave(&phba->hbalock, iflags); + if (evtp->evt_arg1) { + evtp->evt = LPFC_EVT_DEV_LOSS; + list_add_tail(&evtp->evt_listp, &phba->work_list); + lpfc_worker_wake_up(phba); + } + spin_unlock_irqrestore(&phba->hbalock, iflags); + } else { + lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, + "3188 worker thread is stopped %s x%06x, " + " rport x%px flg x%x load_flag x%x refcnt " + "%d\n", __func__, ndlp->nlp_DID, + ndlp->rport, ndlp->nlp_flag, + vport->load_flag, kref_read(&ndlp->kref)); + if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD)) { + spin_lock_irqsave(&ndlp->lock, iflags); + /* Node is in dev loss. No further transaction. */ + ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS; + spin_unlock_irqrestore(&ndlp->lock, iflags); + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RM); + } - spin_lock_irqsave(&phba->hbalock, iflags); - if (evtp->evt_arg1) { - evtp->evt = LPFC_EVT_DEV_LOSS; - list_add_tail(&evtp->evt_listp, &phba->work_list); - lpfc_worker_wake_up(phba); } - spin_unlock_irqrestore(&phba->hbalock, iflags); return; } @@ -503,11 +527,12 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, "0203 Devloss timeout on " "WWPN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x " - "NPort x%06x Data: x%x x%x x%x\n", + "NPort x%06x Data: x%x x%x x%x refcnt %d\n", *name, *(name+1), *(name+2), *(name+3), *(name+4), *(name+5), *(name+6), *(name+7), ndlp->nlp_DID, ndlp->nlp_flag, - ndlp->nlp_state, ndlp->nlp_rpi); + ndlp->nlp_state, ndlp->nlp_rpi, + kref_read(&ndlp->kref)); } else { lpfc_printf_vlog(vport, KERN_INFO, LOG_TRACE_EVENT, "0204 Devloss timeout on " @@ -755,18 +780,22 @@ lpfc_work_list_done(struct lpfc_hba *phba) int free_evt; int fcf_inuse; uint32_t nlp_did; + bool hba_pci_err; spin_lock_irq(&phba->hbalock); while (!list_empty(&phba->work_list)) { list_remove_head((&phba->work_list), evtp, typeof(*evtp), evt_listp); spin_unlock_irq(&phba->hbalock); + hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags); free_evt = 1; switch (evtp->evt) { case LPFC_EVT_ELS_RETRY: ndlp = (struct lpfc_nodelist *) (evtp->evt_arg1); - lpfc_els_retry_delay_handler(ndlp); - free_evt = 0; /* evt is part of ndlp */ + if (!hba_pci_err) { + lpfc_els_retry_delay_handler(ndlp); + free_evt = 0; /* evt is part of ndlp */ + } /* decrement the node reference count held * for this queued work */ @@ -788,8 +817,10 @@ lpfc_work_list_done(struct lpfc_hba *phba) break; case LPFC_EVT_RECOVER_PORT: ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1); - lpfc_sli_abts_recover_port(ndlp->vport, ndlp); - free_evt = 0; + if (!hba_pci_err) { + lpfc_sli_abts_recover_port(ndlp->vport, ndlp); + free_evt = 0; + } /* decrement the node reference count held for * this queued work */ @@ -859,14 +890,18 @@ lpfc_work_done(struct lpfc_hba *phba) struct lpfc_vport **vports; struct lpfc_vport *vport; int i; + bool hba_pci_err; + hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags); spin_lock_irq(&phba->hbalock); ha_copy = phba->work_ha; phba->work_ha = 0; spin_unlock_irq(&phba->hbalock); + if (hba_pci_err) + ha_copy = 0; /* First, try to post the next mailbox command to SLI4 device */ - if (phba->pci_dev_grp == LPFC_PCI_DEV_OC) + if (phba->pci_dev_grp == LPFC_PCI_DEV_OC && !hba_pci_err) lpfc_sli4_post_async_mbox(phba); if (ha_copy & HA_ERATT) { @@ -886,7 +921,7 @@ lpfc_work_done(struct lpfc_hba *phba) lpfc_handle_latt(phba); /* Handle VMID Events */ - if (lpfc_is_vmid_enabled(phba)) { + if (lpfc_is_vmid_enabled(phba) && !hba_pci_err) { if (phba->pport->work_port_events & WORKER_CHECK_VMID_ISSUE_QFPA) { lpfc_check_vmid_qfpa_issue(phba); @@ -936,6 +971,8 @@ lpfc_work_done(struct lpfc_hba *phba) work_port_events = vport->work_port_events; vport->work_port_events &= ~work_port_events; spin_unlock_irq(&vport->work_port_lock); + if (hba_pci_err) + continue; if (work_port_events & WORKER_DISC_TMO) lpfc_disc_timeout_handler(vport); if (work_port_events & WORKER_ELS_TMO) @@ -1173,12 +1210,14 @@ lpfc_linkdown(struct lpfc_hba *phba) struct lpfc_vport **vports; LPFC_MBOXQ_t *mb; int i; + int offline; if (phba->link_state == LPFC_LINK_DOWN) return 0; /* Block all SCSI stack I/Os */ lpfc_scsi_dev_block(phba); + offline = pci_channel_offline(phba->pcidev); phba->defer_flogi_acc_flag = false; @@ -1219,7 +1258,7 @@ lpfc_linkdown(struct lpfc_hba *phba) lpfc_destroy_vport_work_array(phba, vports); /* Clean up any SLI3 firmware default rpi's */ - if (phba->sli_rev > LPFC_SLI_REV3) + if (phba->sli_rev > LPFC_SLI_REV3 || offline) goto skip_unreg_did; mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); @@ -4712,6 +4751,11 @@ lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) spin_lock_irqsave(&ndlp->lock, iflags); if (!(ndlp->fc4_xpt_flags & NLP_XPT_REGD)) { spin_unlock_irqrestore(&ndlp->lock, iflags); + lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + "0999 %s Not regd: ndlp x%px rport x%px DID " + "x%x FLG x%x XPT x%x\n", + __func__, ndlp, ndlp->rport, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp->fc4_xpt_flags); return; } @@ -4722,6 +4766,13 @@ lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) ndlp->fc4_xpt_flags & SCSI_XPT_REGD) { vport->phba->nport_event_cnt++; lpfc_unregister_remote_port(ndlp); + } else if (!ndlp->rport) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + "1999 %s NDLP in devloss x%px DID x%x FLG x%x" + " XPT x%x refcnt %d\n", + __func__, ndlp, ndlp->nlp_DID, ndlp->nlp_flag, + ndlp->fc4_xpt_flags, + kref_read(&ndlp->kref)); } if (ndlp->fc4_xpt_flags & NVME_XPT_REGD) { @@ -6097,12 +6148,34 @@ lpfc_disc_flush_list(struct lpfc_vport *vport) } } +/* + * lpfc_notify_xport_npr - notifies xport of node disappearance + * @vport: Pointer to Virtual Port object. + * + * Transitions all ndlps to NPR state. When lpfc_nlp_set_state + * calls lpfc_nlp_state_cleanup, the ndlp->rport is unregistered + * and transport notified that the node is gone. + * Return Code: + * none + */ +static void +lpfc_notify_xport_npr(struct lpfc_vport *vport) +{ + struct lpfc_nodelist *ndlp, *next_ndlp; + + list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, + nlp_listp) { + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + } +} void lpfc_cleanup_discovery_resources(struct lpfc_vport *vport) { lpfc_els_flush_rscn(vport); lpfc_els_flush_cmd(vport); lpfc_disc_flush_list(vport); + if (pci_channel_offline(vport->phba->pcidev)) + lpfc_notify_xport_npr(vport); } /*****************************************************************************/ diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index eed6464bd880..b8ab6dcbadc5 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1642,7 +1642,7 @@ lpfc_sli4_offline_eratt(struct lpfc_hba *phba) { spin_lock_irq(&phba->hbalock); if (phba->link_state == LPFC_HBA_ERROR && - phba->hba_flag & HBA_PCI_ERR) { + test_bit(HBA_PCI_ERR, &phba->bit_flags)) { spin_unlock_irq(&phba->hbalock); return; } @@ -3682,7 +3682,8 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) struct lpfc_vport **vports; struct Scsi_Host *shost; int i; - int offline = 0; + int offline; + bool hba_pci_err; if (vport->fc_flag & FC_OFFLINE_MODE) return; @@ -3692,6 +3693,7 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) lpfc_linkdown(phba); offline = pci_channel_offline(phba->pcidev); + hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags); /* Issue an unreg_login to all nodes on all vports */ vports = lpfc_create_vport_work_array(phba); @@ -3715,11 +3717,14 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) ndlp->nlp_flag &= ~NLP_NPR_ADISC; spin_unlock_irq(&ndlp->lock); - if (offline) { + if (offline || hba_pci_err) { spin_lock_irq(&ndlp->lock); ndlp->nlp_flag &= ~(NLP_UNREG_INP | NLP_RPI_REGISTERED); spin_unlock_irq(&ndlp->lock); + if (phba->sli_rev == LPFC_SLI_REV4) + lpfc_sli_rpi_release(vports[i], + ndlp); } else { lpfc_unreg_rpi(vports[i], ndlp); } @@ -13374,15 +13379,12 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba) /* Disable FW logging to host memory */ lpfc_ras_stop_fwlog(phba); - /* Unset the queues shared with the hardware then release all - * allocated resources. - */ - lpfc_sli4_queue_unset(phba); - lpfc_sli4_queue_destroy(phba); - /* Reset SLI4 HBA FCoE function */ lpfc_pci_function_reset(phba); + /* release all queue allocated resources. */ + lpfc_sli4_queue_destroy(phba); + /* Free RAS DMA memory */ if (phba->ras_fwlog.ras_enabled) lpfc_sli4_ras_dma_free(phba); @@ -15057,24 +15059,28 @@ lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba) static void lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba) { - lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, - "2826 PCI channel disable preparing for reset\n"); + int offline = pci_channel_offline(phba->pcidev); + + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2826 PCI channel disable preparing for reset offline" + " %d\n", offline); /* Block any management I/Os to the device */ lpfc_block_mgmt_io(phba, LPFC_MBX_NO_WAIT); - /* Block all SCSI devices' I/Os on the host */ - lpfc_scsi_dev_block(phba); + /* HBA_PCI_ERR was set in io_error_detect */ + lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT); /* Flush all driver's outstanding I/Os as we are to reset */ lpfc_sli_flush_io_rings(phba); + lpfc_offline(phba); /* stop all timers */ lpfc_stop_hba_timers(phba); + lpfc_sli4_queue_destroy(phba); /* Disable interrupt and pci device */ lpfc_sli4_disable_intr(phba); - lpfc_sli4_queue_destroy(phba); pci_disable_device(phba->pcidev); } @@ -15123,6 +15129,7 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state) { struct Scsi_Host *shost = pci_get_drvdata(pdev); struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba; + bool hba_pci_err; switch (state) { case pci_channel_io_normal: @@ -15130,17 +15137,24 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state) lpfc_sli4_prep_dev_for_recover(phba); return PCI_ERS_RESULT_CAN_RECOVER; case pci_channel_io_frozen: - phba->hba_flag |= HBA_PCI_ERR; + hba_pci_err = test_and_set_bit(HBA_PCI_ERR, &phba->bit_flags); /* Fatal error, prepare for slot reset */ - lpfc_sli4_prep_dev_for_reset(phba); + if (!hba_pci_err) + lpfc_sli4_prep_dev_for_reset(phba); + else + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2832 Already handling PCI error " + "state: x%x\n", state); return PCI_ERS_RESULT_NEED_RESET; case pci_channel_io_perm_failure: - phba->hba_flag |= HBA_PCI_ERR; + set_bit(HBA_PCI_ERR, &phba->bit_flags); /* Permanent failure, prepare for device down */ lpfc_sli4_prep_dev_for_perm_failure(phba); return PCI_ERS_RESULT_DISCONNECT; default: - phba->hba_flag |= HBA_PCI_ERR; + hba_pci_err = test_and_set_bit(HBA_PCI_ERR, &phba->bit_flags); + if (!hba_pci_err) + lpfc_sli4_prep_dev_for_reset(phba); /* Unknown state, prepare and request slot reset */ lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, "2825 Unknown PCI error state: x%x\n", state); @@ -15174,17 +15188,21 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev) struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba; struct lpfc_sli *psli = &phba->sli; uint32_t intr_mode; + bool hba_pci_err; dev_printk(KERN_INFO, &pdev->dev, "recovering from a slot reset.\n"); if (pci_enable_device_mem(pdev)) { printk(KERN_ERR "lpfc: Cannot re-enable " - "PCI device after reset.\n"); + "PCI device after reset.\n"); return PCI_ERS_RESULT_DISCONNECT; } pci_restore_state(pdev); - phba->hba_flag &= ~HBA_PCI_ERR; + hba_pci_err = test_and_clear_bit(HBA_PCI_ERR, &phba->bit_flags); + if (!hba_pci_err) + dev_info(&pdev->dev, + "hba_pci_err was not set, recovering slot reset.\n"); /* * As the new kernel behavior of pci_restore_state() API call clears * device saved_state flag, need to save the restored state again. @@ -15239,8 +15257,6 @@ lpfc_io_resume_s4(struct pci_dev *pdev) */ if (!(phba->sli.sli_flag & LPFC_SLI_ACTIVE)) { /* Perform device reset */ - lpfc_offline_prep(phba, LPFC_MBX_WAIT); - lpfc_offline(phba); lpfc_sli_brdrestart(phba); /* Bring the device back online */ lpfc_online(phba); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 1213a299f9aa..e47205e0d3e2 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2169,8 +2169,7 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, abts_nvme = 0; for (i = 0; i < phba->cfg_hdw_queue; i++) { qp = &phba->sli4_hba.hdwq[i]; - if (!vport || !vport->localport || - !qp || !qp->io_wq) + if (!vport->localport || !qp || !qp->io_wq) return; pring = qp->io_wq->pring; @@ -2180,8 +2179,9 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, abts_scsi += qp->abts_scsi_io_bufs; abts_nvme += qp->abts_nvme_io_bufs; } - if (!vport || !vport->localport || - vport->phba->hba_flag & HBA_PCI_ERR) + if (!vport->localport || + test_bit(HBA_PCI_ERR, &vport->phba->bit_flags) || + vport->load_flag & FC_UNLOADING) return; lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, @@ -2541,8 +2541,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) * return values is ignored. The upcall is a courtesy to the * transport. */ - if (vport->load_flag & FC_UNLOADING || - unlikely(vport->phba->hba_flag & HBA_PCI_ERR)) + if (vport->load_flag & FC_UNLOADING) (void)nvme_fc_set_remoteport_devloss(remoteport, 0); ret = nvme_fc_unregister_remoteport(remoteport); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 20d40957a385..26f6a147b5ae 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2828,6 +2828,12 @@ __lpfc_sli_rpi_release(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) ndlp->nlp_flag &= ~NLP_UNREG_INP; } +void +lpfc_sli_rpi_release(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) +{ + __lpfc_sli_rpi_release(vport, ndlp); +} + /** * lpfc_sli_def_mbox_cmpl - Default mailbox completion handler * @phba: Pointer to HBA context object. @@ -4624,11 +4630,6 @@ lpfc_sli_flush_io_rings(struct lpfc_hba *phba) struct lpfc_iocbq *piocb, *next_iocb; spin_lock_irq(&phba->hbalock); - if (phba->hba_flag & HBA_IOQ_FLUSH || - !phba->sli4_hba.hdwq) { - spin_unlock_irq(&phba->hbalock); - return; - } /* Indicate the I/O queues are flushed */ phba->hba_flag |= HBA_IOQ_FLUSH; spin_unlock_irq(&phba->hbalock); @@ -10997,6 +10998,10 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, unsigned long iflags; int rc; + /* If the PCI channel is in offline state, do not post iocbs. */ + if (unlikely(pci_channel_offline(phba->pcidev))) + return IOCB_ERROR; + if (phba->sli_rev == LPFC_SLI_REV4) { lpfc_sli_prep_wqe(phba, piocb); From a4691038b4071ff0d9ae486d8822a2c0d41d5796 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 16 Mar 2022 20:27:35 -0700 Subject: [PATCH 067/708] scsi: lpfc: Fix unload hang after back to back PCI EEH faults When injecting EEH errors the port is getting hung up waiting on the node list to empty, message number 0233. The driver is stuck at this point and also can't unload. The driver makes transport remoteport delete calls which try to abort I/O's, but the EEH daemon has already called the driver to detach and the detachment has set the global FC_UNLOADING flag. There are several code paths that will avoid I/O cleanup if the FC_UNLOADING flag is set, resulting in transports waiting for I/O while the driver is waiting on transports to clean up. Additionally, during study of the list, a locking issue was found in lpfc_sli_abort_iocb_ring that could corrupt the list. A special case was added to the lpfc_cleanup() routine to call lpfc_sli_flush_rings() if the driver is FC_UNLOADING and if the pci-slot is offline (e.g. EEH). The SLI4 part of lpfc_sli_abort_iocb_ring() is changed to use the ring_lock. Also added code to cancel the I/Os if the pci-slot is offline and added checks and returns for the FC_UNLOADING and HBA_IOQ_FLUSH flags to prevent trying to send an I/O that we cannot handle. Link: https://lore.kernel.org/r/20220317032737.45308-3-jsmart2021@gmail.com Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_hbadisc.c | 1 + drivers/scsi/lpfc/lpfc_init.c | 26 +++++++++++++++-- drivers/scsi/lpfc/lpfc_nvme.c | 16 ++++++++-- drivers/scsi/lpfc/lpfc_sli.c | 50 ++++++++++++++++++++++---------- 4 files changed, 72 insertions(+), 21 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 6983c70f2fc6..2b877dff5ed4 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -5422,6 +5422,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) ndlp->nlp_flag &= ~NLP_UNREG_INP; mempool_free(mbox, phba->mbox_mem_pool); acc_plogi = 1; + lpfc_nlp_put(ndlp); } } else { lpfc_printf_vlog(vport, KERN_INFO, diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index b8ab6dcbadc5..cf83bc0e27c0 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -95,6 +95,7 @@ static void lpfc_sli4_oas_verify(struct lpfc_hba *phba); static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int); static void lpfc_setup_bg(struct lpfc_hba *, struct Scsi_Host *); static int lpfc_sli4_cgn_parm_chg_evt(struct lpfc_hba *); +static void lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba); static struct scsi_transport_template *lpfc_transport_template = NULL; static struct scsi_transport_template *lpfc_vport_transport_template = NULL; @@ -1985,6 +1986,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba) if (pci_channel_offline(phba->pcidev)) { lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, "3166 pci channel is offline\n"); + lpfc_sli_flush_io_rings(phba); return; } @@ -2973,6 +2975,22 @@ lpfc_cleanup(struct lpfc_vport *vport) NLP_EVT_DEVICE_RM); } + /* This is a special case flush to return all + * IOs before entering this loop. There are + * two points in the code where a flush is + * avoided if the FC_UNLOADING flag is set. + * one is in the multipool destroy, + * (this prevents a crash) and the other is + * in the nvme abort handler, ( also prevents + * a crash). Both of these exceptions are + * cases where the slot is still accessible. + * The flush here is only when the pci slot + * is offline. + */ + if (vport->load_flag & FC_UNLOADING && + pci_channel_offline(phba->pcidev)) + lpfc_sli_flush_io_rings(vport->phba); + /* At this point, ALL ndlp's should be gone * because of the previous NLP_EVT_DEVICE_RM. * Lets wait for this to happen, if needed. @@ -2985,7 +3003,7 @@ lpfc_cleanup(struct lpfc_vport *vport) list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { lpfc_printf_vlog(ndlp->vport, KERN_ERR, - LOG_TRACE_EVENT, + LOG_DISCOVERY, "0282 did:x%x ndlp:x%px " "refcnt:%d xflags x%x nflag x%x\n", ndlp->nlp_DID, (void *)ndlp, @@ -13359,8 +13377,9 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba) /* Abort all iocbs associated with the hba */ lpfc_sli_hba_iocb_abort(phba); - /* Wait for completion of device XRI exchange busy */ - lpfc_sli4_xri_exchange_busy_wait(phba); + if (!pci_channel_offline(phba->pcidev)) + /* Wait for completion of device XRI exchange busy */ + lpfc_sli4_xri_exchange_busy_wait(phba); /* per-phba callback de-registration for hotplug event */ if (phba->pport) @@ -14264,6 +14283,7 @@ lpfc_sli_prep_dev_for_perm_failure(struct lpfc_hba *phba) "2711 PCI channel permanent disable for failure\n"); /* Block all SCSI devices' I/Os on the host */ lpfc_scsi_dev_block(phba); + lpfc_sli4_prep_dev_for_reset(phba); /* stop all timers */ lpfc_stop_hba_timers(phba); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index e47205e0d3e2..8d26f207ebd2 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -93,6 +93,11 @@ lpfc_nvme_create_queue(struct nvme_fc_local_port *pnvme_lport, lport = (struct lpfc_nvme_lport *)pnvme_lport->private; vport = lport->vport; + + if (!vport || vport->load_flag & FC_UNLOADING || + vport->phba->hba_flag & HBA_IOQ_FLUSH) + return -ENODEV; + qhandle = kzalloc(sizeof(struct lpfc_nvme_qhandle), GFP_KERNEL); if (qhandle == NULL) return -ENOMEM; @@ -267,7 +272,8 @@ lpfc_nvme_handle_lsreq(struct lpfc_hba *phba, return -EINVAL; remoteport = lpfc_rport->remoteport; - if (!vport->localport) + if (!vport->localport || + vport->phba->hba_flag & HBA_IOQ_FLUSH) return -EINVAL; lport = vport->localport->private; @@ -559,6 +565,8 @@ __lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->nlp_DID, ntype, nstate); return -ENODEV; } + if (vport->phba->hba_flag & HBA_IOQ_FLUSH) + return -ENODEV; if (!vport->phba->sli4_hba.nvmels_wq) return -ENOMEM; @@ -662,7 +670,8 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, return -EINVAL; vport = lport->vport; - if (vport->load_flag & FC_UNLOADING) + if (vport->load_flag & FC_UNLOADING || + vport->phba->hba_flag & HBA_IOQ_FLUSH) return -ENODEV; atomic_inc(&lport->fc4NvmeLsRequests); @@ -1516,7 +1525,8 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, phba = vport->phba; - if (unlikely(vport->load_flag & FC_UNLOADING)) { + if ((unlikely(vport->load_flag & FC_UNLOADING)) || + phba->hba_flag & HBA_IOQ_FLUSH) { lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR, "6124 Fail IO, Driver unload\n"); atomic_inc(&lport->xmt_fcp_err); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 26f6a147b5ae..70c3929a9fb4 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -4542,42 +4542,62 @@ lpfc_sli_handle_slow_ring_event_s4(struct lpfc_hba *phba, void lpfc_sli_abort_iocb_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) { - LIST_HEAD(completions); + LIST_HEAD(tx_completions); + LIST_HEAD(txcmplq_completions); struct lpfc_iocbq *iocb, *next_iocb; + int offline; if (pring->ringno == LPFC_ELS_RING) { lpfc_fabric_abort_hba(phba); } + offline = pci_channel_offline(phba->pcidev); /* Error everything on txq and txcmplq * First do the txq. */ if (phba->sli_rev >= LPFC_SLI_REV4) { spin_lock_irq(&pring->ring_lock); - list_splice_init(&pring->txq, &completions); + list_splice_init(&pring->txq, &tx_completions); pring->txq_cnt = 0; - spin_unlock_irq(&pring->ring_lock); - spin_lock_irq(&phba->hbalock); - /* Next issue ABTS for everything on the txcmplq */ - list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) - lpfc_sli_issue_abort_iotag(phba, pring, iocb, NULL); - spin_unlock_irq(&phba->hbalock); + if (offline) { + list_splice_init(&pring->txcmplq, + &txcmplq_completions); + } else { + /* Next issue ABTS for everything on the txcmplq */ + list_for_each_entry_safe(iocb, next_iocb, + &pring->txcmplq, list) + lpfc_sli_issue_abort_iotag(phba, pring, + iocb, NULL); + } + spin_unlock_irq(&pring->ring_lock); } else { spin_lock_irq(&phba->hbalock); - list_splice_init(&pring->txq, &completions); + list_splice_init(&pring->txq, &tx_completions); pring->txq_cnt = 0; - /* Next issue ABTS for everything on the txcmplq */ - list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) - lpfc_sli_issue_abort_iotag(phba, pring, iocb, NULL); + if (offline) { + list_splice_init(&pring->txcmplq, &txcmplq_completions); + } else { + /* Next issue ABTS for everything on the txcmplq */ + list_for_each_entry_safe(iocb, next_iocb, + &pring->txcmplq, list) + lpfc_sli_issue_abort_iotag(phba, pring, + iocb, NULL); + } spin_unlock_irq(&phba->hbalock); } - /* Make sure HBA is alive */ - lpfc_issue_hb_tmo(phba); + if (offline) { + /* Cancel all the IOCBs from the completions list */ + lpfc_sli_cancel_iocbs(phba, &txcmplq_completions, + IOSTAT_LOCAL_REJECT, IOERR_SLI_ABORTED); + } else { + /* Make sure HBA is alive */ + lpfc_issue_hb_tmo(phba); + } /* Cancel all the IOCBs from the completions list */ - lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT, + lpfc_sli_cancel_iocbs(phba, &tx_completions, IOSTAT_LOCAL_REJECT, IOERR_SLI_ABORTED); } From df0101197c4d9596682901631f3ee193ed354873 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 16 Mar 2022 20:27:36 -0700 Subject: [PATCH 068/708] scsi: lpfc: Fix queue failures when recovering from PCI parity error When recovering from a pci-parity error the driver is failing to re-create queues, causing recovery to fail. Looking deeper, it was found that the interrupt vector count allocated on the recovery was fewer than the vectors originally allocated. This disparity resulted in CPU map entries with stale information. When the driver tries to re-create the queues, it attempts to use the stale information which indicates an eq/interrupt vector that was no longer created. Fix by clearng the cpup map array before enabling and requesting the IRQs in the lpfc_sli_reset_slot_s4 routine(). Link: https://lore.kernel.org/r/20220317032737.45308-4-jsmart2021@gmail.com Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index cf83bc0e27c0..461d333b1b3a 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -15236,6 +15236,8 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev) psli->sli_flag &= ~LPFC_SLI_ACTIVE; spin_unlock_irq(&phba->hbalock); + /* Init cpu_map array */ + lpfc_cpu_map_array_init(phba); /* Configure and enable interrupt */ intr_mode = lpfc_sli4_enable_intr(phba, phba->intr_mode); if (intr_mode == LPFC_INTR_ERROR) { From 4f3beb36b1e46f1a04967a84e3a1bd55bf7a9e7a Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 16 Mar 2022 20:27:37 -0700 Subject: [PATCH 069/708] scsi: lpfc: Update lpfc version to 14.2.0.1 Update lpfc version to 14.2.0.1 Link: https://lore.kernel.org/r/20220317032737.45308-5-jsmart2021@gmail.com Co-developed-by: Justin Tee Signed-off-by: Justin Tee Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index e52f37e5d896..a4d3259b8c52 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "14.2.0.0" +#define LPFC_DRIVER_VERSION "14.2.0.1" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ From 7294a9bcaa7ee0d3b96aab1a277317315fd46f09 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 23 Mar 2022 13:55:44 -0700 Subject: [PATCH 070/708] scsi: lpfc: Fix broken SLI4 abort path There was a merge error in ther 14.2.0.0 patches that resulted in the SLI4 path using the SLI3 issue_abort_iotag() routine. This resulted in txcmplq corruption. Fix to use the SLI4 routine when SLI4. Link: https://lore.kernel.org/r/20220323205545.81814-2-jsmart2021@gmail.com Fixes: 31a59f75702f ("scsi: lpfc: SLI path split: Refactor Abort paths") Cc: # v5.2+ Co-developed-by: Dick Kennedy Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_scsi.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 3c132604fd91..ba9dbb51b75f 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5929,13 +5929,15 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) } lpfc_cmd->waitq = &waitq; - if (phba->sli_rev == LPFC_SLI_REV4) + if (phba->sli_rev == LPFC_SLI_REV4) { spin_unlock(&pring_s4->ring_lock); - else + ret_val = lpfc_sli4_issue_abort_iotag(phba, iocb, + lpfc_sli_abort_fcp_cmpl); + } else { pring = &phba->sli.sli3_ring[LPFC_FCP_RING]; - - ret_val = lpfc_sli_issue_abort_iotag(phba, pring, iocb, - lpfc_sli_abort_fcp_cmpl); + ret_val = lpfc_sli_issue_abort_iotag(phba, pring, iocb, + lpfc_sli_abort_fcp_cmpl); + } /* Make sure HBA is alive */ lpfc_issue_hb_tmo(phba); From c26bd6602e1d348bfa754dc55e5608c922dd2801 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 23 Mar 2022 13:55:45 -0700 Subject: [PATCH 071/708] scsi: lpfc: Fix locking for lpfc_sli_iocbq_lookup() The rules changed for lpfc_sli_iocbq_lookup() vs locking. Prior, the routine properly took out the lock. In newly refactored code, the locks must be held when calling the routine. Fix lpfc_sli_process_sol_iocb() to take the locks before calling the routine. Fix lpfc_sli_handle_fast_ring_event() to not release the locks to call the routine. Link: https://lore.kernel.org/r/20220323205545.81814-3-jsmart2021@gmail.com Fixes: 1b64aa9eae28 ("scsi: lpfc: SLI path split: Refactor fast and slow paths to native SLI4") Co-developed-by: Dick Kennedy Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 70c3929a9fb4..bda2a7ba4e77 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -3721,7 +3721,15 @@ lpfc_sli_process_sol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, unsigned long iflag; u32 ulp_command, ulp_status, ulp_word4, ulp_context, iotag; + if (phba->sli_rev == LPFC_SLI_REV4) + spin_lock_irqsave(&pring->ring_lock, iflag); + else + spin_lock_irqsave(&phba->hbalock, iflag); cmdiocbp = lpfc_sli_iocbq_lookup(phba, pring, saveq); + if (phba->sli_rev == LPFC_SLI_REV4) + spin_unlock_irqrestore(&pring->ring_lock, iflag); + else + spin_unlock_irqrestore(&phba->hbalock, iflag); ulp_command = get_job_cmnd(phba, saveq); ulp_status = get_job_ulpstatus(phba, saveq); @@ -4058,10 +4066,8 @@ lpfc_sli_handle_fast_ring_event(struct lpfc_hba *phba, break; } - spin_unlock_irqrestore(&phba->hbalock, iflag); cmdiocbq = lpfc_sli_iocbq_lookup(phba, pring, &rspiocbq); - spin_lock_irqsave(&phba->hbalock, iflag); if (unlikely(!cmdiocbq)) break; if (cmdiocbq->cmd_flag & LPFC_DRIVER_ABORTED) From 8ee15ea779c332f94d74d135212403f2d0defb5f Mon Sep 17 00:00:00 2001 From: Keoseong Park Date: Thu, 24 Mar 2022 16:01:46 +0900 Subject: [PATCH 072/708] scsi: ufs: core: Remove unused field in struct ufs_hba Remove unused fields 'rpm_lvl_attr' and 'spm_lvl_attr' in struct ufs_hba. Commit cbb6813ee771 ("scsi: ufs: sysfs: attribute group for existing sysfs entries.") removed all code using that field. Link: https://lore.kernel.org/r/413601558.101648105683746.JavaMail.epsvc@epcpadp4 Reviewed-by: Bart Van Assche Acked-by: Avri Altman Signed-off-by: Keoseong Park Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 88c20f3608c2..94f545be183a 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -820,8 +820,6 @@ struct ufs_hba { enum ufs_pm_level rpm_lvl; /* Desired UFS power management level during system PM */ enum ufs_pm_level spm_lvl; - struct device_attribute rpm_lvl_attr; - struct device_attribute spm_lvl_attr; int pm_op_in_progress; /* Auto-Hibernate Idle Timer register value */ From f06aa52cb2723ec67e92df463827b800d6c477d1 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Thu, 24 Mar 2022 14:46:03 +0100 Subject: [PATCH 073/708] scsi: core: scsi_logging: Fix a BUG The request_queue may be NULL in a request, for example when it comes from scsi_ioctl_reset(). Check it before use. Fixes: f3fa33acca9f ("block: remove the ->rq_disk field in struct request") Link: https://lore.kernel.org/r/20220324134603.28463-1-thenzl@redhat.com Reported-by: Changhui Zhong Reviewed-by: Christoph Hellwig Signed-off-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_logging.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_logging.c b/drivers/scsi/scsi_logging.c index ff89de86545d..b02af340c2d3 100644 --- a/drivers/scsi/scsi_logging.c +++ b/drivers/scsi/scsi_logging.c @@ -30,7 +30,7 @@ static inline const char *scmd_name(const struct scsi_cmnd *scmd) { struct request *rq = scsi_cmd_to_rq((struct scsi_cmnd *)scmd); - if (!rq->q->disk) + if (!rq->q || !rq->q->disk) return NULL; return rq->q->disk->disk_name; } From f16aa285e6185271bc6812a176f7ae3dbd7fe28d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 26 Mar 2022 13:48:15 +0100 Subject: [PATCH 074/708] scsi: pmcraid: Remove the PMCRAID_PASSTHROUGH_IOCTL ioctl implementation The whole passthrough ioctl path looks completely broken. For example it DMA maps the scatterlist and after that copies data to it, which is prohibited by the DMA API contract. Moreover, in pmcraid_alloc_sglist(), the pointer returned by a sgl_alloc_order() call is not recorded anywhere which is pointless. So remove the PMCRAID_PASSTHROUGH_IOCTL ioctl implementation entirely. Should it be needed, we should reimplement it using the proper block layer request mapping helpers. Link: https://lore.kernel.org/r/7f27a70bec3f3dcaf46a29b1c630edd4792e71c0.1648298857.git.christophe.jaillet@wanadoo.fr Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Signed-off-by: Christophe JAILLET Signed-off-by: Martin K. Petersen --- drivers/scsi/pmcraid.c | 491 ----------------------------------------- drivers/scsi/pmcraid.h | 33 --- 2 files changed, 524 deletions(-) diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index 928532180d32..fd674ed1febe 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -3181,124 +3181,6 @@ static int pmcraid_build_ioadl( return 0; } -/** - * pmcraid_free_sglist - Frees an allocated SG buffer list - * @sglist: scatter/gather list pointer - * - * Free a DMA'able memory previously allocated with pmcraid_alloc_sglist - * - * Return value: - * none - */ -static void pmcraid_free_sglist(struct pmcraid_sglist *sglist) -{ - sgl_free_order(sglist->scatterlist, sglist->order); - kfree(sglist); -} - -/** - * pmcraid_alloc_sglist - Allocates memory for a SG list - * @buflen: buffer length - * - * Allocates a DMA'able buffer in chunks and assembles a scatter/gather - * list. - * - * Return value - * pointer to sglist / NULL on failure - */ -static struct pmcraid_sglist *pmcraid_alloc_sglist(int buflen) -{ - struct pmcraid_sglist *sglist; - int sg_size; - int order; - - sg_size = buflen / (PMCRAID_MAX_IOADLS - 1); - order = (sg_size > 0) ? get_order(sg_size) : 0; - - /* Allocate a scatter/gather list for the DMA */ - sglist = kzalloc(sizeof(struct pmcraid_sglist), GFP_KERNEL); - if (sglist == NULL) - return NULL; - - sglist->order = order; - sgl_alloc_order(buflen, order, false, GFP_KERNEL | __GFP_ZERO, - &sglist->num_sg); - - return sglist; -} - -/** - * pmcraid_copy_sglist - Copy user buffer to kernel buffer's SG list - * @sglist: scatter/gather list pointer - * @buffer: buffer pointer - * @len: buffer length - * @direction: data transfer direction - * - * Copy a user buffer into a buffer allocated by pmcraid_alloc_sglist - * - * Return value: - * 0 on success / other on failure - */ -static int pmcraid_copy_sglist( - struct pmcraid_sglist *sglist, - void __user *buffer, - u32 len, - int direction -) -{ - struct scatterlist *sg; - void *kaddr; - int bsize_elem; - int i; - int rc = 0; - - /* Determine the actual number of bytes per element */ - bsize_elem = PAGE_SIZE * (1 << sglist->order); - - sg = sglist->scatterlist; - - for (i = 0; i < (len / bsize_elem); i++, sg = sg_next(sg), buffer += bsize_elem) { - struct page *page = sg_page(sg); - - kaddr = kmap(page); - if (direction == DMA_TO_DEVICE) - rc = copy_from_user(kaddr, buffer, bsize_elem); - else - rc = copy_to_user(buffer, kaddr, bsize_elem); - - kunmap(page); - - if (rc) { - pmcraid_err("failed to copy user data into sg list\n"); - return -EFAULT; - } - - sg->length = bsize_elem; - } - - if (len % bsize_elem) { - struct page *page = sg_page(sg); - - kaddr = kmap(page); - - if (direction == DMA_TO_DEVICE) - rc = copy_from_user(kaddr, buffer, len % bsize_elem); - else - rc = copy_to_user(buffer, kaddr, len % bsize_elem); - - kunmap(page); - - sg->length = len % bsize_elem; - } - - if (rc) { - pmcraid_err("failed to copy user data into sg list\n"); - rc = -EFAULT; - } - - return rc; -} - /** * pmcraid_queuecommand_lck - Queue a mid-layer request * @scsi_cmd: scsi command struct @@ -3454,365 +3336,6 @@ static int pmcraid_chr_fasync(int fd, struct file *filep, int mode) return rc; } - -/** - * pmcraid_build_passthrough_ioadls - builds SG elements for passthrough - * commands sent over IOCTL interface - * - * @cmd : pointer to struct pmcraid_cmd - * @buflen : length of the request buffer - * @direction : data transfer direction - * - * Return value - * 0 on success, non-zero error code on failure - */ -static int pmcraid_build_passthrough_ioadls( - struct pmcraid_cmd *cmd, - int buflen, - int direction -) -{ - struct pmcraid_sglist *sglist = NULL; - struct scatterlist *sg = NULL; - struct pmcraid_ioarcb *ioarcb = &cmd->ioa_cb->ioarcb; - struct pmcraid_ioadl_desc *ioadl; - int i; - - sglist = pmcraid_alloc_sglist(buflen); - - if (!sglist) { - pmcraid_err("can't allocate memory for passthrough SGls\n"); - return -ENOMEM; - } - - sglist->num_dma_sg = dma_map_sg(&cmd->drv_inst->pdev->dev, - sglist->scatterlist, - sglist->num_sg, direction); - - if (!sglist->num_dma_sg || sglist->num_dma_sg > PMCRAID_MAX_IOADLS) { - dev_err(&cmd->drv_inst->pdev->dev, - "Failed to map passthrough buffer!\n"); - pmcraid_free_sglist(sglist); - return -EIO; - } - - cmd->sglist = sglist; - ioarcb->request_flags0 |= NO_LINK_DESCS; - - ioadl = pmcraid_init_ioadls(cmd, sglist->num_dma_sg); - - /* Initialize IOADL descriptor addresses */ - for_each_sg(sglist->scatterlist, sg, sglist->num_dma_sg, i) { - ioadl[i].data_len = cpu_to_le32(sg_dma_len(sg)); - ioadl[i].address = cpu_to_le64(sg_dma_address(sg)); - ioadl[i].flags = 0; - } - - /* setup the last descriptor */ - ioadl[i - 1].flags = IOADL_FLAGS_LAST_DESC; - - return 0; -} - - -/** - * pmcraid_release_passthrough_ioadls - release passthrough ioadls - * - * @cmd: pointer to struct pmcraid_cmd for which ioadls were allocated - * @buflen: size of the request buffer - * @direction: data transfer direction - * - * Return value - * 0 on success, non-zero error code on failure - */ -static void pmcraid_release_passthrough_ioadls( - struct pmcraid_cmd *cmd, - int buflen, - int direction -) -{ - struct pmcraid_sglist *sglist = cmd->sglist; - - if (buflen > 0) { - dma_unmap_sg(&cmd->drv_inst->pdev->dev, - sglist->scatterlist, - sglist->num_sg, - direction); - pmcraid_free_sglist(sglist); - cmd->sglist = NULL; - } -} - -/** - * pmcraid_ioctl_passthrough - handling passthrough IOCTL commands - * - * @pinstance: pointer to adapter instance structure - * @ioctl_cmd: ioctl code - * @buflen: unused - * @arg: pointer to pmcraid_passthrough_buffer user buffer - * - * Return value - * 0 on success, non-zero error code on failure - */ -static long pmcraid_ioctl_passthrough( - struct pmcraid_instance *pinstance, - unsigned int ioctl_cmd, - unsigned int buflen, - void __user *arg -) -{ - struct pmcraid_passthrough_ioctl_buffer *buffer; - struct pmcraid_ioarcb *ioarcb; - struct pmcraid_cmd *cmd; - struct pmcraid_cmd *cancel_cmd; - void __user *request_buffer; - unsigned long request_offset; - unsigned long lock_flags; - void __user *ioasa; - u32 ioasc; - int request_size; - int buffer_size; - u8 direction; - int rc = 0; - - /* If IOA reset is in progress, wait 10 secs for reset to complete */ - if (pinstance->ioa_reset_in_progress) { - rc = wait_event_interruptible_timeout( - pinstance->reset_wait_q, - !pinstance->ioa_reset_in_progress, - msecs_to_jiffies(10000)); - - if (!rc) - return -ETIMEDOUT; - else if (rc < 0) - return -ERESTARTSYS; - } - - /* If adapter is not in operational state, return error */ - if (pinstance->ioa_state != IOA_STATE_OPERATIONAL) { - pmcraid_err("IOA is not operational\n"); - return -ENOTTY; - } - - buffer_size = sizeof(struct pmcraid_passthrough_ioctl_buffer); - buffer = kmalloc(buffer_size, GFP_KERNEL); - - if (!buffer) { - pmcraid_err("no memory for passthrough buffer\n"); - return -ENOMEM; - } - - request_offset = - offsetof(struct pmcraid_passthrough_ioctl_buffer, request_buffer); - - request_buffer = arg + request_offset; - - rc = copy_from_user(buffer, arg, - sizeof(struct pmcraid_passthrough_ioctl_buffer)); - - ioasa = arg + offsetof(struct pmcraid_passthrough_ioctl_buffer, ioasa); - - if (rc) { - pmcraid_err("ioctl: can't copy passthrough buffer\n"); - rc = -EFAULT; - goto out_free_buffer; - } - - request_size = le32_to_cpu(buffer->ioarcb.data_transfer_length); - - if (buffer->ioarcb.request_flags0 & TRANSFER_DIR_WRITE) { - direction = DMA_TO_DEVICE; - } else { - direction = DMA_FROM_DEVICE; - } - - if (request_size < 0) { - rc = -EINVAL; - goto out_free_buffer; - } - - /* check if we have any additional command parameters */ - if (le16_to_cpu(buffer->ioarcb.add_cmd_param_length) - > PMCRAID_ADD_CMD_PARAM_LEN) { - rc = -EINVAL; - goto out_free_buffer; - } - - cmd = pmcraid_get_free_cmd(pinstance); - - if (!cmd) { - pmcraid_err("free command block is not available\n"); - rc = -ENOMEM; - goto out_free_buffer; - } - - cmd->scsi_cmd = NULL; - ioarcb = &(cmd->ioa_cb->ioarcb); - - /* Copy the user-provided IOARCB stuff field by field */ - ioarcb->resource_handle = buffer->ioarcb.resource_handle; - ioarcb->data_transfer_length = buffer->ioarcb.data_transfer_length; - ioarcb->cmd_timeout = buffer->ioarcb.cmd_timeout; - ioarcb->request_type = buffer->ioarcb.request_type; - ioarcb->request_flags0 = buffer->ioarcb.request_flags0; - ioarcb->request_flags1 = buffer->ioarcb.request_flags1; - memcpy(ioarcb->cdb, buffer->ioarcb.cdb, PMCRAID_MAX_CDB_LEN); - - if (buffer->ioarcb.add_cmd_param_length) { - ioarcb->add_cmd_param_length = - buffer->ioarcb.add_cmd_param_length; - ioarcb->add_cmd_param_offset = - buffer->ioarcb.add_cmd_param_offset; - memcpy(ioarcb->add_data.u.add_cmd_params, - buffer->ioarcb.add_data.u.add_cmd_params, - le16_to_cpu(buffer->ioarcb.add_cmd_param_length)); - } - - /* set hrrq number where the IOA should respond to. Note that all cmds - * generated internally uses hrrq_id 0, exception to this is the cmd - * block of scsi_cmd which is re-used (e.g. cancel/abort), which uses - * hrrq_id assigned here in queuecommand - */ - ioarcb->hrrq_id = atomic_add_return(1, &(pinstance->last_message_id)) % - pinstance->num_hrrq; - - if (request_size) { - rc = pmcraid_build_passthrough_ioadls(cmd, - request_size, - direction); - if (rc) { - pmcraid_err("couldn't build passthrough ioadls\n"); - goto out_free_cmd; - } - } - - /* If data is being written into the device, copy the data from user - * buffers - */ - if (direction == DMA_TO_DEVICE && request_size > 0) { - rc = pmcraid_copy_sglist(cmd->sglist, - request_buffer, - request_size, - direction); - if (rc) { - pmcraid_err("failed to copy user buffer\n"); - goto out_free_sglist; - } - } - - /* passthrough ioctl is a blocking command so, put the user to sleep - * until timeout. Note that a timeout value of 0 means, do timeout. - */ - cmd->cmd_done = pmcraid_internal_done; - init_completion(&cmd->wait_for_completion); - cmd->completion_req = 1; - - pmcraid_info("command(%d) (CDB[0] = %x) for %x\n", - le32_to_cpu(cmd->ioa_cb->ioarcb.response_handle) >> 2, - cmd->ioa_cb->ioarcb.cdb[0], - le32_to_cpu(cmd->ioa_cb->ioarcb.resource_handle)); - - spin_lock_irqsave(pinstance->host->host_lock, lock_flags); - _pmcraid_fire_command(cmd); - spin_unlock_irqrestore(pinstance->host->host_lock, lock_flags); - - /* NOTE ! Remove the below line once abort_task is implemented - * in firmware. This line disables ioctl command timeout handling logic - * similar to IO command timeout handling, making ioctl commands to wait - * until the command completion regardless of timeout value specified in - * ioarcb - */ - buffer->ioarcb.cmd_timeout = 0; - - /* If command timeout is specified put caller to wait till that time, - * otherwise it would be blocking wait. If command gets timed out, it - * will be aborted. - */ - if (buffer->ioarcb.cmd_timeout == 0) { - wait_for_completion(&cmd->wait_for_completion); - } else if (!wait_for_completion_timeout( - &cmd->wait_for_completion, - msecs_to_jiffies(le16_to_cpu(buffer->ioarcb.cmd_timeout) * 1000))) { - - pmcraid_info("aborting cmd %d (CDB[0] = %x) due to timeout\n", - le32_to_cpu(cmd->ioa_cb->ioarcb.response_handle) >> 2, - cmd->ioa_cb->ioarcb.cdb[0]); - - spin_lock_irqsave(pinstance->host->host_lock, lock_flags); - cancel_cmd = pmcraid_abort_cmd(cmd); - spin_unlock_irqrestore(pinstance->host->host_lock, lock_flags); - - if (cancel_cmd) { - wait_for_completion(&cancel_cmd->wait_for_completion); - ioasc = le32_to_cpu(cancel_cmd->ioa_cb->ioasa.ioasc); - pmcraid_return_cmd(cancel_cmd); - - /* if abort task couldn't find the command i.e it got - * completed prior to aborting, return good completion. - * if command got aborted successfully or there was IOA - * reset due to abort task itself getting timedout then - * return -ETIMEDOUT - */ - if (ioasc == PMCRAID_IOASC_IOA_WAS_RESET || - PMCRAID_IOASC_SENSE_KEY(ioasc) == 0x00) { - if (ioasc != PMCRAID_IOASC_GC_IOARCB_NOTFOUND) - rc = -ETIMEDOUT; - goto out_handle_response; - } - } - - /* no command block for abort task or abort task failed to abort - * the IOARCB, then wait for 150 more seconds and initiate reset - * sequence after timeout - */ - if (!wait_for_completion_timeout( - &cmd->wait_for_completion, - msecs_to_jiffies(150 * 1000))) { - pmcraid_reset_bringup(cmd->drv_inst); - rc = -ETIMEDOUT; - } - } - -out_handle_response: - /* copy entire IOASA buffer and return IOCTL success. - * If copying IOASA to user-buffer fails, return - * EFAULT - */ - if (copy_to_user(ioasa, &cmd->ioa_cb->ioasa, - sizeof(struct pmcraid_ioasa))) { - pmcraid_err("failed to copy ioasa buffer to user\n"); - rc = -EFAULT; - } - - /* If the data transfer was from device, copy the data onto user - * buffers - */ - else if (direction == DMA_FROM_DEVICE && request_size > 0) { - rc = pmcraid_copy_sglist(cmd->sglist, - request_buffer, - request_size, - direction); - if (rc) { - pmcraid_err("failed to copy user buffer\n"); - rc = -EFAULT; - } - } - -out_free_sglist: - pmcraid_release_passthrough_ioadls(cmd, request_size, direction); - -out_free_cmd: - pmcraid_return_cmd(cmd); - -out_free_buffer: - kfree(buffer); - - return rc; -} - - - - /** * pmcraid_ioctl_driver - ioctl handler for commands handled by driver itself * @@ -3922,20 +3445,6 @@ static long pmcraid_chr_ioctl( switch (_IOC_TYPE(cmd)) { - case PMCRAID_PASSTHROUGH_IOCTL: - /* If ioctl code is to download microcode, we need to block - * mid-layer requests. - */ - if (cmd == PMCRAID_IOCTL_DOWNLOAD_MICROCODE) - scsi_block_requests(pinstance->host); - - retval = pmcraid_ioctl_passthrough(pinstance, cmd, - hdr->buffer_length, argp); - - if (cmd == PMCRAID_IOCTL_DOWNLOAD_MICROCODE) - scsi_unblock_requests(pinstance->host); - break; - case PMCRAID_DRIVER_IOCTL: arg += sizeof(struct pmcraid_ioctl_header); retval = pmcraid_ioctl_driver(pinstance, cmd, diff --git a/drivers/scsi/pmcraid.h b/drivers/scsi/pmcraid.h index bbb75318f1e7..9f59930e8b4f 100644 --- a/drivers/scsi/pmcraid.h +++ b/drivers/scsi/pmcraid.h @@ -1022,41 +1022,16 @@ struct pmcraid_ioctl_header { #define PMCRAID_IOCTL_SIGNATURE "PMCRAID" -/* - * pmcraid_passthrough_ioctl_buffer - structure given as argument to - * passthrough(or firmware handled) IOCTL commands. Note that ioarcb requires - * 32-byte alignment so, it is necessary to pack this structure to avoid any - * holes between ioctl_header and passthrough buffer - * - * .ioactl_header : ioctl header - * .ioarcb : filled-up ioarcb buffer, driver always reads this buffer - * .ioasa : buffer for ioasa, driver fills this with IOASA from firmware - * .request_buffer: The I/O buffer (flat), driver reads/writes to this based on - * the transfer directions passed in ioarcb.flags0. Contents - * of this buffer are valid only when ioarcb.data_transfer_len - * is not zero. - */ -struct pmcraid_passthrough_ioctl_buffer { - struct pmcraid_ioctl_header ioctl_header; - struct pmcraid_ioarcb ioarcb; - struct pmcraid_ioasa ioasa; - u8 request_buffer[]; -} __attribute__ ((packed, aligned(PMCRAID_IOARCB_ALIGNMENT))); - /* * keys to differentiate between driver handled IOCTLs and passthrough * IOCTLs passed to IOA. driver determines the ioctl type using macro * _IOC_TYPE */ #define PMCRAID_DRIVER_IOCTL 'D' -#define PMCRAID_PASSTHROUGH_IOCTL 'F' #define DRV_IOCTL(n, size) \ _IOC(_IOC_READ|_IOC_WRITE, PMCRAID_DRIVER_IOCTL, (n), (size)) -#define FMW_IOCTL(n, size) \ - _IOC(_IOC_READ|_IOC_WRITE, PMCRAID_PASSTHROUGH_IOCTL, (n), (size)) - /* * _ARGSIZE: macro that gives size of the argument type passed to an IOCTL cmd. * This is to facilitate applications avoiding un-necessary memory allocations. @@ -1069,12 +1044,4 @@ struct pmcraid_passthrough_ioctl_buffer { #define PMCRAID_IOCTL_RESET_ADAPTER \ DRV_IOCTL(5, sizeof(struct pmcraid_ioctl_header)) -/* passthrough/firmware handled commands */ -#define PMCRAID_IOCTL_PASSTHROUGH_COMMAND \ - FMW_IOCTL(1, sizeof(struct pmcraid_passthrough_ioctl_buffer)) - -#define PMCRAID_IOCTL_DOWNLOAD_MICROCODE \ - FMW_IOCTL(2, sizeof(struct pmcraid_passthrough_ioctl_buffer)) - - #endif /* _PMCRAID_H */ From bc5519c18a32ce855bb51b9f5eceb77a9489d080 Mon Sep 17 00:00:00 2001 From: Kevin Groeneveld Date: Tue, 22 Mar 2022 20:22:42 -0400 Subject: [PATCH 075/708] scsi: sr: Fix typo in CDROM(CLOSETRAY|EJECT) handling Commit 2e27f576abc6 ("scsi: scsi_ioctl: Call scsi_cmd_ioctl() from scsi_ioctl()") seems to have a typo as it is checking ret instead of cmd in the if statement checking for CDROMCLOSETRAY and CDROMEJECT. This changes the behaviour of these ioctls as the cdrom_ioctl handling of these is more restrictive than the scsi_ioctl version. Link: https://lore.kernel.org/r/20220323002242.21157-1-kgroeneveld@lenbrook.com Fixes: 2e27f576abc6 ("scsi: scsi_ioctl: Call scsi_cmd_ioctl() from scsi_ioctl()") Reviewed-by: Christoph Hellwig Signed-off-by: Kevin Groeneveld Signed-off-by: Martin K. Petersen --- drivers/scsi/sr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index aaa54ad5f035..f232514f5a2b 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -578,7 +578,7 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, scsi_autopm_get_device(sdev); - if (ret != CDROMCLOSETRAY && ret != CDROMEJECT) { + if (cmd != CDROMCLOSETRAY && cmd != CDROMEJECT) { ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, arg); if (ret != -ENOSYS) goto put; From 0bade8e53279157c7cc9dd95d573b7e82223d78a Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Tue, 22 Mar 2022 12:44:43 -0700 Subject: [PATCH 076/708] scsi: ibmvscsis: Increase INITIAL_SRP_LIMIT to 1024 The adapter request_limit is hardcoded to be INITIAL_SRP_LIMIT which is currently an arbitrary value of 800. Increase this value to 1024 which better matches the characteristics of the typical IBMi Initiator that supports 32 LUNs and a queue depth of 32. This change also has the secondary benefit of being a power of two as required by the kfifo API. Since, Commit ab9bb6318b09 ("Partially revert "kfifo: fix kfifo_alloc() and kfifo_init()"") the size of IU pool for each target has been rounded down to 512 when attempting to kfifo_init() those pools with the current request_limit size of 800. Link: https://lore.kernel.org/r/20220322194443.678433-1-tyreld@linux.ibm.com Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 80238e6a3c98..eee1a24f7e15 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -36,7 +36,7 @@ #define IBMVSCSIS_VERSION "v0.2" -#define INITIAL_SRP_LIMIT 800 +#define INITIAL_SRP_LIMIT 1024 #define DEFAULT_MAX_SECTORS 256 #define MAX_TXU 1024 * 1024 From 63221571ef77c71a37d3c604d604168ce1de5cab Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 24 Mar 2022 08:29:37 +1100 Subject: [PATCH 077/708] scsi: aha152x: Stop using struct scsi_pointer Remove aha152x_cmd_priv.scsi_pointer by moving the necessary members into aha152x_cmd_priv proper. Tested with an Adaptec SlimSCSI APA-1460A card. Link: https://lore.kernel.org/r/bdc1264b6dd331150bffb737958cab8c9c068fa1.1648070977.git.fthain@linux-m68k.org Cc: Christoph Hellwig Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen --- drivers/scsi/aha152x.c | 235 ++++++++++++++++++++--------------------- 1 file changed, 113 insertions(+), 122 deletions(-) diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c index 5f554a3a0f62..caeebfb67149 100644 --- a/drivers/scsi/aha152x.c +++ b/drivers/scsi/aha152x.c @@ -317,14 +317,18 @@ enum { }; struct aha152x_cmd_priv { - struct scsi_pointer scsi_pointer; + char *ptr; + int this_residual; + struct scatterlist *buffer; + int status; + int message; + int sent_command; + int phase; }; -static struct scsi_pointer *aha152x_scsi_pointer(struct scsi_cmnd *cmd) +static struct aha152x_cmd_priv *aha152x_priv(struct scsi_cmnd *cmd) { - struct aha152x_cmd_priv *acmd = scsi_cmd_priv(cmd); - - return &acmd->scsi_pointer; + return scsi_cmd_priv(cmd); } MODULE_AUTHOR("Jürgen Fischer"); @@ -890,17 +894,16 @@ void aha152x_release(struct Scsi_Host *shpnt) static int setup_expected_interrupts(struct Scsi_Host *shpnt) { if(CURRENT_SC) { - struct scsi_pointer *scsi_pointer = - aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC); - scsi_pointer->phase |= 1 << 16; + acp->phase |= 1 << 16; - if (scsi_pointer->phase & selecting) { + if (acp->phase & selecting) { SETPORT(SSTAT1, SELTO); SETPORT(SIMODE0, ENSELDO | (DISCONNECTED_SC ? ENSELDI : 0)); SETPORT(SIMODE1, ENSELTIMO); } else { - SETPORT(SIMODE0, (scsi_pointer->phase & spiordy) ? ENSPIORDY : 0); + SETPORT(SIMODE0, (acp->phase & spiordy) ? ENSPIORDY : 0); SETPORT(SIMODE1, ENPHASEMIS | ENSCSIRST | ENSCSIPERR | ENBUSFREE); } } else if(STATE==seldi) { @@ -924,17 +927,16 @@ static int setup_expected_interrupts(struct Scsi_Host *shpnt) static int aha152x_internal_queue(struct scsi_cmnd *SCpnt, struct completion *complete, int phase) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(SCpnt); + struct aha152x_cmd_priv *acp = aha152x_priv(SCpnt); struct Scsi_Host *shpnt = SCpnt->device->host; unsigned long flags; - scsi_pointer->phase = not_issued | phase; - scsi_pointer->Status = 0x1; /* Ilegal status by SCSI standard */ - scsi_pointer->Message = 0; - scsi_pointer->have_data_in = 0; - scsi_pointer->sent_command = 0; + acp->phase = not_issued | phase; + acp->status = 0x1; /* Illegal status by SCSI standard */ + acp->message = 0; + acp->sent_command = 0; - if (scsi_pointer->phase & (resetting | check_condition)) { + if (acp->phase & (resetting | check_condition)) { if (!SCpnt->host_scribble || SCSEM(SCpnt) || SCNEXT(SCpnt)) { scmd_printk(KERN_ERR, SCpnt, "cannot reuse command\n"); return FAILED; @@ -957,15 +959,15 @@ static int aha152x_internal_queue(struct scsi_cmnd *SCpnt, SCp.phase : current state of the command */ if ((phase & resetting) || !scsi_sglist(SCpnt)) { - scsi_pointer->ptr = NULL; - scsi_pointer->this_residual = 0; + acp->ptr = NULL; + acp->this_residual = 0; scsi_set_resid(SCpnt, 0); - scsi_pointer->buffer = NULL; + acp->buffer = NULL; } else { scsi_set_resid(SCpnt, scsi_bufflen(SCpnt)); - scsi_pointer->buffer = scsi_sglist(SCpnt); - scsi_pointer->ptr = SG_ADDRESS(scsi_pointer->buffer); - scsi_pointer->this_residual = scsi_pointer->buffer->length; + acp->buffer = scsi_sglist(SCpnt); + acp->ptr = SG_ADDRESS(acp->buffer); + acp->this_residual = acp->buffer->length; } DO_LOCK(flags); @@ -1015,7 +1017,7 @@ static void reset_done(struct scsi_cmnd *SCpnt) static void aha152x_scsi_done(struct scsi_cmnd *SCpnt) { - if (aha152x_scsi_pointer(SCpnt)->phase & resetting) + if (aha152x_priv(SCpnt)->phase & resetting) reset_done(SCpnt); else scsi_done(SCpnt); @@ -1101,7 +1103,7 @@ static int aha152x_device_reset(struct scsi_cmnd * SCpnt) DO_LOCK(flags); - if (aha152x_scsi_pointer(SCpnt)->phase & resetted) { + if (aha152x_priv(SCpnt)->phase & resetted) { HOSTDATA(shpnt)->commands--; if (!HOSTDATA(shpnt)->commands) SETPORT(PORTA, 0); @@ -1395,31 +1397,30 @@ static void busfree_run(struct Scsi_Host *shpnt) SETPORT(SSTAT1, CLRBUSFREE); if(CURRENT_SC) { - struct scsi_pointer *scsi_pointer = - aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC); #if defined(AHA152X_STAT) action++; #endif - scsi_pointer->phase &= ~syncneg; + acp->phase &= ~syncneg; - if (scsi_pointer->phase & completed) { + if (acp->phase & completed) { /* target sent COMMAND COMPLETE */ - done(shpnt, scsi_pointer->Status, DID_OK); + done(shpnt, acp->status, DID_OK); - } else if (scsi_pointer->phase & aborted) { - done(shpnt, scsi_pointer->Status, DID_ABORT); + } else if (acp->phase & aborted) { + done(shpnt, acp->status, DID_ABORT); - } else if (scsi_pointer->phase & resetted) { - done(shpnt, scsi_pointer->Status, DID_RESET); + } else if (acp->phase & resetted) { + done(shpnt, acp->status, DID_RESET); - } else if (scsi_pointer->phase & disconnected) { + } else if (acp->phase & disconnected) { /* target sent DISCONNECT */ #if defined(AHA152X_STAT) HOSTDATA(shpnt)->disconnections++; #endif append_SC(&DISCONNECTED_SC, CURRENT_SC); - scsi_pointer->phase |= 1 << 16; + acp->phase |= 1 << 16; CURRENT_SC = NULL; } else { @@ -1438,24 +1439,23 @@ static void busfree_run(struct Scsi_Host *shpnt) action++; #endif - if (aha152x_scsi_pointer(DONE_SC)->phase & check_condition) { + if (aha152x_priv(DONE_SC)->phase & check_condition) { struct scsi_cmnd *cmd = HOSTDATA(shpnt)->done_SC; struct aha152x_scdata *sc = SCDATA(cmd); scsi_eh_restore_cmnd(cmd, &sc->ses); - aha152x_scsi_pointer(cmd)->Status = SAM_STAT_CHECK_CONDITION; + aha152x_priv(cmd)->status = SAM_STAT_CHECK_CONDITION; HOSTDATA(shpnt)->commands--; if (!HOSTDATA(shpnt)->commands) SETPORT(PORTA, 0); /* turn led off */ - } else if (aha152x_scsi_pointer(DONE_SC)->Status == - SAM_STAT_CHECK_CONDITION) { + } else if (aha152x_priv(DONE_SC)->status == SAM_STAT_CHECK_CONDITION) { #if defined(AHA152X_STAT) HOSTDATA(shpnt)->busfree_with_check_condition++; #endif - if(!(aha152x_scsi_pointer(DONE_SC)->phase & not_issued)) { + if (!(aha152x_priv(DONE_SC)->phase & not_issued)) { struct aha152x_scdata *sc; struct scsi_cmnd *ptr = DONE_SC; DONE_SC=NULL; @@ -1480,7 +1480,7 @@ static void busfree_run(struct Scsi_Host *shpnt) if (!HOSTDATA(shpnt)->commands) SETPORT(PORTA, 0); /* turn led off */ - if (!(aha152x_scsi_pointer(ptr)->phase & resetting)) { + if (!(aha152x_priv(ptr)->phase & resetting)) { kfree(ptr->host_scribble); ptr->host_scribble=NULL; } @@ -1503,13 +1503,12 @@ static void busfree_run(struct Scsi_Host *shpnt) DO_UNLOCK(flags); if(CURRENT_SC) { - struct scsi_pointer *scsi_pointer = - aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC); #if defined(AHA152X_STAT) action++; #endif - scsi_pointer->phase |= selecting; + acp->phase |= selecting; /* clear selection timeout */ SETPORT(SSTAT1, SELTO); @@ -1537,13 +1536,13 @@ static void busfree_run(struct Scsi_Host *shpnt) */ static void seldo_run(struct Scsi_Host *shpnt) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC); SETPORT(SCSISIG, 0); SETPORT(SSTAT1, CLRBUSFREE); SETPORT(SSTAT1, CLRPHASECHG); - scsi_pointer->phase &= ~(selecting | not_issued); + acp->phase &= ~(selecting | not_issued); SETPORT(SCSISEQ, 0); @@ -1558,12 +1557,12 @@ static void seldo_run(struct Scsi_Host *shpnt) ADDMSGO(IDENTIFY(RECONNECT, CURRENT_SC->device->lun)); - if (scsi_pointer->phase & aborting) { + if (acp->phase & aborting) { ADDMSGO(ABORT); - } else if (scsi_pointer->phase & resetting) { + } else if (acp->phase & resetting) { ADDMSGO(BUS_DEVICE_RESET); } else if (SYNCNEG==0 && SYNCHRONOUS) { - scsi_pointer->phase |= syncneg; + acp->phase |= syncneg; MSGOLEN += spi_populate_sync_msg(&MSGO(MSGOLEN), 50, 8); SYNCNEG=1; /* negotiation in progress */ } @@ -1578,7 +1577,7 @@ static void seldo_run(struct Scsi_Host *shpnt) */ static void selto_run(struct Scsi_Host *shpnt) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp; SETPORT(SCSISEQ, 0); SETPORT(SSTAT1, CLRSELTIMO); @@ -1586,9 +1585,10 @@ static void selto_run(struct Scsi_Host *shpnt) if (!CURRENT_SC) return; - scsi_pointer->phase &= ~selecting; + acp = aha152x_priv(CURRENT_SC); + acp->phase &= ~selecting; - if (scsi_pointer->phase & aborted) + if (acp->phase & aborted) done(shpnt, SAM_STAT_GOOD, DID_ABORT); else if (TESTLO(SSTAT0, SELINGO)) done(shpnt, SAM_STAT_GOOD, DID_BUS_BUSY); @@ -1616,10 +1616,9 @@ static void seldi_run(struct Scsi_Host *shpnt) SETPORT(SSTAT1, CLRPHASECHG); if(CURRENT_SC) { - struct scsi_pointer *scsi_pointer = - aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC); - if (!(scsi_pointer->phase & not_issued)) + if (!(acp->phase & not_issued)) scmd_printk(KERN_ERR, CURRENT_SC, "command should not have been issued yet\n"); @@ -1676,7 +1675,7 @@ static void seldi_run(struct Scsi_Host *shpnt) static void msgi_run(struct Scsi_Host *shpnt) { for(;;) { - struct scsi_pointer *scsi_pointer; + struct aha152x_cmd_priv *acp; int sstat1 = GETPORT(SSTAT1); if(sstat1 & (PHASECHG|PHASEMIS|BUSFREE) || !(sstat1 & REQINIT)) @@ -1714,9 +1713,9 @@ static void msgi_run(struct Scsi_Host *shpnt) continue; } - scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); - scsi_pointer->Message = MSGI(0); - scsi_pointer->phase &= ~disconnected; + acp = aha152x_priv(CURRENT_SC); + acp->message = MSGI(0); + acp->phase &= ~disconnected; MSGILEN=0; @@ -1724,8 +1723,8 @@ static void msgi_run(struct Scsi_Host *shpnt) continue; } - scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); - scsi_pointer->Message = MSGI(0); + acp = aha152x_priv(CURRENT_SC); + acp->message = MSGI(0); switch (MSGI(0)) { case DISCONNECT: @@ -1733,11 +1732,11 @@ static void msgi_run(struct Scsi_Host *shpnt) scmd_printk(KERN_WARNING, CURRENT_SC, "target was not allowed to disconnect\n"); - scsi_pointer->phase |= disconnected; + acp->phase |= disconnected; break; case COMMAND_COMPLETE: - scsi_pointer->phase |= completed; + acp->phase |= completed; break; case MESSAGE_REJECT: @@ -1867,11 +1866,9 @@ static void msgi_end(struct Scsi_Host *shpnt) */ static void msgo_init(struct Scsi_Host *shpnt) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); - if(MSGOLEN==0) { - if ((scsi_pointer->phase & syncneg) && SYNCNEG==2 && - SYNCRATE==0) { + if ((aha152x_priv(CURRENT_SC)->phase & syncneg) && + SYNCNEG == 2 && SYNCRATE == 0) { ADDMSGO(IDENTIFY(RECONNECT, CURRENT_SC->device->lun)); } else { scmd_printk(KERN_INFO, CURRENT_SC, @@ -1888,7 +1885,7 @@ static void msgo_init(struct Scsi_Host *shpnt) */ static void msgo_run(struct Scsi_Host *shpnt) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC); while(MSGO_Iphase |= identified; + acp->phase |= identified; if (MSGO(MSGO_I)==ABORT) - scsi_pointer->phase |= aborted; + acp->phase |= aborted; if (MSGO(MSGO_I)==BUS_DEVICE_RESET) - scsi_pointer->phase |= resetted; + acp->phase |= resetted; SETPORT(SCSIDAT, MSGO(MSGO_I++)); } @@ -1936,7 +1933,7 @@ static void msgo_end(struct Scsi_Host *shpnt) */ static void cmd_init(struct Scsi_Host *shpnt) { - if (aha152x_scsi_pointer(CURRENT_SC)->sent_command) { + if (aha152x_priv(CURRENT_SC)->sent_command) { scmd_printk(KERN_ERR, CURRENT_SC, "command already sent\n"); done(shpnt, SAM_STAT_GOOD, DID_ERROR); @@ -1967,7 +1964,7 @@ static void cmd_end(struct Scsi_Host *shpnt) "command sent incompletely (%d/%d)\n", CMD_I, CURRENT_SC->cmd_len); else - aha152x_scsi_pointer(CURRENT_SC)->sent_command++; + aha152x_priv(CURRENT_SC)->sent_command++; } /* @@ -1979,7 +1976,7 @@ static void status_run(struct Scsi_Host *shpnt) if (TESTLO(SSTAT0, SPIORDY)) return; - aha152x_scsi_pointer(CURRENT_SC)->Status = GETPORT(SCSIDAT); + aha152x_priv(CURRENT_SC)->status = GETPORT(SCSIDAT); } @@ -2003,7 +2000,7 @@ static void datai_init(struct Scsi_Host *shpnt) static void datai_run(struct Scsi_Host *shpnt) { - struct scsi_pointer *scsi_pointer; + struct aha152x_cmd_priv *acp; unsigned long the_time; int fifodata, data_count; @@ -2041,36 +2038,35 @@ static void datai_run(struct Scsi_Host *shpnt) fifodata = GETPORT(FIFOSTAT); } - scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); - if (scsi_pointer->this_residual > 0) { - while (fifodata > 0 && scsi_pointer->this_residual > 0) { - data_count = fifodata > scsi_pointer->this_residual ? - scsi_pointer->this_residual : - fifodata; + acp = aha152x_priv(CURRENT_SC); + if (acp->this_residual > 0) { + while (fifodata > 0 && acp->this_residual > 0) { + data_count = fifodata > acp->this_residual ? + acp->this_residual : fifodata; fifodata -= data_count; if (data_count & 1) { SETPORT(DMACNTRL0, ENDMA|_8BIT); - *scsi_pointer->ptr++ = GETPORT(DATAPORT); - scsi_pointer->this_residual--; + *acp->ptr++ = GETPORT(DATAPORT); + acp->this_residual--; DATA_LEN++; SETPORT(DMACNTRL0, ENDMA); } if (data_count > 1) { data_count >>= 1; - insw(DATAPORT, scsi_pointer->ptr, data_count); - scsi_pointer->ptr += 2 * data_count; - scsi_pointer->this_residual -= 2 * data_count; + insw(DATAPORT, acp->ptr, data_count); + acp->ptr += 2 * data_count; + acp->this_residual -= 2 * data_count; DATA_LEN += 2 * data_count; } - if (scsi_pointer->this_residual == 0 && - !sg_is_last(scsi_pointer->buffer)) { + if (acp->this_residual == 0 && + !sg_is_last(acp->buffer)) { /* advance to next buffer */ - scsi_pointer->buffer = sg_next(scsi_pointer->buffer); - scsi_pointer->ptr = SG_ADDRESS(scsi_pointer->buffer); - scsi_pointer->this_residual = scsi_pointer->buffer->length; + acp->buffer = sg_next(acp->buffer); + acp->ptr = SG_ADDRESS(acp->buffer); + acp->this_residual = acp->buffer->length; } } } else if (fifodata > 0) { @@ -2138,15 +2134,15 @@ static void datao_init(struct Scsi_Host *shpnt) static void datao_run(struct Scsi_Host *shpnt) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC); unsigned long the_time; int data_count; /* until phase changes or all data sent */ - while (TESTLO(DMASTAT, INTSTAT) && scsi_pointer->this_residual > 0) { + while (TESTLO(DMASTAT, INTSTAT) && acp->this_residual > 0) { data_count = 128; - if (data_count > scsi_pointer->this_residual) - data_count = scsi_pointer->this_residual; + if (data_count > acp->this_residual) + data_count = acp->this_residual; if(TESTLO(DMASTAT, DFIFOEMP)) { scmd_printk(KERN_ERR, CURRENT_SC, @@ -2157,26 +2153,25 @@ static void datao_run(struct Scsi_Host *shpnt) if(data_count & 1) { SETPORT(DMACNTRL0,WRITE_READ|ENDMA|_8BIT); - SETPORT(DATAPORT, *scsi_pointer->ptr++); - scsi_pointer->this_residual--; + SETPORT(DATAPORT, *acp->ptr++); + acp->this_residual--; CMD_INC_RESID(CURRENT_SC, -1); SETPORT(DMACNTRL0,WRITE_READ|ENDMA); } if(data_count > 1) { data_count >>= 1; - outsw(DATAPORT, scsi_pointer->ptr, data_count); - scsi_pointer->ptr += 2 * data_count; - scsi_pointer->this_residual -= 2 * data_count; + outsw(DATAPORT, acp->ptr, data_count); + acp->ptr += 2 * data_count; + acp->this_residual -= 2 * data_count; CMD_INC_RESID(CURRENT_SC, -2 * data_count); } - if (scsi_pointer->this_residual == 0 && - !sg_is_last(scsi_pointer->buffer)) { + if (acp->this_residual == 0 && !sg_is_last(acp->buffer)) { /* advance to next buffer */ - scsi_pointer->buffer = sg_next(scsi_pointer->buffer); - scsi_pointer->ptr = SG_ADDRESS(scsi_pointer->buffer); - scsi_pointer->this_residual = scsi_pointer->buffer->length; + acp->buffer = sg_next(acp->buffer); + acp->ptr = SG_ADDRESS(acp->buffer); + acp->this_residual = acp->buffer->length; } the_time=jiffies + 100*HZ; @@ -2192,7 +2187,7 @@ static void datao_run(struct Scsi_Host *shpnt) static void datao_end(struct Scsi_Host *shpnt) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); + struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC); if(TESTLO(DMASTAT, DFIFOEMP)) { u32 datao_cnt = GETSTCNT(); @@ -2211,10 +2206,9 @@ static void datao_end(struct Scsi_Host *shpnt) sg = sg_next(sg); } - scsi_pointer->buffer = sg; - scsi_pointer->ptr = SG_ADDRESS(scsi_pointer->buffer) + done; - scsi_pointer->this_residual = scsi_pointer->buffer->length - - done; + acp->buffer = sg; + acp->ptr = SG_ADDRESS(acp->buffer) + done; + acp->this_residual = acp->buffer->length - done; } SETPORT(SXFRCTL0, CH1|CLRCH1|CLRSTCNT); @@ -2229,7 +2223,6 @@ static void datao_end(struct Scsi_Host *shpnt) */ static int update_state(struct Scsi_Host *shpnt) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC); int dataphase=0; unsigned int stat0 = GETPORT(SSTAT0); unsigned int stat1 = GETPORT(SSTAT1); @@ -2244,7 +2237,7 @@ static int update_state(struct Scsi_Host *shpnt) } else if (stat0 & SELDI && PREVSTATE == busfree) { STATE=seldi; } else if (stat0 & SELDO && CURRENT_SC && - (scsi_pointer->phase & selecting)) { + (aha152x_priv(CURRENT_SC)->phase & selecting)) { STATE=seldo; } else if(stat1 & SELTO) { STATE=selto; @@ -2376,8 +2369,7 @@ static void is_complete(struct Scsi_Host *shpnt) SETPORT(SXFRCTL0, CH1); SETPORT(DMACNTRL0, 0); if(CURRENT_SC) - aha152x_scsi_pointer(CURRENT_SC)->phase &= - ~spiordy; + aha152x_priv(CURRENT_SC)->phase &= ~spiordy; } /* @@ -2399,8 +2391,7 @@ static void is_complete(struct Scsi_Host *shpnt) SETPORT(DMACNTRL0, 0); SETPORT(SXFRCTL0, CH1|SPIOEN); if(CURRENT_SC) - aha152x_scsi_pointer(CURRENT_SC)->phase |= - spiordy; + aha152x_priv(CURRENT_SC)->phase |= spiordy; } /* @@ -2490,7 +2481,7 @@ static void disp_enintr(struct Scsi_Host *shpnt) */ static void show_command(struct scsi_cmnd *ptr) { - const int phase = aha152x_scsi_pointer(ptr)->phase; + const int phase = aha152x_priv(ptr)->phase; scsi_print_command(ptr); scmd_printk(KERN_DEBUG, ptr, @@ -2538,8 +2529,8 @@ static void show_queues(struct Scsi_Host *shpnt) static void get_command(struct seq_file *m, struct scsi_cmnd * ptr) { - struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(ptr); - const int phase = scsi_pointer->phase; + struct aha152x_cmd_priv *acp = aha152x_priv(ptr); + const int phase = acp->phase; int i; seq_printf(m, "%p: target=%d; lun=%d; cmnd=( ", @@ -2549,8 +2540,8 @@ static void get_command(struct seq_file *m, struct scsi_cmnd * ptr) seq_printf(m, "0x%02x ", ptr->cmnd[i]); seq_printf(m, "); resid=%d; residual=%d; buffers=%d; phase |", - scsi_get_resid(ptr), scsi_pointer->this_residual, - sg_nents(scsi_pointer->buffer) - 1); + scsi_get_resid(ptr), acp->this_residual, + sg_nents(acp->buffer) - 1); if (phase & not_issued) seq_puts(m, "not issued|"); From eaba83b5b8506bbc9ee7ca2f10aeab3fff3719e7 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 16 Mar 2022 17:44:30 +0800 Subject: [PATCH 078/708] scsi: core: Fix sbitmap depth in scsi_realloc_sdev_budget_map() In commit edb854a3680b ("scsi: core: Reallocate device's budget map on queue depth change"), the sbitmap for the device budget map may be reallocated after the slave device depth is configured. When the sbitmap is reallocated we use the result from scsi_device_max_queue_depth() for the sbitmap size, but don't resize to match the actual device queue depth. Fix by resizing the sbitmap after reallocating the budget sbitmap. We do this instead of init'ing the sbitmap to the device queue depth as the user may want to change the queue depth later via sysfs or other. Link: https://lore.kernel.org/r/1647423870-143867-1-git-send-email-john.garry@huawei.com Fixes: edb854a3680b ("scsi: core: Reallocate device's budget map on queue depth change") Tested-by: Damien Le Moal Reviewed-by: Ming Lei Reviewed-by: Bart Van Assche Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_scan.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index f4e6c68ac99e..2ef78083f1ef 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -223,6 +223,8 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, int ret; struct sbitmap sb_backup; + depth = min_t(unsigned int, depth, scsi_device_max_queue_depth(sdev)); + /* * realloc if new shift is calculated, which is caused by setting * up one new default queue depth after calling ->slave_configure @@ -245,6 +247,9 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, scsi_device_max_queue_depth(sdev), new_shift, GFP_KERNEL, sdev->request_queue->node, false, true); + if (!ret) + sbitmap_resize(&sdev->budget_map, depth); + if (need_free) { if (ret) sdev->budget_map = sb_backup; From fac952bb546a9f103a769d7105194175e11abc99 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Mar 2022 23:56:15 +0000 Subject: [PATCH 079/708] scsi: isci: Fix spelling mistake "doesnt" -> "doesn't" There are a few spelling mistakes in dev_warn and dev_err messages. Fix these. Link: https://lore.kernel.org/r/20220316235615.56683-1-colin.i.king@gmail.com Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/scsi/isci/host.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/isci/host.c b/drivers/scsi/isci/host.c index d690d9cf7eb1..35589b6af90d 100644 --- a/drivers/scsi/isci/host.c +++ b/drivers/scsi/isci/host.c @@ -413,7 +413,7 @@ static void sci_controller_event_completion(struct isci_host *ihost, u32 ent) dev_warn(&ihost->pdev->dev, "%s: SCIC Controller 0x%p received " "event 0x%x for io request object " - "that doesnt exist.\n", + "that doesn't exist.\n", __func__, ihost, ent); @@ -428,7 +428,7 @@ static void sci_controller_event_completion(struct isci_host *ihost, u32 ent) dev_warn(&ihost->pdev->dev, "%s: SCIC Controller 0x%p received " "event 0x%x for remote device object " - "that doesnt exist.\n", + "that doesn't exist.\n", __func__, ihost, ent); @@ -462,7 +462,7 @@ static void sci_controller_event_completion(struct isci_host *ihost, u32 ent) } else dev_err(&ihost->pdev->dev, "%s: SCIC Controller 0x%p received event 0x%x " - "for remote device object 0x%0x that doesnt " + "for remote device object 0x%0x that doesn't " "exist.\n", __func__, ihost, From 41b8c2a31472a97349fe54c3a6b3176d9cdc31be Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 16 Mar 2022 12:20:06 -0700 Subject: [PATCH 080/708] scsi: virtio-scsi: Eliminate anonymous module_init & module_exit Eliminate anonymous module_init() and module_exit(), which can lead to confusion or ambiguity when reading System.map, crashes/oops/bugs, or an initcall_debug log. Give each of these init and exit functions unique driver-specific names to eliminate the anonymous names. Example 1: (System.map) ffffffff832fc78c t init ffffffff832fc79e t init ffffffff832fc8f8 t init Example 2: (initcall_debug log) calling init+0x0/0x12 @ 1 initcall init+0x0/0x12 returned 0 after 15 usecs calling init+0x0/0x60 @ 1 initcall init+0x0/0x60 returned 0 after 2 usecs calling init+0x0/0x9a @ 1 initcall init+0x0/0x9a returned 0 after 74 usecs Link: https://lore.kernel.org/r/20220316192010.19001-6-rdunlap@infradead.org Fixes: 4fe74b1cb051 ("[SCSI] virtio-scsi: SCSI driver for QEMU based virtual machines") Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Paolo Bonzini Cc: Stefan Hajnoczi Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: linux-scsi@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Reviewed-by: Stefan Hajnoczi Reviewed-by: Ira Weiny Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: Randy Dunlap Signed-off-by: Martin K. Petersen --- drivers/scsi/virtio_scsi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 0e6110da69e7..578c4b6d0f7d 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -988,7 +988,7 @@ static struct virtio_driver virtio_scsi_driver = { .remove = virtscsi_remove, }; -static int __init init(void) +static int __init virtio_scsi_init(void) { int ret = -ENOMEM; @@ -1020,14 +1020,14 @@ error: return ret; } -static void __exit fini(void) +static void __exit virtio_scsi_fini(void) { unregister_virtio_driver(&virtio_scsi_driver); mempool_destroy(virtscsi_cmd_pool); kmem_cache_destroy(virtscsi_cmd_cache); } -module_init(init); -module_exit(fini); +module_init(virtio_scsi_init); +module_exit(virtio_scsi_fini); MODULE_DEVICE_TABLE(virtio, id_table); MODULE_DESCRIPTION("Virtio SCSI HBA driver"); From 066f4c31945ce2cb30e840794ee01713dec73b74 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 17 Mar 2022 10:52:14 +0300 Subject: [PATCH 081/708] scsi: hisi_sas: Remove stray fallthrough annotation This case statement doesn't fall through any more so remove the fallthrough annotation. Link: https://lore.kernel.org/r/20220317075214.GC25237@kili Acked-by: John Garry Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 461ef8a76c4c..4bda2f6cb352 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -442,7 +442,6 @@ void hisi_sas_task_deliver(struct hisi_hba *hisi_hba, case SAS_PROTOCOL_INTERNAL_ABORT: hisi_sas_task_prep_abort(hisi_hba, slot); break; - fallthrough; default: return; } From 99241e119f4a2077383f994a174b38ced21d6fca Mon Sep 17 00:00:00 2001 From: Jackie Liu Date: Tue, 29 Mar 2022 10:12:51 +0800 Subject: [PATCH 082/708] scsi: core: sysfs: Remove comments that conflict with the actual logic Christoph Hellwig Says: ======================= I think we should just handle the error properly and remove the comment. There's no good reason to ignore bsg registration errors. In fact, after commit 92c4b58b15c5 ("scsi: core: Register sysfs attributes earlier"), we are already forced to return errno. We discuss this issue in [1]. [1] https://lore.kernel.org/all/20211022010201.426746-1-liu.yun@linux.dev/ Link: https://lore.kernel.org/r/20220329021251.123805-1-liu.yun@linux.dev Suggested-by: Christoph Hellwig Reviewed-by: Guenter Roeck Reviewed-by: Christoph Hellwig Signed-off-by: Jackie Liu Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_sysfs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 226a50944c00..dc6872e352bd 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1384,10 +1384,6 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev) if (IS_ENABLED(CONFIG_BLK_DEV_BSG)) { sdev->bsg_dev = scsi_bsg_register_queue(sdev); if (IS_ERR(sdev->bsg_dev)) { - /* - * We're treating error on bsg register as non-fatal, so - * pretend nothing went wrong. - */ error = PTR_ERR(sdev->bsg_dev); sdev_printk(KERN_INFO, sdev, "Failed to register bsg queue, errno=%d\n", From 5ca0faf9c292029ec6f9edbfe7b42e97dcb87dca Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 21 Mar 2022 16:18:53 +0100 Subject: [PATCH 083/708] scsi: ufs: qcom: Drop custom Android boot parameters The QCOM UFS driver requires an androidboot.bootdevice command line argument matching the UFS device name. If the name is different, it refuses to probe. This androidboot.bootdevice is provided by stock/vendor (from an Android-based device) bootloader. This does not make sense from Linux point of view. Driver should be able to boot regardless of bootloader. Driver should not depend on some Android custom environment data. Link: https://lore.kernel.org/r/20220321151853.24138-1-krzk@kernel.org Tested-by: Amit Pundir Tested-by: Luca Weiss Reviewed-by: Brian Masney Reviewed-by: Bjorn Andersson Reviewed-by: Alim Akhtar Signed-off-by: Krzysztof Kozlowski Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-qcom.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index 0d2e950d0865..586c0e567ff9 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -957,18 +957,6 @@ static const struct reset_control_ops ufs_qcom_reset_ops = { .deassert = ufs_qcom_reset_deassert, }; -#define ANDROID_BOOT_DEV_MAX 30 -static char android_boot_dev[ANDROID_BOOT_DEV_MAX]; - -#ifndef MODULE -static int __init get_android_boot_dev(char *str) -{ - strlcpy(android_boot_dev, str, ANDROID_BOOT_DEV_MAX); - return 1; -} -__setup("androidboot.bootdevice=", get_android_boot_dev); -#endif - /** * ufs_qcom_init - bind phy with controller * @hba: host controller instance @@ -988,9 +976,6 @@ static int ufs_qcom_init(struct ufs_hba *hba) struct resource *res; struct ufs_clk_info *clki; - if (strlen(android_boot_dev) && strcmp(android_boot_dev, dev_name(dev))) - return -ENODEV; - host = devm_kzalloc(dev, sizeof(*host), GFP_KERNEL); if (!host) { err = -ENOMEM; From 37a9bd7090cdf6657ced3f693897819a66ee8d52 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 22 Mar 2022 07:46:48 -0700 Subject: [PATCH 084/708] scsi: aic7xxx: Use standard PCI subsystem, subdevice defines Common defines should be used over custom defines. Change and remove these defines: - PCIR_SUBVEND_0 to PCI_SUBSYSTEM_VENDOR_ID - PCIR_SUBDEV_0 to PCI_SUBSYSTEM_ID Link: https://lore.kernel.org/r/20220322144648.2467777-1-trix@redhat.com Signed-off-by: Tom Rix Signed-off-by: Martin K. Petersen --- drivers/scsi/aic7xxx/aic79xx_osm.h | 2 -- drivers/scsi/aic7xxx/aic79xx_pci.c | 6 +++--- drivers/scsi/aic7xxx/aic7xxx_osm.h | 2 -- drivers/scsi/aic7xxx/aic7xxx_pci.c | 4 ++-- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.h b/drivers/scsi/aic7xxx/aic79xx_osm.h index 679a4fd13874..793fe19993a9 100644 --- a/drivers/scsi/aic7xxx/aic79xx_osm.h +++ b/drivers/scsi/aic7xxx/aic79xx_osm.h @@ -420,8 +420,6 @@ ahd_unlock(struct ahd_softc *ahd, unsigned long *flags) /* config registers for header type 0 devices */ #define PCIR_MAPS 0x10 -#define PCIR_SUBVEND_0 0x2c -#define PCIR_SUBDEV_0 0x2e /****************************** PCI-X definitions *****************************/ #define PCIXR_COMMAND 0x96 diff --git a/drivers/scsi/aic7xxx/aic79xx_pci.c b/drivers/scsi/aic7xxx/aic79xx_pci.c index 2f0bdb9225a4..5fad41b1ab58 100644 --- a/drivers/scsi/aic7xxx/aic79xx_pci.c +++ b/drivers/scsi/aic7xxx/aic79xx_pci.c @@ -260,8 +260,8 @@ ahd_find_pci_device(ahd_dev_softc_t pci) vendor = ahd_pci_read_config(pci, PCIR_DEVVENDOR, /*bytes*/2); device = ahd_pci_read_config(pci, PCIR_DEVICE, /*bytes*/2); - subvendor = ahd_pci_read_config(pci, PCIR_SUBVEND_0, /*bytes*/2); - subdevice = ahd_pci_read_config(pci, PCIR_SUBDEV_0, /*bytes*/2); + subvendor = ahd_pci_read_config(pci, PCI_SUBSYSTEM_VENDOR_ID, /*bytes*/2); + subdevice = ahd_pci_read_config(pci, PCI_SUBSYSTEM_ID, /*bytes*/2); full_id = ahd_compose_id(device, vendor, subdevice, @@ -298,7 +298,7 @@ ahd_pci_config(struct ahd_softc *ahd, const struct ahd_pci_identity *entry) * Record if this is an HP board. */ subvendor = ahd_pci_read_config(ahd->dev_softc, - PCIR_SUBVEND_0, /*bytes*/2); + PCI_SUBSYSTEM_VENDOR_ID, /*bytes*/2); if (subvendor == SUBID_HP) ahd->flags |= AHD_HP_BOARD; diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.h b/drivers/scsi/aic7xxx/aic7xxx_osm.h index 4782a304e93c..51d9f4de0734 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.h +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.h @@ -433,8 +433,6 @@ ahc_unlock(struct ahc_softc *ahc, unsigned long *flags) /* config registers for header type 0 devices */ #define PCIR_MAPS 0x10 -#define PCIR_SUBVEND_0 0x2c -#define PCIR_SUBDEV_0 0x2e typedef enum { diff --git a/drivers/scsi/aic7xxx/aic7xxx_pci.c b/drivers/scsi/aic7xxx/aic7xxx_pci.c index dab3a6d12c4d..2d4c85426dc3 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_pci.c +++ b/drivers/scsi/aic7xxx/aic7xxx_pci.c @@ -673,8 +673,8 @@ ahc_find_pci_device(ahc_dev_softc_t pci) vendor = ahc_pci_read_config(pci, PCIR_DEVVENDOR, /*bytes*/2); device = ahc_pci_read_config(pci, PCIR_DEVICE, /*bytes*/2); - subvendor = ahc_pci_read_config(pci, PCIR_SUBVEND_0, /*bytes*/2); - subdevice = ahc_pci_read_config(pci, PCIR_SUBDEV_0, /*bytes*/2); + subvendor = ahc_pci_read_config(pci, PCI_SUBSYSTEM_VENDOR_ID, /*bytes*/2); + subdevice = ahc_pci_read_config(pci, PCI_SUBSYSTEM_ID, /*bytes*/2); full_id = ahc_compose_id(device, vendor, subdevice, subvendor); /* From 16ed828b872d12ccba8f07bcc446ae89ba662f9c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 19 Mar 2022 08:01:24 +0100 Subject: [PATCH 085/708] scsi: zorro7xx: Fix a resource leak in zorro7xx_remove_one() The error handling path of the probe releases a resource that is not freed in the remove function. In some cases, a ioremap() must be undone. Add the missing iounmap() call in the remove function. Link: https://lore.kernel.org/r/247066a3104d25f9a05de8b3270fc3c848763bcc.1647673264.git.christophe.jaillet@wanadoo.fr Fixes: 45804fbb00ee ("[SCSI] 53c700: Amiga Zorro NCR53c710 SCSI") Reviewed-by: Geert Uytterhoeven Signed-off-by: Christophe JAILLET Signed-off-by: Martin K. Petersen --- drivers/scsi/zorro7xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/zorro7xx.c b/drivers/scsi/zorro7xx.c index 27b9e2baab1a..7acf9193a9e8 100644 --- a/drivers/scsi/zorro7xx.c +++ b/drivers/scsi/zorro7xx.c @@ -159,6 +159,8 @@ static void zorro7xx_remove_one(struct zorro_dev *z) scsi_remove_host(host); NCR_700_release(host); + if (host->base > 0x01000000) + iounmap(hostdata->base); kfree(hostdata); free_irq(host->irq, host); zorro_release_device(z); From 7ff897b2a59558d919b72f1aef58b1aa63d19c61 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 19 Mar 2022 23:11:22 +0000 Subject: [PATCH 086/708] scsi: bnx2fc: Fix spelling mistake "mis-match" -> "mismatch" There are a few spelling mistakes in some error messages. Fix them. Link: https://lore.kernel.org/r/20220319231122.21476-1-colin.i.king@gmail.com Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc_hwi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c index 0103f811cc25..776544385598 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c +++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c @@ -1169,7 +1169,7 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba, ofld_kcqe->fcoe_conn_context_id); interface = tgt->port->priv; if (hba != interface->hba) { - printk(KERN_ERR PFX "ERROR:ofld_cmpl: HBA mis-match\n"); + printk(KERN_ERR PFX "ERROR:ofld_cmpl: HBA mismatch\n"); goto ofld_cmpl_err; } /* @@ -1226,12 +1226,12 @@ static void bnx2fc_process_enable_conn_cmpl(struct bnx2fc_hba *hba, * and enable */ if (tgt->context_id != context_id) { - printk(KERN_ERR PFX "context id mis-match\n"); + printk(KERN_ERR PFX "context id mismatch\n"); return; } interface = tgt->port->priv; if (hba != interface->hba) { - printk(KERN_ERR PFX "bnx2fc-enbl_cmpl: HBA mis-match\n"); + printk(KERN_ERR PFX "bnx2fc-enbl_cmpl: HBA mismatch\n"); goto enbl_cmpl_err; } if (!ofld_kcqe->completion_status) From a6b758b0420bda28995ea6939ca0808fcec68be4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 19 Mar 2022 23:14:45 +0000 Subject: [PATCH 087/708] scsi: bnx2i: Fix spelling mistake "mis-match" -> "mismatch" There are a few spelling mistakes in some error messages. Fix them. Link: https://lore.kernel.org/r/20220319231445.21696-1-colin.i.king@gmail.com Acked-by: Manish Rangankar Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2i/bnx2i_hwi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index 5521469ce678..7fe7f53a41c0 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -2398,7 +2398,7 @@ static void bnx2i_process_conn_destroy_cmpl(struct bnx2i_hba *hba, } if (hba != ep->hba) { - printk(KERN_ALERT "conn destroy- error hba mis-match\n"); + printk(KERN_ALERT "conn destroy- error hba mismatch\n"); return; } @@ -2432,7 +2432,7 @@ static void bnx2i_process_ofld_cmpl(struct bnx2i_hba *hba, } if (hba != ep->hba) { - printk(KERN_ALERT "ofld_cmpl: error hba mis-match\n"); + printk(KERN_ALERT "ofld_cmpl: error hba mismatch\n"); return; } From 67bae5f28c895f8737a1974c3f31cf12b9170b14 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Tue, 29 Mar 2022 15:27:32 +0200 Subject: [PATCH 088/708] drm: of: Properly try all possible cases for bridge/panel detection While bridge/panel detection was initially relying on the usual port/ports-based of graph detection, it was recently changed to perform the lookup on any child node that is not port/ports instead when such a node is available, with no fallback on the usual way. This results in breaking detection when a child node is present but does not contain any panel or bridge node, even when the usual port/ports-based of graph is there. In order to support both situations properly, this commit reworks the logic to try both options and not just one of the two: it will only return -EPROBE_DEFER when both have failed. Signed-off-by: Paul Kocialkowski Fixes: 80253168dbfd ("drm: of: Lookup if child node has panel or bridge") Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20220329132732.628474-1-paul.kocialkowski@bootlin.com --- drivers/gpu/drm/drm_of.c | 101 ++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c index 9d90cd75c457..8716da6369a6 100644 --- a/drivers/gpu/drm/drm_of.c +++ b/drivers/gpu/drm/drm_of.c @@ -219,6 +219,29 @@ int drm_of_encoder_active_endpoint(struct device_node *node, } EXPORT_SYMBOL_GPL(drm_of_encoder_active_endpoint); +static int find_panel_or_bridge(struct device_node *node, + struct drm_panel **panel, + struct drm_bridge **bridge) +{ + if (panel) { + *panel = of_drm_find_panel(node); + if (!IS_ERR(*panel)) + return 0; + + /* Clear the panel pointer in case of error. */ + *panel = NULL; + } + + /* No panel found yet, check for a bridge next. */ + if (bridge) { + *bridge = of_drm_find_bridge(node); + if (*bridge) + return 0; + } + + return -EPROBE_DEFER; +} + /** * drm_of_find_panel_or_bridge - return connected panel or bridge device * @np: device tree node containing encoder output ports @@ -241,66 +264,44 @@ int drm_of_find_panel_or_bridge(const struct device_node *np, struct drm_panel **panel, struct drm_bridge **bridge) { - int ret = -EPROBE_DEFER; - struct device_node *remote; + struct device_node *node; + int ret; if (!panel && !bridge) return -EINVAL; + if (panel) *panel = NULL; + if (bridge) + *bridge = NULL; - /** - * Devices can also be child nodes when we also control that device - * through the upstream device (ie, MIPI-DCS for a MIPI-DSI device). - * - * Lookup for a child node of the given parent that isn't either port - * or ports. - */ - for_each_available_child_of_node(np, remote) { - if (of_node_name_eq(remote, "port") || - of_node_name_eq(remote, "ports")) + /* Check for a graph on the device node first. */ + if (of_graph_is_present(np)) { + node = of_graph_get_remote_node(np, port, endpoint); + if (node) { + ret = find_panel_or_bridge(node, panel, bridge); + of_node_put(node); + + if (!ret) + return 0; + } + } + + /* Otherwise check for any child node other than port/ports. */ + for_each_available_child_of_node(np, node) { + if (of_node_name_eq(node, "port") || + of_node_name_eq(node, "ports")) continue; - goto of_find_panel_or_bridge; + ret = find_panel_or_bridge(node, panel, bridge); + of_node_put(node); + + /* Stop at the first found occurrence. */ + if (!ret) + return 0; } - /* - * of_graph_get_remote_node() produces a noisy error message if port - * node isn't found and the absence of the port is a legit case here, - * so at first we silently check whether graph presents in the - * device-tree node. - */ - if (!of_graph_is_present(np)) - return -ENODEV; - - remote = of_graph_get_remote_node(np, port, endpoint); - -of_find_panel_or_bridge: - if (!remote) - return -ENODEV; - - if (panel) { - *panel = of_drm_find_panel(remote); - if (!IS_ERR(*panel)) - ret = 0; - else - *panel = NULL; - } - - /* No panel found yet, check for a bridge next. */ - if (bridge) { - if (ret) { - *bridge = of_drm_find_bridge(remote); - if (*bridge) - ret = 0; - } else { - *bridge = NULL; - } - - } - - of_node_put(remote); - return ret; + return -EPROBE_DEFER; } EXPORT_SYMBOL_GPL(drm_of_find_panel_or_bridge); From 7414539c5f2e43bad67ae88a3612455d01583429 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 30 Mar 2022 02:19:16 -0400 Subject: [PATCH 089/708] Revert "virtio: use virtio_device_ready() in virtio_device_restore()" This reverts commit 8d65bc9a5be3f23c5e2ab36b6b8ef40095165b18. We reverted the problematic changes, no more need for work arounds on restore. Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/virtio/virtio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 75c8d560bbd3..22f15f444f75 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -526,9 +526,8 @@ int virtio_device_restore(struct virtio_device *dev) goto err; } - /* If restore didn't do it, mark device DRIVER_OK ourselves. */ - if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK)) - virtio_device_ready(dev); + /* Finally, tell the device we're all set */ + virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); virtio_config_enable(dev); From c18c86808b78c4c2dc69f27f37c57abab14ee387 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 30 Mar 2022 02:22:17 -0400 Subject: [PATCH 090/708] Revert "virtio_config: introduce a new .enable_cbs method" This reverts commit d50497eb4e554e1f0351e1836ee7241c059592e6. The new callback ended up not being used, and it's asymmetrical: just enable, no disable. Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- include/linux/virtio_config.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index dafdc7f48c01..b341dd62aa4d 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -23,8 +23,6 @@ struct virtio_shm_region { * any of @get/@set, @get_status/@set_status, or @get_features/ * @finalize_features are NOT safe to be called from an atomic * context. - * @enable_cbs: enable the callbacks - * vdev: the virtio_device * @get: read the value of a configuration field * vdev: the virtio_device * offset: the offset of the configuration field @@ -78,7 +76,6 @@ struct virtio_shm_region { */ typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { - void (*enable_cbs)(struct virtio_device *vdev); void (*get)(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len); void (*set)(struct virtio_device *vdev, unsigned offset, @@ -233,9 +230,6 @@ void virtio_device_ready(struct virtio_device *dev) { unsigned status = dev->config->get_status(dev); - if (dev->config->enable_cbs) - dev->config->enable_cbs(dev); - BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK); dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK); } From 55ebf0d60e3cc6c9e8593399e185842c00e12f36 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 29 Mar 2022 12:21:07 +0800 Subject: [PATCH 091/708] vdpa: mlx5: prevent cvq work from hogging CPU A userspace triggerable infinite loop could happen in mlx5_cvq_kick_handler() if userspace keeps sending a huge amount of cvq requests. Fixing this by introducing a quota and re-queue the work if we're out of the budget (currently the implicit budget is one) . While at it, using a per device work struct to avoid on demand memory allocation for cvq. Fixes: 5262912ef3cfc ("vdpa/mlx5: Add support for control VQ and MAC setting") Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20220329042109.4029-1-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin Acked-by: Eli Cohen --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 2f4fb09f1e89..da21e5430434 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -163,6 +163,7 @@ struct mlx5_vdpa_net { u32 cur_num_vqs; struct notifier_block nb; struct vdpa_callback config_cb; + struct mlx5_vdpa_wq_ent cvq_ent; }; static void free_resources(struct mlx5_vdpa_net *ndev); @@ -1659,10 +1660,10 @@ static void mlx5_cvq_kick_handler(struct work_struct *work) ndev = to_mlx5_vdpa_ndev(mvdev); cvq = &mvdev->cvq; if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) - goto out; + return; if (!cvq->ready) - goto out; + return; while (true) { err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head, @@ -1696,9 +1697,10 @@ static void mlx5_cvq_kick_handler(struct work_struct *work) if (vringh_need_notify_iotlb(&cvq->vring)) vringh_notify(&cvq->vring); + + queue_work(mvdev->wq, &wqent->work); + break; } -out: - kfree(wqent); } static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) @@ -1706,7 +1708,6 @@ static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct mlx5_vdpa_virtqueue *mvq; - struct mlx5_vdpa_wq_ent *wqent; if (!is_index_valid(mvdev, idx)) return; @@ -1715,13 +1716,7 @@ static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) if (!mvdev->wq || !mvdev->cvq.ready) return; - wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); - if (!wqent) - return; - - wqent->mvdev = mvdev; - INIT_WORK(&wqent->work, mlx5_cvq_kick_handler); - queue_work(mvdev->wq, &wqent->work); + queue_work(mvdev->wq, &ndev->cvq_ent.work); return; } @@ -2740,6 +2735,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, if (err) goto err_mr; + ndev->cvq_ent.mvdev = mvdev; + INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler); mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq"); if (!mvdev->wq) { err = -ENOMEM; From 1c80cf031e0204fde471558ee40183695773ce13 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 29 Mar 2022 12:21:08 +0800 Subject: [PATCH 092/708] vdpa: mlx5: synchronize driver status with CVQ Currently, CVQ doesn't have any synchronization with the driver status. Then CVQ emulation code run in the middle of: 1) device reset 2) device status changed 3) map updating The will lead several unexpected issue like trying to execute CVQ command after the driver has been teared down. Fixing this by using reslock to synchronize CVQ emulation code with the driver status changing: - protect the whole device reset, status changing and set_map() updating with reslock - protect the CVQ handler with the reslock and check VIRTIO_CONFIG_S_DRIVER_OK in the CVQ handler This will guarantee that: 1) CVQ handler won't work if VIRTIO_CONFIG_S_DRIVER_OK is not set 2) CVQ handler will see a consistent state of the driver instead of the partial one when it is running in the middle of the teardown_driver() or setup_driver(). Cc: 5262912ef3cfc ("vdpa/mlx5: Add support for control VQ and MAC setting") Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20220329042109.4029-2-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin Acked-by: Eli Cohen --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 51 ++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index da21e5430434..79001301b383 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1659,11 +1659,17 @@ static void mlx5_cvq_kick_handler(struct work_struct *work) mvdev = wqent->mvdev; ndev = to_mlx5_vdpa_ndev(mvdev); cvq = &mvdev->cvq; + + mutex_lock(&ndev->reslock); + + if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) + goto out; + if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) - return; + goto out; if (!cvq->ready) - return; + goto out; while (true) { err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head, @@ -1701,6 +1707,9 @@ static void mlx5_cvq_kick_handler(struct work_struct *work) queue_work(mvdev->wq, &wqent->work); break; } + +out: + mutex_unlock(&ndev->reslock); } static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) @@ -2175,7 +2184,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb goto err_mr; if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) - return 0; + goto err_mr; restore_channels_info(ndev); err = setup_driver(mvdev); @@ -2190,12 +2199,14 @@ err_mr: return err; } +/* reslock must be held for this function */ static int setup_driver(struct mlx5_vdpa_dev *mvdev) { struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); int err; - mutex_lock(&ndev->reslock); + WARN_ON(!mutex_is_locked(&ndev->reslock)); + if (ndev->setup) { mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n"); err = 0; @@ -2225,7 +2236,6 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev) goto err_fwd; } ndev->setup = true; - mutex_unlock(&ndev->reslock); return 0; @@ -2236,23 +2246,23 @@ err_tir: err_rqt: teardown_virtqueues(ndev); out: - mutex_unlock(&ndev->reslock); return err; } +/* reslock must be held for this function */ static void teardown_driver(struct mlx5_vdpa_net *ndev) { - mutex_lock(&ndev->reslock); + + WARN_ON(!mutex_is_locked(&ndev->reslock)); + if (!ndev->setup) - goto out; + return; remove_fwd_to_tir(ndev); destroy_tir(ndev); destroy_rqt(ndev); teardown_virtqueues(ndev); ndev->setup = false; -out: - mutex_unlock(&ndev->reslock); } static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) @@ -2273,6 +2283,8 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) print_status(mvdev, status, true); + mutex_lock(&ndev->reslock); + if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { if (status & VIRTIO_CONFIG_S_DRIVER_OK) { err = setup_driver(mvdev); @@ -2282,16 +2294,19 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) } } else { mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); - return; + goto err_clear; } } ndev->mvdev.status = status; + mutex_unlock(&ndev->reslock); return; err_setup: mlx5_vdpa_destroy_mr(&ndev->mvdev); ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; +err_clear: + mutex_unlock(&ndev->reslock); } static int mlx5_vdpa_reset(struct vdpa_device *vdev) @@ -2301,6 +2316,8 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) print_status(mvdev, 0, true); mlx5_vdpa_info(mvdev, "performing device reset\n"); + + mutex_lock(&ndev->reslock); teardown_driver(ndev); clear_vqs_ready(ndev); mlx5_vdpa_destroy_mr(&ndev->mvdev); @@ -2313,6 +2330,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) if (mlx5_vdpa_create_mr(mvdev, NULL)) mlx5_vdpa_warn(mvdev, "create MR failed\n"); } + mutex_unlock(&ndev->reslock); return 0; } @@ -2348,19 +2366,24 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); bool change_map; int err; + mutex_lock(&ndev->reslock); + err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map); if (err) { mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err); - return err; + goto err; } if (change_map) - return mlx5_vdpa_change_map(mvdev, iotlb); + err = mlx5_vdpa_change_map(mvdev, iotlb); - return 0; +err: + mutex_unlock(&ndev->reslock); + return err; } static void mlx5_vdpa_free(struct vdpa_device *vdev) From 522574fd7864e091d473765102e866414979b2ab Mon Sep 17 00:00:00 2001 From: Delyan Kratunov Date: Mon, 21 Mar 2022 23:29:18 +0000 Subject: [PATCH 093/708] bpftool: Explicit errno handling in skeletons Andrii noticed that since f97b8b9bd630 ("bpftool: Fix a bug in subskeleton code generation") the subskeleton code allows bpf_object__destroy_subskeleton to overwrite the errno that subskeleton__open would return with. While this is not currently an issue, let's make it future-proof. This patch explicitly tracks err in subskeleton__open and skeleton__create (i.e. calloc failure is explicitly ENOMEM) and ensures that errno is -err on the error return path. The skeleton code had to be changed since maps and progs codegen is shared with subskeletons. Fixes: f97b8b9bd630 ("bpftool: Fix a bug in subskeleton code generation") Signed-off-by: Delyan Kratunov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/3b6bfbb770c79ae64d8de26c1c1bd9d53a4b85f8.camel@fb.com --- tools/bpf/bpftool/gen.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 91af2850b505..7678af364793 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -828,8 +828,10 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped) s->map_cnt = %zu; \n\ s->map_skel_sz = sizeof(*s->maps); \n\ s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);\n\ - if (!s->maps) \n\ + if (!s->maps) { \n\ + err = -ENOMEM; \n\ goto err; \n\ + } \n\ ", map_cnt ); @@ -870,8 +872,10 @@ codegen_progs_skeleton(struct bpf_object *obj, size_t prog_cnt, bool populate_li s->prog_cnt = %zu; \n\ s->prog_skel_sz = sizeof(*s->progs); \n\ s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);\n\ - if (!s->progs) \n\ + if (!s->progs) { \n\ + err = -ENOMEM; \n\ goto err; \n\ + } \n\ ", prog_cnt ); @@ -1182,10 +1186,13 @@ static int do_skeleton(int argc, char **argv) %1$s__create_skeleton(struct %1$s *obj) \n\ { \n\ struct bpf_object_skeleton *s; \n\ + int err; \n\ \n\ s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\ - if (!s) \n\ + if (!s) { \n\ + err = -ENOMEM; \n\ goto err; \n\ + } \n\ \n\ s->sz = sizeof(*s); \n\ s->name = \"%1$s\"; \n\ @@ -1206,7 +1213,7 @@ static int do_skeleton(int argc, char **argv) return 0; \n\ err: \n\ bpf_object__destroy_skeleton(s); \n\ - return -ENOMEM; \n\ + return err; \n\ } \n\ \n\ static inline const void *%2$s__elf_bytes(size_t *sz) \n\ @@ -1466,12 +1473,12 @@ static int do_subskeleton(int argc, char **argv) \n\ obj = (struct %1$s *)calloc(1, sizeof(*obj)); \n\ if (!obj) { \n\ - errno = ENOMEM; \n\ + err = -ENOMEM; \n\ goto err; \n\ } \n\ s = (struct bpf_object_subskeleton *)calloc(1, sizeof(*s));\n\ if (!s) { \n\ - errno = ENOMEM; \n\ + err = -ENOMEM; \n\ goto err; \n\ } \n\ s->sz = sizeof(*s); \n\ @@ -1483,7 +1490,7 @@ static int do_subskeleton(int argc, char **argv) s->var_cnt = %2$d; \n\ s->vars = (struct bpf_var_skeleton *)calloc(%2$d, sizeof(*s->vars));\n\ if (!s->vars) { \n\ - errno = ENOMEM; \n\ + err = -ENOMEM; \n\ goto err; \n\ } \n\ ", @@ -1538,6 +1545,7 @@ static int do_subskeleton(int argc, char **argv) return obj; \n\ err: \n\ %1$s__destroy(obj); \n\ + errno = -err; \n\ return NULL; \n\ } \n\ \n\ From d31e0386a2f122b40b605eb0120a2fbcfca77868 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 30 Mar 2022 13:05:10 +0200 Subject: [PATCH 094/708] bpf: Fix sparse warnings in kprobe_multi_resolve_syms Adding missing __user tags to fix sparse warnings: kernel/trace/bpf_trace.c:2370:34: warning: incorrect type in argument 2 (different address spaces) kernel/trace/bpf_trace.c:2370:34: expected void const [noderef] __user *from kernel/trace/bpf_trace.c:2370:34: got void const *usyms kernel/trace/bpf_trace.c:2376:51: warning: incorrect type in argument 2 (different address spaces) kernel/trace/bpf_trace.c:2376:51: expected char const [noderef] __user *src kernel/trace/bpf_trace.c:2376:51: got char const * kernel/trace/bpf_trace.c:2443:49: warning: incorrect type in argument 1 (different address spaces) kernel/trace/bpf_trace.c:2443:49: expected void const *usyms kernel/trace/bpf_trace.c:2443:49: got void [noderef] __user *[assigned] usyms Fixes: 0dcac2725406 ("bpf: Add multi kprobe link") Reported-by: Alexei Starovoitov Reported-by: Jakub Kicinski Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220330110510.398558-1-jolsa@kernel.org --- kernel/trace/bpf_trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 7fa2ebc07f60..d8553f46caa2 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2349,11 +2349,11 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip, } static int -kprobe_multi_resolve_syms(const void *usyms, u32 cnt, +kprobe_multi_resolve_syms(const void __user *usyms, u32 cnt, unsigned long *addrs) { unsigned long addr, size; - const char **syms; + const char __user **syms; int err = -ENOMEM; unsigned int i; char *func; From 2609f635a20d3691e7b5725edc3bdadb7bedf8fb Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Wed, 30 Mar 2022 09:59:48 +0800 Subject: [PATCH 095/708] selftests/bpf: Fix warning comparing pointer to 0 Avoid pointer type value compared with 0 to make code clear. Reported by coccicheck: tools/testing/selftests/bpf/progs/map_ptr_kern.c:370:21-22: WARNING comparing pointer to 0 tools/testing/selftests/bpf/progs/map_ptr_kern.c:397:21-22: WARNING comparing pointer to 0 Signed-off-by: Haowen Bai Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/1648605588-19269-1-git-send-email-baihaowen@meizu.com --- tools/testing/selftests/bpf/progs/map_ptr_kern.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index b64df94ec476..db388f593d0a 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -367,7 +367,7 @@ static inline int check_array_of_maps(void) VERIFY(check_default(&array_of_maps->map, map)); inner_map = bpf_map_lookup_elem(array_of_maps, &key); - VERIFY(inner_map != 0); + VERIFY(inner_map != NULL); VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES); return 1; @@ -394,7 +394,7 @@ static inline int check_hash_of_maps(void) VERIFY(check_default(&hash_of_maps->map, map)); inner_map = bpf_map_lookup_elem(hash_of_maps, &key); - VERIFY(inner_map != 0); + VERIFY(inner_map != NULL); VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES); return 1; From a2fb49833cad07a53651c23dce508127085fae2d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 31 Mar 2022 10:11:17 +0900 Subject: [PATCH 096/708] rethook: Fix to use WRITE_ONCE() for rethook:: Handler Since the function pointered by rethook::handler never be removed when the rethook is alive, it doesn't need to use rcu_assign_pointer() to update it. Just use WRITE_ONCE(). Reported-by: Alexei Starovoitov Signed-off-by: Masami Hiramatsu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/164868907688.21983.1606862921419988152.stgit@devnote2 --- kernel/trace/rethook.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c index ab463a4d2b23..b56833700d23 100644 --- a/kernel/trace/rethook.c +++ b/kernel/trace/rethook.c @@ -65,7 +65,7 @@ static void rethook_free_rcu(struct rcu_head *head) */ void rethook_free(struct rethook *rh) { - rcu_assign_pointer(rh->handler, NULL); + WRITE_ONCE(rh->handler, NULL); call_rcu(&rh->rcu, rethook_free_rcu); } From 4a9c7bbe2ed4d2b240674b1fb606c41d3940c412 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 29 Mar 2022 18:14:56 -0700 Subject: [PATCH 097/708] bpf: Resolve to prog->aux->dst_prog->type only for BPF_PROG_TYPE_EXT The commit 7e40781cc8b7 ("bpf: verifier: Use target program's type for access verifications") fixes the verifier checking for BPF_PROG_TYPE_EXT (extension) prog such that the verifier looks for things based on the target prog type that it is extending instead of the BPF_PROG_TYPE_EXT itself. The current resolve_prog_type() returns the target prog type. It checks for nullness on prog->aux->dst_prog. However, when loading a BPF_PROG_TYPE_TRACING prog and it is tracing another bpf prog instead of a kernel function, prog->aux->dst_prog is not NULL also. In this case, the verifier should still verify as the BPF_PROG_TYPE_TRACING type instead of the traced prog type in prog->aux->dst_prog->type. An oops has been reported when tracing a struct_ops prog. A NULL dereference happened in check_return_code() when accessing the prog->aux->attach_func_proto->type and prog->aux->attach_func_proto is NULL here because the traced struct_ops prog has the "unreliable" set. This patch is to change the resolve_prog_type() to only return the target prog type if the prog being verified is BPF_PROG_TYPE_EXT. Fixes: 7e40781cc8b7 ("bpf: verifier: Use target program's type for access verifications") Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220330011456.2984509-1-kafai@fb.com --- include/linux/bpf_verifier.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c1fc4af47f69..3a9d2d7cc6b7 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -570,9 +570,11 @@ static inline u32 type_flag(u32 type) return type & ~BPF_BASE_TYPE_MASK; } +/* only use after check_attach_btf_id() */ static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) { - return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type; + return prog->type == BPF_PROG_TYPE_EXT ? + prog->aux->dst_prog->type : prog->type; } #endif /* _LINUX_BPF_VERIFIER_H */ From 0a210af6d0a0595fef566e7eeb072f10f37774be Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 29 Mar 2022 18:15:02 -0700 Subject: [PATCH 098/708] bpf: selftests: Test fentry tracing a struct_ops program This patch tests attaching an fentry prog to a struct_ops prog. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220330011502.2985292-1-kafai@fb.com --- .../selftests/bpf/prog_tests/dummy_st_ops.c | 23 +++++++++++++++++++ .../selftests/bpf/progs/trace_dummy_st_ops.c | 21 +++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c index 5aa52cc31dc2..c11832657d2b 100644 --- a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c +++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c @@ -2,6 +2,7 @@ /* Copyright (C) 2021. Huawei Technologies Co., Ltd */ #include #include "dummy_st_ops.skel.h" +#include "trace_dummy_st_ops.skel.h" /* Need to keep consistent with definition in include/linux/bpf.h */ struct bpf_dummy_ops_state { @@ -56,6 +57,7 @@ static void test_dummy_init_ptr_arg(void) .ctx_in = args, .ctx_size_in = sizeof(args), ); + struct trace_dummy_st_ops *trace_skel; struct dummy_st_ops *skel; int fd, err; @@ -64,12 +66,33 @@ static void test_dummy_init_ptr_arg(void) return; fd = bpf_program__fd(skel->progs.test_1); + + trace_skel = trace_dummy_st_ops__open(); + if (!ASSERT_OK_PTR(trace_skel, "trace_dummy_st_ops__open")) + goto done; + + err = bpf_program__set_attach_target(trace_skel->progs.fentry_test_1, + fd, "test_1"); + if (!ASSERT_OK(err, "set_attach_target(fentry_test_1)")) + goto done; + + err = trace_dummy_st_ops__load(trace_skel); + if (!ASSERT_OK(err, "load(trace_skel)")) + goto done; + + err = trace_dummy_st_ops__attach(trace_skel); + if (!ASSERT_OK(err, "attach(trace_skel)")) + goto done; + err = bpf_prog_test_run_opts(fd, &attr); ASSERT_OK(err, "test_run"); ASSERT_EQ(in_state.val, 0x5a, "test_ptr_ret"); ASSERT_EQ(attr.retval, exp_retval, "test_ret"); + ASSERT_EQ(trace_skel->bss->val, exp_retval, "fentry_val"); +done: dummy_st_ops__destroy(skel); + trace_dummy_st_ops__destroy(trace_skel); } static void test_dummy_multiple_args(void) diff --git a/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c b/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c new file mode 100644 index 000000000000..00a4be9d3074 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +int val = 0; + +SEC("fentry/test_1") +int BPF_PROG(fentry_test_1, __u64 *st_ops_ctx) +{ + __u64 state; + + /* Read the traced st_ops arg1 which is a pointer */ + bpf_probe_read_kernel(&state, sizeof(__u64), (void *)st_ops_ctx); + /* Read state->val */ + bpf_probe_read_kernel(&val, sizeof(__u32), (void *)state); + + return 0; +} + +char _license[] SEC("license") = "GPL"; From e4ff77598a109bd36789ad5e80aba66fc53d0ffb Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 30 Mar 2022 19:21:23 +0530 Subject: [PATCH 099/708] powerpc/numa: Handle partially initialized numa nodes With commit 09f49dca570a ("mm: handle uninitialized numa nodes gracefully") NODE_DATA for even a memoryless/cpuless node is partially initialized at boot time. Before onlining the node, current Powerpc code checks for NODE_DATA to be NULL. However since NODE_DATA is partially initialized, this check will end up always being false. This causes hotplugging a CPU to a memoryless/cpuless node to fail. Before adding CPUs: $ numactl -H available: 1 nodes (4) node 4 cpus: 0 1 2 3 4 5 6 7 node 4 size: 97372 MB node 4 free: 95545 MB node distances: node 4 4: 10 $ lparstat System Configuration type=Dedicated mode=Capped smt=8 lcpu=1 mem=99709440 kB cpus=0 ent=1.00 %user %sys %wait %idle physc %entc lbusy app vcsw phint ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- 2.66 2.67 0.16 94.51 0.00 0.00 5.33 0.00 67749 0 After hotplugging 32 cores: $ numactl -H node 4 cpus: 0 1 2 3 4 5 6 7 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 node 4 size: 97372 MB node 4 free: 93636 MB node distances: node 4 4: 10 $ lparstat System Configuration type=Dedicated mode=Capped smt=8 lcpu=33 mem=99709440 kB cpus=0 ent=33.00 %user %sys %wait %idle physc %entc lbusy app vcsw phint ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- 0.04 0.02 0.00 99.94 0.00 0.00 0.06 0.00 1128751 3 As we can see numactl is listing only 8 cores while lparstat is showing 33 cores. Also dmesg is showing messages like: [ 2261.318350 ] BUG: arch topology borken [ 2261.318357 ] the DIE domain not a subset of the NODE domain Fixes: 09f49dca570a ("mm: handle uninitialized numa nodes gracefully") Reported-by: Geetika Moolchandani Signed-off-by: Srikar Dronamraju Acked-by: Michal Hocko Acked-by: Oscar Salvador Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220330135123.1868197-1-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b9b7fefbb64b..13022d734951 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1436,7 +1436,7 @@ int find_and_online_cpu_nid(int cpu) if (new_nid < 0 || !node_possible(new_nid)) new_nid = first_online_node; - if (NODE_DATA(new_nid) == NULL) { + if (!node_online(new_nid)) { #ifdef CONFIG_MEMORY_HOTPLUG /* * Need to ensure that NODE_DATA is initialized for a node from From 6b8a94332ee4f7d9a8ae0cbac7609f79c212f06c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Mar 2022 09:54:01 -0400 Subject: [PATCH 100/708] nfsd: Fix a write performance regression The call to filemap_flush() in nfsd_file_put() is there to ensure that we clear out any writes belonging to a NFSv3 client relatively quickly and avoid situations where the file can't be evicted by the garbage collector. It also ensures that we detect write errors quickly. The problem is this causes a regression in performance for some workloads. So try to improve matters by deferring writeback until we're ready to close the file, and need to detect errors so that we can force the client to resend. Tested-by: Jan Kara Fixes: b6669305d35a ("nfsd: Reduce the number of calls to nfsd_file_gc()") Signed-off-by: Trond Myklebust Link: https://lore.kernel.org/all/20220330103457.r4xrhy2d6nhtouzk@quack3.lan Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index cc2831cec669..496f7b3f7523 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -235,6 +235,13 @@ nfsd_file_check_write_error(struct nfsd_file *nf) return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); } +static void +nfsd_file_flush(struct nfsd_file *nf) +{ + if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0) + nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); +} + static void nfsd_file_do_unhash(struct nfsd_file *nf) { @@ -302,11 +309,14 @@ nfsd_file_put(struct nfsd_file *nf) return; } - filemap_flush(nf->nf_file->f_mapping); is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; - nfsd_file_put_noref(nf); - if (is_hashed) + if (!is_hashed) { + nfsd_file_flush(nf); + nfsd_file_put_noref(nf); + } else { + nfsd_file_put_noref(nf); nfsd_file_schedule_laundrette(); + } if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) nfsd_file_gc(); } @@ -327,6 +337,7 @@ nfsd_file_dispose_list(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del(&nf->nf_lru); + nfsd_file_flush(nf); nfsd_file_put_noref(nf); } } @@ -340,6 +351,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del(&nf->nf_lru); + nfsd_file_flush(nf); if (!refcount_dec_and_test(&nf->nf_ref)) continue; if (nfsd_file_free(nf)) From 999397926ab3f78c7d1235cc4ca6e3c89d2769bf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Mar 2022 09:54:02 -0400 Subject: [PATCH 101/708] nfsd: Clean up nfsd_file_put() Make it a little less racy, by removing the refcount_read() test. Then remove the redundant 'is_hashed' variable. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 496f7b3f7523..8f7ed5dbb003 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -301,21 +301,14 @@ nfsd_file_put_noref(struct nfsd_file *nf) void nfsd_file_put(struct nfsd_file *nf) { - bool is_hashed; - set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) { - nfsd_file_put_noref(nf); - return; - } - - is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; - if (!is_hashed) { + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) { nfsd_file_flush(nf); nfsd_file_put_noref(nf); } else { nfsd_file_put_noref(nf); - nfsd_file_schedule_laundrette(); + if (nf->nf_file) + nfsd_file_schedule_laundrette(); } if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) nfsd_file_gc(); From 55037ed7bdc62151a726f5685f88afa6a82959b1 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 29 Mar 2022 10:12:52 -0700 Subject: [PATCH 102/708] uapi/linux/stddef.h: Add include guards Add include guard wrapper define to uapi/linux/stddef.h to prevent macro redefinition errors when stddef.h is included more than once. This was not needed before since the only contents already used a redefinition test. Signed-off-by: Tadeusz Struk Link: https://lore.kernel.org/r/20220329171252.57279-1-tadeusz.struk@linaro.org Fixes: 50d7bd38c3aa ("stddef: Introduce struct_group() helper macro") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook --- include/uapi/linux/stddef.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 3021ea25a284..7837ba4fe728 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_STDDEF_H +#define _UAPI_LINUX_STDDEF_H + #include #ifndef __always_inline @@ -41,3 +44,4 @@ struct { } __empty_ ## NAME; \ TYPE NAME[]; \ } +#endif From 037250f0a45cf9ecf5b52d4b9ff8eadeb609c800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 30 Mar 2022 18:44:09 +0200 Subject: [PATCH 103/708] ath9k: Properly clear TX status area before reporting to mac80211 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ath9k driver was not properly clearing the status area in the ieee80211_tx_info struct before reporting TX status to mac80211. Instead, it was manually filling in fields, which meant that fields introduced later were left as-is. Conveniently, mac80211 actually provides a helper to zero out the status area, so use that to make sure we zero everything. The last commit touching the driver function writing the status information seems to have actually been fixing an issue that was also caused by the area being uninitialised; but it only added clearing of a single field instead of the whole struct. That is now redundant, though, so revert that commit and use it as a convenient Fixes tag. Fixes: cc591d77aba1 ("ath9k: Make sure to zero status.tx_time before reporting TX status") Reported-by: Bagas Sanjaya Cc: Signed-off-by: Toke Høiland-Jørgensen Tested-by: Bagas Sanjaya Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220330164409.16645-1-toke@toke.dk --- drivers/net/wireless/ath/ath9k/xmit.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index d0caf1de2bde..cbcf96ac303e 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -2553,6 +2553,8 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, struct ath_hw *ah = sc->sc_ah; u8 i, tx_rateindex; + ieee80211_tx_info_clear_status(tx_info); + if (txok) tx_info->status.ack_signal = ts->ts_rssi; @@ -2595,9 +2597,6 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, } tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1; - - /* we report airtime in ath_tx_count_airtime(), don't report twice */ - tx_info->status.tx_time = 0; } static void ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq) From 598be865ee00905f16ef3d0355fefe319cac981a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Mar 2022 11:40:30 +0200 Subject: [PATCH 104/708] MAINTAINERS: claim include/uapi/linux/wireless.h As much as I don't really want to maintain this legacy cruft that we started replacing 15+ years ago, for now it still falls on me to take care of it. Add a missing file to the list. Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220328114029.526fbb42784d.If7c79b4ca827dfe82a545689f2d31fcedabd8387@changeid --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e406a6db67d0..ca19e2d9a074 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -201,6 +201,7 @@ F: include/net/ieee80211_radiotap.h F: include/net/iw_handler.h F: include/net/wext.h F: include/uapi/linux/nl80211.h +F: include/uapi/linux/wireless.h F: net/wireless/ 8169 10/100/1000 GIGABIT ETHERNET DRIVER From 61a891efbb1099bb7bdcedfc50f802fabbe46a0e Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 30 Mar 2022 17:40:46 +0300 Subject: [PATCH 105/708] MAINTAINERS: mark wil6210 as orphan Maya is not working on wil6210 anymore so mark it as orphan. Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220330144046.11229-1-kvalo@kernel.org --- MAINTAINERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ca19e2d9a074..cc7a6a65eecb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20965,10 +20965,8 @@ S: Maintained F: drivers/hid/hid-wiimote* WILOCITY WIL6210 WIRELESS DRIVER -M: Maya Erez L: linux-wireless@vger.kernel.org -L: wil6210@qti.qualcomm.com -S: Supported +S: Orphan W: https://wireless.wiki.kernel.org/en/users/Drivers/wil6210 F: drivers/net/wireless/ath/wil6210/ From 059a47f1da93811d37533556d67e72f2261b1127 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 30 Mar 2022 16:37:03 +0000 Subject: [PATCH 106/708] net: sfc: add missing xdp queue reinitialization After rx/tx ring buffer size is changed, kernel panic occurs when it acts XDP_TX or XDP_REDIRECT. When tx/rx ring buffer size is changed(ethtool -G), sfc driver reallocates and reinitializes rx and tx queues and their buffer (tx_queue->buffer). But it misses reinitializing xdp queues(efx->xdp_tx_queues). So, while it is acting XDP_TX or XDP_REDIRECT, it uses the uninitialized tx_queue->buffer. A new function efx_set_xdp_channels() is separated from efx_set_channels() to handle only xdp queues. Splat looks like: BUG: kernel NULL pointer dereference, address: 000000000000002a #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#4] PREEMPT SMP NOPTI RIP: 0010:efx_tx_map_chunk+0x54/0x90 [sfc] CPU: 2 PID: 0 Comm: swapper/2 Tainted: G D 5.17.0+ #55 e8beeee8289528f11357029357cf Code: 48 8b 8d a8 01 00 00 48 8d 14 52 4c 8d 2c d0 44 89 e0 48 85 c9 74 0e 44 89 e2 4c 89 f6 48 80 RSP: 0018:ffff92f121e45c60 EFLAGS: 00010297 RIP: 0010:efx_tx_map_chunk+0x54/0x90 [sfc] RAX: 0000000000000040 RBX: ffff92ea506895c0 RCX: ffffffffc0330870 RDX: 0000000000000001 RSI: 00000001139b10ce RDI: ffff92ea506895c0 RBP: ffffffffc0358a80 R08: 00000001139b110d R09: 0000000000000000 R10: 0000000000000001 R11: ffff92ea414c0088 R12: 0000000000000040 R13: 0000000000000018 R14: 00000001139b10ce R15: ffff92ea506895c0 FS: 0000000000000000(0000) GS:ffff92f121ec0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Code: 48 8b 8d a8 01 00 00 48 8d 14 52 4c 8d 2c d0 44 89 e0 48 85 c9 74 0e 44 89 e2 4c 89 f6 48 80 CR2: 000000000000002a CR3: 00000003e6810004 CR4: 00000000007706e0 RSP: 0018:ffff92f121e85c60 EFLAGS: 00010297 PKRU: 55555554 RAX: 0000000000000040 RBX: ffff92ea50689700 RCX: ffffffffc0330870 RDX: 0000000000000001 RSI: 00000001145a90ce RDI: ffff92ea50689700 RBP: ffffffffc0358a80 R08: 00000001145a910d R09: 0000000000000000 R10: 0000000000000001 R11: ffff92ea414c0088 R12: 0000000000000040 R13: 0000000000000018 R14: 00000001145a90ce R15: ffff92ea50689700 FS: 0000000000000000(0000) GS:ffff92f121e80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000002a CR3: 00000003e6810005 CR4: 00000000007706e0 PKRU: 55555554 Call Trace: efx_xdp_tx_buffers+0x12b/0x3d0 [sfc 84c94b8e32d44d296c17e10a634d3ad454de4ba5] __efx_rx_packet+0x5c3/0x930 [sfc 84c94b8e32d44d296c17e10a634d3ad454de4ba5] efx_rx_packet+0x28c/0x2e0 [sfc 84c94b8e32d44d296c17e10a634d3ad454de4ba5] efx_ef10_ev_process+0x5f8/0xf40 [sfc 84c94b8e32d44d296c17e10a634d3ad454de4ba5] ? enqueue_task_fair+0x95/0x550 efx_poll+0xc4/0x360 [sfc 84c94b8e32d44d296c17e10a634d3ad454de4ba5] Fixes: 3990a8fffbda ("sfc: allocate channels for XDP tx queues") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx_channels.c | 146 +++++++++++++----------- 1 file changed, 81 insertions(+), 65 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index f9064532beb6..83e27231fbe6 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -786,6 +786,85 @@ void efx_remove_channels(struct efx_nic *efx) kfree(efx->xdp_tx_queues); } +static int efx_set_xdp_tx_queue(struct efx_nic *efx, int xdp_queue_number, + struct efx_tx_queue *tx_queue) +{ + if (xdp_queue_number >= efx->xdp_tx_queue_count) + return -EINVAL; + + netif_dbg(efx, drv, efx->net_dev, + "Channel %u TXQ %u is XDP %u, HW %u\n", + tx_queue->channel->channel, tx_queue->label, + xdp_queue_number, tx_queue->queue); + efx->xdp_tx_queues[xdp_queue_number] = tx_queue; + return 0; +} + +static void efx_set_xdp_channels(struct efx_nic *efx) +{ + struct efx_tx_queue *tx_queue; + struct efx_channel *channel; + unsigned int next_queue = 0; + int xdp_queue_number = 0; + int rc; + + /* We need to mark which channels really have RX and TX + * queues, and adjust the TX queue numbers if we have separate + * RX-only and TX-only channels. + */ + efx_for_each_channel(channel, efx) { + if (channel->channel < efx->tx_channel_offset) + continue; + + if (efx_channel_is_xdp_tx(channel)) { + efx_for_each_channel_tx_queue(tx_queue, channel) { + tx_queue->queue = next_queue++; + rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, + tx_queue); + if (rc == 0) + xdp_queue_number++; + } + } else { + efx_for_each_channel_tx_queue(tx_queue, channel) { + tx_queue->queue = next_queue++; + netif_dbg(efx, drv, efx->net_dev, + "Channel %u TXQ %u is HW %u\n", + channel->channel, tx_queue->label, + tx_queue->queue); + } + + /* If XDP is borrowing queues from net stack, it must + * use the queue with no csum offload, which is the + * first one of the channel + * (note: tx_queue_by_type is not initialized yet) + */ + if (efx->xdp_txq_queues_mode == + EFX_XDP_TX_QUEUES_BORROWED) { + tx_queue = &channel->tx_queue[0]; + rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, + tx_queue); + if (rc == 0) + xdp_queue_number++; + } + } + } + WARN_ON(efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_DEDICATED && + xdp_queue_number != efx->xdp_tx_queue_count); + WARN_ON(efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED && + xdp_queue_number > efx->xdp_tx_queue_count); + + /* If we have more CPUs than assigned XDP TX queues, assign the already + * existing queues to the exceeding CPUs + */ + next_queue = 0; + while (xdp_queue_number < efx->xdp_tx_queue_count) { + tx_queue = efx->xdp_tx_queues[next_queue++]; + rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue); + if (rc == 0) + xdp_queue_number++; + } +} + int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) { struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; @@ -857,6 +936,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) efx_init_napi_channel(efx->channel[i]); } + efx_set_xdp_channels(efx); out: /* Destroy unused channel structures */ for (i = 0; i < efx->n_channels; i++) { @@ -889,26 +969,9 @@ rollback: goto out; } -static inline int -efx_set_xdp_tx_queue(struct efx_nic *efx, int xdp_queue_number, - struct efx_tx_queue *tx_queue) -{ - if (xdp_queue_number >= efx->xdp_tx_queue_count) - return -EINVAL; - - netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n", - tx_queue->channel->channel, tx_queue->label, - xdp_queue_number, tx_queue->queue); - efx->xdp_tx_queues[xdp_queue_number] = tx_queue; - return 0; -} - int efx_set_channels(struct efx_nic *efx) { - struct efx_tx_queue *tx_queue; struct efx_channel *channel; - unsigned int next_queue = 0; - int xdp_queue_number; int rc; efx->tx_channel_offset = @@ -926,61 +989,14 @@ int efx_set_channels(struct efx_nic *efx) return -ENOMEM; } - /* We need to mark which channels really have RX and TX - * queues, and adjust the TX queue numbers if we have separate - * RX-only and TX-only channels. - */ - xdp_queue_number = 0; efx_for_each_channel(channel, efx) { if (channel->channel < efx->n_rx_channels) channel->rx_queue.core_index = channel->channel; else channel->rx_queue.core_index = -1; - - if (channel->channel >= efx->tx_channel_offset) { - if (efx_channel_is_xdp_tx(channel)) { - efx_for_each_channel_tx_queue(tx_queue, channel) { - tx_queue->queue = next_queue++; - rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue); - if (rc == 0) - xdp_queue_number++; - } - } else { - efx_for_each_channel_tx_queue(tx_queue, channel) { - tx_queue->queue = next_queue++; - netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is HW %u\n", - channel->channel, tx_queue->label, - tx_queue->queue); - } - - /* If XDP is borrowing queues from net stack, it must use the queue - * with no csum offload, which is the first one of the channel - * (note: channel->tx_queue_by_type is not initialized yet) - */ - if (efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_BORROWED) { - tx_queue = &channel->tx_queue[0]; - rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue); - if (rc == 0) - xdp_queue_number++; - } - } - } } - WARN_ON(efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_DEDICATED && - xdp_queue_number != efx->xdp_tx_queue_count); - WARN_ON(efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED && - xdp_queue_number > efx->xdp_tx_queue_count); - /* If we have more CPUs than assigned XDP TX queues, assign the already - * existing queues to the exceeding CPUs - */ - next_queue = 0; - while (xdp_queue_number < efx->xdp_tx_queue_count) { - tx_queue = efx->xdp_tx_queues[next_queue++]; - rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue); - if (rc == 0) - xdp_queue_number++; - } + efx_set_xdp_channels(efx); rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels); if (rc) From 9381fe8c849cfbe50245ac01fc077554f6eaa0e2 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Thu, 31 Mar 2022 15:04:28 +0800 Subject: [PATCH 107/708] net/tls: fix slab-out-of-bounds bug in decrypt_internal The memory size of tls_ctx->rx.iv for AES128-CCM is 12 setting in tls_set_sw_offload(). The return value of crypto_aead_ivsize() for "ccm(aes)" is 16. So memcpy() require 16 bytes from 12 bytes memory space will trigger slab-out-of-bounds bug as following: ================================================================== BUG: KASAN: slab-out-of-bounds in decrypt_internal+0x385/0xc40 [tls] Read of size 16 at addr ffff888114e84e60 by task tls/10911 Call Trace: dump_stack_lvl+0x34/0x44 print_report.cold+0x5e/0x5db ? decrypt_internal+0x385/0xc40 [tls] kasan_report+0xab/0x120 ? decrypt_internal+0x385/0xc40 [tls] kasan_check_range+0xf9/0x1e0 memcpy+0x20/0x60 decrypt_internal+0x385/0xc40 [tls] ? tls_get_rec+0x2e0/0x2e0 [tls] ? process_rx_list+0x1a5/0x420 [tls] ? tls_setup_from_iter.constprop.0+0x2e0/0x2e0 [tls] decrypt_skb_update+0x9d/0x400 [tls] tls_sw_recvmsg+0x3c8/0xb50 [tls] Allocated by task 10911: kasan_save_stack+0x1e/0x40 __kasan_kmalloc+0x81/0xa0 tls_set_sw_offload+0x2eb/0xa20 [tls] tls_setsockopt+0x68c/0x700 [tls] __sys_setsockopt+0xfe/0x1b0 Replace the crypto_aead_ivsize() with prot->iv_size + prot->salt_size when memcpy() iv value in TLS_1_3_VERSION scenario. Fixes: f295b3ae9f59 ("net/tls: Add support of AES128-CCM based ciphers") Signed-off-by: Ziyang Xuan Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 0024a692f0f8..a8976ef95528 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1496,7 +1496,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, if (prot->version == TLS_1_3_VERSION || prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) memcpy(iv + iv_offset, tls_ctx->rx.iv, - crypto_aead_ivsize(ctx->aead_recv)); + prot->iv_size + prot->salt_size); else memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size); From 012d69fbfcc739f846766c1da56ef8b493b803b5 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Thu, 31 Mar 2022 10:26:43 +0300 Subject: [PATCH 108/708] vrf: fix packet sniffing for traffic originating from ip tunnels in commit 048939088220 ("vrf: add mac header for tunneled packets when sniffer is attached") an Ethernet header was cooked for traffic originating from tunnel devices. However, the header is added based on whether the mac_header is unset and ignores cases where the device doesn't expose a mac header to upper layers, such as in ip tunnels like ipip and gre. Traffic originating from such devices still appears garbled when capturing on the vrf device. Fix by observing whether the original device exposes a header to upper layers, similar to the logic done in af_packet. In addition, skb->mac_len needs to be adjusted after adding the Ethernet header for the skb_push/pull() surrounding dev_queue_xmit_nit() to work on these packets. Fixes: 048939088220 ("vrf: add mac header for tunneled packets when sniffer is attached") Signed-off-by: Eyal Birger Reviewed-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 85e362461d71..cfc30ce4c6e1 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1265,6 +1265,7 @@ static int vrf_prepare_mac_header(struct sk_buff *skb, eth = (struct ethhdr *)skb->data; skb_reset_mac_header(skb); + skb_reset_mac_len(skb); /* we set the ethernet destination and the source addresses to the * address of the VRF device. @@ -1294,9 +1295,9 @@ static int vrf_prepare_mac_header(struct sk_buff *skb, */ static int vrf_add_mac_header_if_unset(struct sk_buff *skb, struct net_device *vrf_dev, - u16 proto) + u16 proto, struct net_device *orig_dev) { - if (skb_mac_header_was_set(skb)) + if (skb_mac_header_was_set(skb) && dev_has_header(orig_dev)) return 0; return vrf_prepare_mac_header(skb, vrf_dev, proto); @@ -1402,6 +1403,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, /* if packet is NDISC then keep the ingress interface */ if (!is_ndisc) { + struct net_device *orig_dev = skb->dev; + vrf_rx_stats(vrf_dev, skb->len); skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; @@ -1410,7 +1413,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, int err; err = vrf_add_mac_header_if_unset(skb, vrf_dev, - ETH_P_IPV6); + ETH_P_IPV6, + orig_dev); if (likely(!err)) { skb_push(skb, skb->mac_len); dev_queue_xmit_nit(skb, vrf_dev); @@ -1440,6 +1444,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, struct sk_buff *skb) { + struct net_device *orig_dev = skb->dev; + skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; IPCB(skb)->flags |= IPSKB_L3SLAVE; @@ -1460,7 +1466,8 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, if (!list_empty(&vrf_dev->ptype_all)) { int err; - err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP); + err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP, + orig_dev); if (likely(!err)) { skb_push(skb, skb->mac_len); dev_queue_xmit_nit(skb, vrf_dev); From 1effe8ca4e34c34cdd9318436a4232dcb582ebf4 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 31 Mar 2022 11:24:41 +0100 Subject: [PATCH 109/708] skbuff: fix coalescing for page_pool fragment recycling Fix a use-after-free when using page_pool with page fragments. We encountered this problem during normal RX in the hns3 driver: (1) Initially we have three descriptors in the RX queue. The first one allocates PAGE1 through page_pool, and the other two allocate one half of PAGE2 each. Page references look like this: RX_BD1 _______ PAGE1 RX_BD2 _______ PAGE2 RX_BD3 _________/ (2) Handle RX on the first descriptor. Allocate SKB1, eventually added to the receive queue by tcp_queue_rcv(). (3) Handle RX on the second descriptor. Allocate SKB2 and pass it to netif_receive_skb(): netif_receive_skb(SKB2) ip_rcv(SKB2) SKB3 = skb_clone(SKB2) SKB2 and SKB3 share a reference to PAGE2 through skb_shinfo()->dataref. The other ref to PAGE2 is still held by RX_BD3: SKB2 ---+- PAGE2 SKB3 __/ / RX_BD3 _________/ (3b) Now while handling TCP, coalesce SKB3 with SKB1: tcp_v4_rcv(SKB3) tcp_try_coalesce(to=SKB1, from=SKB3) // succeeds kfree_skb_partial(SKB3) skb_release_data(SKB3) // drops one dataref SKB1 _____ PAGE1 \____ SKB2 _____ PAGE2 / RX_BD3 _________/ In skb_try_coalesce(), __skb_frag_ref() takes a page reference to PAGE2, where it should instead have increased the page_pool frag reference, pp_frag_count. Without coalescing, when releasing both SKB2 and SKB3, a single reference to PAGE2 would be dropped. Now when releasing SKB1 and SKB2, two references to PAGE2 will be dropped, resulting in underflow. (3c) Drop SKB2: af_packet_rcv(SKB2) consume_skb(SKB2) skb_release_data(SKB2) // drops second dataref page_pool_return_skb_page(PAGE2) // drops one pp_frag_count SKB1 _____ PAGE1 \____ PAGE2 / RX_BD3 _________/ (4) Userspace calls recvmsg() Copies SKB1 and releases it. Since SKB3 was coalesced with SKB1, we release the SKB3 page as well: tcp_eat_recv_skb(SKB1) skb_release_data(SKB1) page_pool_return_skb_page(PAGE1) page_pool_return_skb_page(PAGE2) // drops second pp_frag_count (5) PAGE2 is freed, but the third RX descriptor was still using it! In our case this causes IOMMU faults, but it would silently corrupt memory if the IOMMU was disabled. Change the logic that checks whether pp_recycle SKBs can be coalesced. We still reject differing pp_recycle between 'from' and 'to' SKBs, but in order to avoid the situation described above, we also reject coalescing when both 'from' and 'to' are pp_recycled and 'from' is cloned. The new logic allows coalescing a cloned pp_recycle SKB into a page refcounted one, because in this case the release (4) will drop the right reference, the one taken by skb_try_coalesce(). Fixes: 53e0961da1c7 ("page_pool: add frag page recycling support in page pool") Suggested-by: Alexander Duyck Signed-off-by: Jean-Philippe Brucker Reviewed-by: Yunsheng Lin Reviewed-by: Alexander Duyck Acked-by: Ilias Apalodimas Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/core/skbuff.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 10bde7c6db44..30b523fa4ad2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5276,11 +5276,18 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, if (skb_cloned(to)) return false; - /* The page pool signature of struct page will eventually figure out - * which pages can be recycled or not but for now let's prohibit slab - * allocated and page_pool allocated SKBs from being coalesced. + /* In general, avoid mixing slab allocated and page_pool allocated + * pages within the same SKB. However when @to is not pp_recycle and + * @from is cloned, we can transition frag pages from page_pool to + * reference counted. + * + * On the other hand, don't allow coalescing two pp_recycle SKBs if + * @from is cloned, in case the SKB is using page_pool fragment + * references (PP_FLAG_PAGE_FRAG). Since we only take full page + * references for cloned SKBs at the moment that would result in + * inconsistent reference counts. */ - if (to->pp_recycle != from->pp_recycle) + if (to->pp_recycle != (from->pp_recycle && !skb_cloned(from))) return false; if (len <= skb_tailroom(to)) { From 066dfc4290406b1b0b014ae3267d4266a344efd1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 31 Mar 2022 16:28:54 +0300 Subject: [PATCH 110/708] Revert "net: dsa: stop updating master MTU from master.c" This reverts commit a1ff94c2973c43bc1e2677ac63ebb15b1d1ff846. Switch drivers that don't implement ->port_change_mtu() will cause the DSA master to remain with an MTU of 1500, since we've deleted the other code path. In turn, this causes a regression for those systems, where MTU-sized traffic can no longer be terminated. Revert the change taking into account the fact that rtnl_lock() is now taken top-level from the callers of dsa_master_setup() and dsa_master_teardown(). Also add a comment in order for it to be absolutely clear why it is still needed. Fixes: a1ff94c2973c ("net: dsa: stop updating master MTU from master.c") Reported-by: Luiz Angelo Daros de Luca Signed-off-by: Vladimir Oltean Tested-by: Luiz Angelo Daros de Luca Signed-off-by: David S. Miller --- net/dsa/master.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/net/dsa/master.c b/net/dsa/master.c index 991c2930d631..2851e44c4cf0 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -335,11 +335,24 @@ static const struct attribute_group dsa_group = { .attrs = dsa_slave_attrs, }; +static void dsa_master_reset_mtu(struct net_device *dev) +{ + int err; + + err = dev_set_mtu(dev, ETH_DATA_LEN); + if (err) + netdev_dbg(dev, + "Unable to reset MTU to exclude DSA overheads\n"); +} + int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { + const struct dsa_device_ops *tag_ops = cpu_dp->tag_ops; struct dsa_switch *ds = cpu_dp->ds; struct device_link *consumer_link; - int ret; + int mtu, ret; + + mtu = ETH_DATA_LEN + dsa_tag_protocol_overhead(tag_ops); /* The DSA master must use SET_NETDEV_DEV for this to work. */ consumer_link = device_link_add(ds->dev, dev->dev.parent, @@ -349,6 +362,15 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) "Failed to create a device link to DSA switch %s\n", dev_name(ds->dev)); + /* The switch driver may not implement ->port_change_mtu(), case in + * which dsa_slave_change_mtu() will not update the master MTU either, + * so we need to do that here. + */ + ret = dev_set_mtu(dev, mtu); + if (ret) + netdev_warn(dev, "error %d setting MTU to %d to include DSA overhead\n", + ret, mtu); + /* If we use a tagging format that doesn't have an ethertype * field, make sure that all packets from this point on get * sent to the tag format's receive function. @@ -384,6 +406,7 @@ void dsa_master_teardown(struct net_device *dev) sysfs_remove_group(&dev->dev.kobj, &dsa_group); dsa_netdev_ops_set(dev, NULL); dsa_master_ethtool_teardown(dev); + dsa_master_reset_mtu(dev); dsa_master_set_promiscuity(dev, -1); dev->dsa_ptr = NULL; From bd8c624c0cd59de0032752ba3001c107bba97f7b Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 31 Mar 2022 09:20:06 -0700 Subject: [PATCH 111/708] ice: Clear default forwarding VSI during VSI release VSI is set as default forwarding one when promisc mode is set for PF interface, when PF is switched to switchdev mode or when VF driver asks to enable allmulticast or promisc mode for the VF interface (when vf-true-promisc-support priv flag is off). The third case is buggy because in that case VSI associated with VF remains as default one after VF removal. Reproducer: 1. Create VF echo 1 > sys/class/net/ens7f0/device/sriov_numvfs 2. Enable allmulticast or promisc mode on VF ip link set ens7f0v0 allmulticast on ip link set ens7f0v0 promisc on 3. Delete VF echo 0 > sys/class/net/ens7f0/device/sriov_numvfs 4. Try to enable promisc mode on PF ip link set ens7f0 promisc on Although it looks that promisc mode on PF is enabled the opposite is true because ice_vsi_sync_fltr() responsible for IFF_PROMISC handling first checks if any other VSI is set as default forwarding one and if so the function does not do anything. At this point it is not possible to enable promisc mode on PF without re-probe device. To resolve the issue this patch clear default forwarding VSI during ice_vsi_release() when the VSI to be released is the default one. Fixes: 01b5e89aab49 ("ice: Add VF promiscuous support") Signed-off-by: Ivan Vecera Reviewed-by: Michal Swiatkowski Reviewed-by: Maciej Fijalkowski Signed-off-by: Alice Michael Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice_lib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index b897926f817d..6d6233204388 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2983,6 +2983,8 @@ int ice_vsi_release(struct ice_vsi *vsi) } } + if (ice_is_vsi_dflt_vsi(pf->first_sw, vsi)) + ice_clear_dflt_vsi(pf->first_sw); ice_fltr_remove_all(vsi); ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); err = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx); From 2c0069f3f91f125b1b2ce66cc6bea8eb134723c3 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 31 Mar 2022 09:20:07 -0700 Subject: [PATCH 112/708] ice: Fix MAC address setting Commit 2ccc1c1ccc671b ("ice: Remove excess error variables") merged the usage of 'status' and 'err' variables into single one in function ice_set_mac_address(). Unfortunately this causes a regression when call of ice_fltr_add_mac() returns -EEXIST because this return value does not indicate an error in this case but value of 'err' remains to be -EEXIST till the end of the function and is returned to caller. Prior mentioned commit this does not happen because return value of ice_fltr_add_mac() was stored to 'status' variable first and if it was -EEXIST then 'err' remains to be zero. Fix the problem by reset 'err' to zero when ice_fltr_add_mac() returns -EEXIST. Fixes: 2ccc1c1ccc671b ("ice: Remove excess error variables") Signed-off-by: Ivan Vecera Reviewed-by: Jacob Keller Acked-by: Alexander Lobakin Signed-off-by: Alice Michael Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index b588d7995631..d755ce07869f 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5475,16 +5475,19 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) /* Add filter for new MAC. If filter exists, return success */ err = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI); - if (err == -EEXIST) + if (err == -EEXIST) { /* Although this MAC filter is already present in hardware it's * possible in some cases (e.g. bonding) that dev_addr was * modified outside of the driver and needs to be restored back * to this value. */ netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac); - else if (err) + + return 0; + } else if (err) { /* error if the new filter addition failed */ err = -EADDRNOTAVAIL; + } err_update_filters: if (err) { From 1273f89578f268ea705ddbad60c4bd2dcff80611 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 31 Mar 2022 09:20:08 -0700 Subject: [PATCH 113/708] ice: Fix broken IFF_ALLMULTI handling Handling of all-multicast flag and associated multicast promiscuous mode is broken in ice driver. When an user switches allmulticast flag on or off the driver checks whether any VLANs are configured over the interface (except default VLAN 0). If any extra VLANs are registered it enables multicast promiscuous mode for all these VLANs (including default VLAN 0) using ICE_SW_LKUP_PROMISC_VLAN look-up type. In this situation all multicast packets tagged with known VLAN ID or untagged are received and multicast packets tagged with unknown VLAN ID ignored. If no extra VLANs are registered (so only VLAN 0 exists) it enables multicast promiscuous mode for VLAN 0 and uses ICE_SW_LKUP_PROMISC look-up type. In this situation any multicast packets including tagged ones are received. The driver handles IFF_ALLMULTI in ice_vsi_sync_fltr() this way: ice_vsi_sync_fltr() { ... if (changed_flags & IFF_ALLMULTI) { if (netdev->flags & IFF_ALLMULTI) { if (vsi->num_vlans > 1) ice_set_promisc(..., ICE_MCAST_VLAN_PROMISC_BITS); else ice_set_promisc(..., ICE_MCAST_PROMISC_BITS); } else { if (vsi->num_vlans > 1) ice_clear_promisc(..., ICE_MCAST_VLAN_PROMISC_BITS); else ice_clear_promisc(..., ICE_MCAST_PROMISC_BITS); } } ... } The code above depends on value vsi->num_vlan that specifies number of VLANs configured over the interface (including VLAN 0) and this is problem because that value is modified in NDO callbacks ice_vlan_rx_add_vid() and ice_vlan_rx_kill_vid(). Scenario 1: 1. ip link set ens7f0 allmulticast on 2. ip link add vlan10 link ens7f0 type vlan id 10 3. ip link set ens7f0 allmulticast off 4. ip link set ens7f0 allmulticast on [1] In this scenario IFF_ALLMULTI is enabled and the driver calls ice_set_promisc(..., ICE_MCAST_PROMISC_BITS) that installs multicast promisc rule with non-VLAN look-up type. [2] Then VLAN with ID 10 is added and vsi->num_vlan incremented to 2 [3] Command switches IFF_ALLMULTI off and the driver calls ice_clear_promisc(..., ICE_MCAST_VLAN_PROMISC_BITS) but this call is effectively NOP because it looks for multicast promisc rules for VLAN 0 and VLAN 10 with VLAN look-up type but no such rules exist. So the all-multicast remains enabled silently in hardware. [4] Command tries to switch IFF_ALLMULTI on and the driver calls ice_clear_promisc(..., ICE_MCAST_PROMISC_BITS) but this call fails (-EEXIST) because non-VLAN multicast promisc rule already exists. Scenario 2: 1. ip link add vlan10 link ens7f0 type vlan id 10 2. ip link set ens7f0 allmulticast on 3. ip link add vlan20 link ens7f0 type vlan id 20 4. ip link del vlan10 ; ip link del vlan20 5. ip link set ens7f0 allmulticast off [1] VLAN with ID 10 is added and vsi->num_vlan==2 [2] Command switches IFF_ALLMULTI on and driver installs multicast promisc rules with VLAN look-up type for VLAN 0 and 10 [3] VLAN with ID 20 is added and vsi->num_vlan==3 but no multicast promisc rules is added for this new VLAN so the interface does not receive MC packets from VLAN 20 [4] Both VLANs are removed but multicast rule for VLAN 10 remains installed so interface receives multicast packets from VLAN 10 [5] Command switches IFF_ALLMULTI off and because vsi->num_vlan is 1 the driver tries to remove multicast promisc rule for VLAN 0 with non-VLAN look-up that does not exist. All-multicast looks disabled from user point of view but it is partially enabled in HW (interface receives all multicast packets either untagged or tagged with VLAN ID 10) To resolve these issues the patch introduces these changes: 1. Adds handling for IFF_ALLMULTI to ice_vlan_rx_add_vid() and ice_vlan_rx_kill_vid() callbacks. So when VLAN is added/removed and IFF_ALLMULTI is enabled an appropriate multicast promisc rule for that VLAN ID is added/removed. 2. In ice_vlan_rx_add_vid() when first VLAN besides VLAN 0 is added so (vsi->num_vlan == 2) and IFF_ALLMULTI is enabled then look-up type for existing multicast promisc rule for VLAN 0 is updated to ICE_MCAST_VLAN_PROMISC_BITS. 3. In ice_vlan_rx_kill_vid() when last VLAN besides VLAN 0 is removed so (vsi->num_vlan == 1) and IFF_ALLMULTI is enabled then look-up type for existing multicast promisc rule for VLAN 0 is updated to ICE_MCAST_PROMISC_BITS. 4. Both ice_vlan_rx_{add,kill}_vid() have to run under ICE_CFG_BUSY bit protection to avoid races with ice_vsi_sync_fltr() that runs in ice_service_task() context. 5. Bit ICE_VSI_VLAN_FLTR_CHANGED is use-less and can be removed. 6. Error messages added to ice_fltr_*_vsi_promisc() helper functions to avoid them in their callers 7. Small improvements to increase readability Fixes: 5eda8afd6bcc ("ice: Add support for PF/VF promiscuous mode") Signed-off-by: Ivan Vecera Reviewed-by: Jacob Keller Signed-off-by: Alice Michael Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice.h | 1 - drivers/net/ethernet/intel/ice/ice_fltr.c | 44 ++++++++- drivers/net/ethernet/intel/ice/ice_main.c | 114 +++++++++++++++------- 3 files changed, 121 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index d4f1874df7d0..26eaee0b6503 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -301,7 +301,6 @@ enum ice_vsi_state { ICE_VSI_NETDEV_REGISTERED, ICE_VSI_UMAC_FLTR_CHANGED, ICE_VSI_MMAC_FLTR_CHANGED, - ICE_VSI_VLAN_FLTR_CHANGED, ICE_VSI_PROMISC_CHANGED, ICE_VSI_STATE_NBITS /* must be last */ }; diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c index af57eb114966..85a94483c2ed 100644 --- a/drivers/net/ethernet/intel/ice/ice_fltr.c +++ b/drivers/net/ethernet/intel/ice/ice_fltr.c @@ -58,7 +58,16 @@ int ice_fltr_set_vlan_vsi_promisc(struct ice_hw *hw, struct ice_vsi *vsi, u8 promisc_mask) { - return ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_mask, false); + struct ice_pf *pf = hw->back; + int result; + + result = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_mask, false); + if (result) + dev_err(ice_pf_to_dev(pf), + "Error setting promisc mode on VSI %i (rc=%d)\n", + vsi->vsi_num, result); + + return result; } /** @@ -73,7 +82,16 @@ int ice_fltr_clear_vlan_vsi_promisc(struct ice_hw *hw, struct ice_vsi *vsi, u8 promisc_mask) { - return ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_mask, true); + struct ice_pf *pf = hw->back; + int result; + + result = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_mask, true); + if (result) + dev_err(ice_pf_to_dev(pf), + "Error clearing promisc mode on VSI %i (rc=%d)\n", + vsi->vsi_num, result); + + return result; } /** @@ -87,7 +105,16 @@ int ice_fltr_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, u16 vid) { - return ice_clear_vsi_promisc(hw, vsi_handle, promisc_mask, vid); + struct ice_pf *pf = hw->back; + int result; + + result = ice_clear_vsi_promisc(hw, vsi_handle, promisc_mask, vid); + if (result) + dev_err(ice_pf_to_dev(pf), + "Error clearing promisc mode on VSI %i for VID %u (rc=%d)\n", + ice_get_hw_vsi_num(hw, vsi_handle), vid, result); + + return result; } /** @@ -101,7 +128,16 @@ int ice_fltr_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, u16 vid) { - return ice_set_vsi_promisc(hw, vsi_handle, promisc_mask, vid); + struct ice_pf *pf = hw->back; + int result; + + result = ice_set_vsi_promisc(hw, vsi_handle, promisc_mask, vid); + if (result) + dev_err(ice_pf_to_dev(pf), + "Error setting promisc mode on VSI %i for VID %u (rc=%d)\n", + ice_get_hw_vsi_num(hw, vsi_handle), vid, result); + + return result; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d755ce07869f..1d2ca39add95 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -243,8 +243,7 @@ static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr) static bool ice_vsi_fltr_changed(struct ice_vsi *vsi) { return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) || - test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state) || - test_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); + test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); } /** @@ -260,10 +259,15 @@ static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m) if (vsi->type != ICE_VSI_PF) return 0; - if (ice_vsi_has_non_zero_vlans(vsi)) - status = ice_fltr_set_vlan_vsi_promisc(&vsi->back->hw, vsi, promisc_m); - else - status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, 0); + if (ice_vsi_has_non_zero_vlans(vsi)) { + promisc_m |= (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX); + status = ice_fltr_set_vlan_vsi_promisc(&vsi->back->hw, vsi, + promisc_m); + } else { + status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, + promisc_m, 0); + } + return status; } @@ -280,10 +284,15 @@ static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m) if (vsi->type != ICE_VSI_PF) return 0; - if (ice_vsi_has_non_zero_vlans(vsi)) - status = ice_fltr_clear_vlan_vsi_promisc(&vsi->back->hw, vsi, promisc_m); - else - status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, 0); + if (ice_vsi_has_non_zero_vlans(vsi)) { + promisc_m |= (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX); + status = ice_fltr_clear_vlan_vsi_promisc(&vsi->back->hw, vsi, + promisc_m); + } else { + status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx, + promisc_m, 0); + } + return status; } @@ -302,7 +311,6 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; u32 changed_flags = 0; - u8 promisc_m; int err; if (!vsi->netdev) @@ -320,7 +328,6 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) if (ice_vsi_fltr_changed(vsi)) { clear_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state); clear_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); - clear_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); /* grab the netdev's addr_list_lock */ netif_addr_lock_bh(netdev); @@ -369,29 +376,15 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) /* check for changes in promiscuous modes */ if (changed_flags & IFF_ALLMULTI) { if (vsi->current_netdev_flags & IFF_ALLMULTI) { - if (ice_vsi_has_non_zero_vlans(vsi)) - promisc_m = ICE_MCAST_VLAN_PROMISC_BITS; - else - promisc_m = ICE_MCAST_PROMISC_BITS; - - err = ice_set_promisc(vsi, promisc_m); + err = ice_set_promisc(vsi, ICE_MCAST_PROMISC_BITS); if (err) { - netdev_err(netdev, "Error setting Multicast promiscuous mode on VSI %i\n", - vsi->vsi_num); vsi->current_netdev_flags &= ~IFF_ALLMULTI; goto out_promisc; } } else { /* !(vsi->current_netdev_flags & IFF_ALLMULTI) */ - if (ice_vsi_has_non_zero_vlans(vsi)) - promisc_m = ICE_MCAST_VLAN_PROMISC_BITS; - else - promisc_m = ICE_MCAST_PROMISC_BITS; - - err = ice_clear_promisc(vsi, promisc_m); + err = ice_clear_promisc(vsi, ICE_MCAST_PROMISC_BITS); if (err) { - netdev_err(netdev, "Error clearing Multicast promiscuous mode on VSI %i\n", - vsi->vsi_num); vsi->current_netdev_flags |= IFF_ALLMULTI; goto out_promisc; } @@ -3488,6 +3481,20 @@ ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) if (!vid) return 0; + while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) + usleep_range(1000, 2000); + + /* Add multicast promisc rule for the VLAN ID to be added if + * all-multicast is currently enabled. + */ + if (vsi->current_netdev_flags & IFF_ALLMULTI) { + ret = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, + ICE_MCAST_VLAN_PROMISC_BITS, + vid); + if (ret) + goto finish; + } + vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); /* Add a switch rule for this VLAN ID so its corresponding VLAN tagged @@ -3495,8 +3502,23 @@ ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) */ vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0); ret = vlan_ops->add_vlan(vsi, &vlan); - if (!ret) - set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); + if (ret) + goto finish; + + /* If all-multicast is currently enabled and this VLAN ID is only one + * besides VLAN-0 we have to update look-up type of multicast promisc + * rule for VLAN-0 from ICE_SW_LKUP_PROMISC to ICE_SW_LKUP_PROMISC_VLAN. + */ + if ((vsi->current_netdev_flags & IFF_ALLMULTI) && + ice_vsi_num_non_zero_vlans(vsi) == 1) { + ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx, + ICE_MCAST_PROMISC_BITS, 0); + ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, + ICE_MCAST_VLAN_PROMISC_BITS, 0); + } + +finish: + clear_bit(ICE_CFG_BUSY, vsi->state); return ret; } @@ -3522,6 +3544,9 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) if (!vid) return 0; + while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) + usleep_range(1000, 2000); + vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); /* Make sure VLAN delete is successful before updating VLAN @@ -3530,10 +3555,33 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0); ret = vlan_ops->del_vlan(vsi, &vlan); if (ret) - return ret; + goto finish; - set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); - return 0; + /* Remove multicast promisc rule for the removed VLAN ID if + * all-multicast is enabled. + */ + if (vsi->current_netdev_flags & IFF_ALLMULTI) + ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx, + ICE_MCAST_VLAN_PROMISC_BITS, vid); + + if (!ice_vsi_has_non_zero_vlans(vsi)) { + /* Update look-up type of multicast promisc rule for VLAN 0 + * from ICE_SW_LKUP_PROMISC_VLAN to ICE_SW_LKUP_PROMISC when + * all-multicast is enabled and VLAN 0 is the only VLAN rule. + */ + if (vsi->current_netdev_flags & IFF_ALLMULTI) { + ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx, + ICE_MCAST_VLAN_PROMISC_BITS, + 0); + ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, + ICE_MCAST_PROMISC_BITS, 0); + } + } + +finish: + clear_bit(ICE_CFG_BUSY, vsi->state); + + return ret; } /** From 60be976ac45137657b7b505d7e0d44d0e51accb7 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Fri, 1 Apr 2022 10:48:42 +0800 Subject: [PATCH 114/708] mctp: Fix check for dev_hard_header() result dev_hard_header() returns the length of the header, so we need to test for negative errors rather than non-zero. Fixes: 889b7da23abf ("mctp: Add initial routing framework") Signed-off-by: Matt Johnston Signed-off-by: David S. Miller --- net/mctp/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index d5e7db83fe9d..ee548c46c78f 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -512,7 +512,7 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol), daddr, skb->dev->dev_addr, skb->len); - if (rc) { + if (rc < 0) { kfree_skb(skb); return -EHOSTUNREACH; } From 8ce40a2fd350769e94877b53d353a3b11d85f43b Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Fri, 1 Apr 2022 10:48:43 +0800 Subject: [PATCH 115/708] mctp i2c: correct mctp_i2c_header_create result header_ops.create should return the length of the header, instead mctp_i2c_head_create() returned 0. This didn't cause any problem because the MCTP stack accepted 0 as success. Fixes: f5b8abf9fc3d ("mctp i2c: MCTP I2C binding driver") Signed-off-by: Matt Johnston Signed-off-by: David S. Miller --- drivers/net/mctp/mctp-i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c index baf7afac7857..53846c6b56ca 100644 --- a/drivers/net/mctp/mctp-i2c.c +++ b/drivers/net/mctp/mctp-i2c.c @@ -553,7 +553,7 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, hdr->source_slave = ((llsrc << 1) & 0xff) | 0x01; mhdr->ver = 0x01; - return 0; + return sizeof(struct mctp_i2c_hdr); } static int mctp_i2c_tx_thread(void *data) From 4a9dda1c1da65beee994f0977a56a9a21c5db2a7 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Fri, 1 Apr 2022 10:48:44 +0800 Subject: [PATCH 116/708] mctp: Use output netdev to allocate skb headroom Previously the skb was allocated with headroom MCTP_HEADER_MAXLEN, but that isn't sufficient if we are using devs that are not MCTP specific. This also adds a check that the smctp_halen provided to sendmsg for extended addressing is the correct size for the netdev. Fixes: 833ef3b91de6 ("mctp: Populate socket implementation") Reported-by: Matthew Rinaldi Signed-off-by: Matt Johnston Signed-off-by: David S. Miller --- include/net/mctp.h | 2 -- net/mctp/af_mctp.c | 46 +++++++++++++++++++++++++++++++++------------- net/mctp/route.c | 14 +++++++++++--- 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/include/net/mctp.h b/include/net/mctp.h index d37268fe6825..82800d521c3d 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -36,8 +36,6 @@ struct mctp_hdr { #define MCTP_HDR_TAG_SHIFT 0 #define MCTP_HDR_TAG_MASK GENMASK(2, 0) -#define MCTP_HEADER_MAXLEN 4 - #define MCTP_INITIAL_DEFAULT_NET 1 static inline bool mctp_address_unicast(mctp_eid_t eid) diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index f0702d920d8d..e22b0cbb2f35 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -93,13 +93,13 @@ out_release: static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name); - const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr); int rc, addrlen = msg->msg_namelen; struct sock *sk = sock->sk; struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); struct mctp_skb_cb *cb; struct mctp_route *rt; - struct sk_buff *skb; + struct sk_buff *skb = NULL; + int hlen; if (addr) { const u8 tagbits = MCTP_TAG_MASK | MCTP_TAG_OWNER | @@ -129,6 +129,34 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (addr->smctp_network == MCTP_NET_ANY) addr->smctp_network = mctp_default_net(sock_net(sk)); + /* direct addressing */ + if (msk->addr_ext && addrlen >= sizeof(struct sockaddr_mctp_ext)) { + DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, + extaddr, msg->msg_name); + struct net_device *dev; + + rc = -EINVAL; + rcu_read_lock(); + dev = dev_get_by_index_rcu(sock_net(sk), extaddr->smctp_ifindex); + /* check for correct halen */ + if (dev && extaddr->smctp_halen == dev->addr_len) { + hlen = LL_RESERVED_SPACE(dev) + sizeof(struct mctp_hdr); + rc = 0; + } + rcu_read_unlock(); + if (rc) + goto err_free; + rt = NULL; + } else { + rt = mctp_route_lookup(sock_net(sk), addr->smctp_network, + addr->smctp_addr.s_addr); + if (!rt) { + rc = -EHOSTUNREACH; + goto err_free; + } + hlen = LL_RESERVED_SPACE(rt->dev->dev) + sizeof(struct mctp_hdr); + } + skb = sock_alloc_send_skb(sk, hlen + 1 + len, msg->msg_flags & MSG_DONTWAIT, &rc); if (!skb) @@ -147,8 +175,8 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) cb = __mctp_cb(skb); cb->net = addr->smctp_network; - /* direct addressing */ - if (msk->addr_ext && addrlen >= sizeof(struct sockaddr_mctp_ext)) { + if (!rt) { + /* fill extended address in cb */ DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, extaddr, msg->msg_name); @@ -159,17 +187,9 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) } cb->ifindex = extaddr->smctp_ifindex; + /* smctp_halen is checked above */ cb->halen = extaddr->smctp_halen; memcpy(cb->haddr, extaddr->smctp_haddr, cb->halen); - - rt = NULL; - } else { - rt = mctp_route_lookup(sock_net(sk), addr->smctp_network, - addr->smctp_addr.s_addr); - if (!rt) { - rc = -EHOSTUNREACH; - goto err_free; - } } rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr, diff --git a/net/mctp/route.c b/net/mctp/route.c index ee548c46c78f..3b24b8d18b5b 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -503,6 +503,11 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) if (cb->ifindex) { /* direct route; use the hwaddr we stashed in sendmsg */ + if (cb->halen != skb->dev->addr_len) { + /* sanity check, sendmsg should have already caught this */ + kfree_skb(skb); + return -EMSGSIZE; + } daddr = cb->haddr; } else { /* If lookup fails let the device handle daddr==NULL */ @@ -756,7 +761,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, { const unsigned int hlen = sizeof(struct mctp_hdr); struct mctp_hdr *hdr, *hdr2; - unsigned int pos, size; + unsigned int pos, size, headroom; struct sk_buff *skb2; int rc; u8 seq; @@ -770,6 +775,9 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, return -EMSGSIZE; } + /* keep same headroom as the original skb */ + headroom = skb_headroom(skb); + /* we've got the header */ skb_pull(skb, hlen); @@ -777,7 +785,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, /* size of message payload */ size = min(mtu - hlen, skb->len - pos); - skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL); + skb2 = alloc_skb(headroom + hlen + size, GFP_KERNEL); if (!skb2) { rc = -ENOMEM; break; @@ -793,7 +801,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, skb_set_owner_w(skb2, skb->sk); /* establish packet */ - skb_reserve(skb2, MCTP_HEADER_MAXLEN); + skb_reserve(skb2, headroom); skb_reset_network_header(skb2); skb_put(skb2, hlen + size); skb2->transport_header = skb2->network_header + hlen; From c3efcedd272aa6dd5929e20cf902a52ddaa1197a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 31 Mar 2022 22:42:44 -0700 Subject: [PATCH 117/708] net: micrel: fix KS8851_MLL Kconfig KS8851_MLL selects MICREL_PHY, which depends on PTP_1588_CLOCK_OPTIONAL, so make KS8851_MLL also depend on PTP_1588_CLOCK_OPTIONAL since 'select' does not follow any dependency chains. Fixes kconfig warning and build errors: WARNING: unmet direct dependencies detected for MICREL_PHY Depends on [m]: NETDEVICES [=y] && PHYLIB [=y] && PTP_1588_CLOCK_OPTIONAL [=m] Selected by [y]: - KS8851_MLL [=y] && NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_MICREL [=y] && HAS_IOMEM [=y] ld: drivers/net/phy/micrel.o: in function `lan8814_ts_info': micrel.c:(.text+0xb35): undefined reference to `ptp_clock_index' ld: drivers/net/phy/micrel.o: in function `lan8814_probe': micrel.c:(.text+0x2586): undefined reference to `ptp_clock_register' Signed-off-by: Randy Dunlap Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Paolo Abeni Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/micrel/Kconfig b/drivers/net/ethernet/micrel/Kconfig index 93df3049cdc0..1b632cdd7630 100644 --- a/drivers/net/ethernet/micrel/Kconfig +++ b/drivers/net/ethernet/micrel/Kconfig @@ -39,6 +39,7 @@ config KS8851 config KS8851_MLL tristate "Micrel KS8851 MLL" depends on HAS_IOMEM + depends on PTP_1588_CLOCK_OPTIONAL select MII select CRC32 select EEPROM_93CX6 From 6bf92d70e690b7ff12b24f4bfff5e5434d019b82 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 1 Apr 2022 10:33:42 +0300 Subject: [PATCH 118/708] net: ipv4: fix route with nexthop object delete warning FRR folks have hit a kernel warning[1] while deleting routes[2] which is caused by trying to delete a route pointing to a nexthop id without specifying nhid but matching on an interface. That is, a route is found but we hit a warning while matching it. The warning is from fib_info_nh() in include/net/nexthop.h because we run it on a fib_info with nexthop object. The call chain is: inet_rtm_delroute -> fib_table_delete -> fib_nh_match (called with a nexthop fib_info and also with fc_oif set thus calling fib_info_nh on the fib_info and triggering the warning). The fix is to not do any matching in that branch if the fi has a nexthop object because those are managed separately. I.e. we should match when deleting without nh spec and should fail when deleting a nexthop route with old-style nh spec because nexthop objects are managed separately, e.g.: $ ip r show 1.2.3.4/32 1.2.3.4 nhid 12 via 192.168.11.2 dev dummy0 $ ip r del 1.2.3.4/32 $ ip r del 1.2.3.4/32 nhid 12 $ ip r del 1.2.3.4/32 dev dummy0 [1] [ 523.462226] ------------[ cut here ]------------ [ 523.462230] WARNING: CPU: 14 PID: 22893 at include/net/nexthop.h:468 fib_nh_match+0x210/0x460 [ 523.462236] Modules linked in: dummy rpcsec_gss_krb5 xt_socket nf_socket_ipv4 nf_socket_ipv6 ip6table_raw iptable_raw bpf_preload xt_statistic ip_set ip_vs_sh ip_vs_wrr ip_vs_rr ip_vs xt_mark nf_tables xt_nat veth nf_conntrack_netlink nfnetlink xt_addrtype br_netfilter overlay dm_crypt nfsv3 nfs fscache netfs vhost_net vhost vhost_iotlb tap tun xt_CHECKSUM xt_MASQUERADE xt_conntrack 8021q garp mrp ipt_REJECT nf_reject_ipv4 ip6table_mangle ip6table_nat iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bridge stp llc rfcomm snd_seq_dummy snd_hrtimer rpcrdma rdma_cm iw_cm ib_cm ib_core ip6table_filter xt_comment ip6_tables vboxnetadp(OE) vboxnetflt(OE) vboxdrv(OE) qrtr bnep binfmt_misc xfs vfat fat squashfs loop nvidia_drm(POE) nvidia_modeset(POE) nvidia_uvm(POE) nvidia(POE) intel_rapl_msr intel_rapl_common snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio snd_hda_codec_hdmi btusb btrtl iwlmvm uvcvideo btbcm snd_hda_intel edac_mce_amd [ 523.462274] videobuf2_vmalloc videobuf2_memops btintel snd_intel_dspcfg videobuf2_v4l2 snd_intel_sdw_acpi bluetooth snd_usb_audio snd_hda_codec mac80211 snd_usbmidi_lib joydev snd_hda_core videobuf2_common kvm_amd snd_rawmidi snd_hwdep snd_seq videodev ccp snd_seq_device libarc4 ecdh_generic mc snd_pcm kvm iwlwifi snd_timer drm_kms_helper snd cfg80211 cec soundcore irqbypass rapl wmi_bmof i2c_piix4 rfkill k10temp pcspkr acpi_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc drm zram ip_tables crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel nvme sp5100_tco r8169 nvme_core wmi ipmi_devintf ipmi_msghandler fuse [ 523.462300] CPU: 14 PID: 22893 Comm: ip Tainted: P OE 5.16.18-200.fc35.x86_64 #1 [ 523.462302] Hardware name: Micro-Star International Co., Ltd. MS-7C37/MPG X570 GAMING EDGE WIFI (MS-7C37), BIOS 1.C0 10/29/2020 [ 523.462303] RIP: 0010:fib_nh_match+0x210/0x460 [ 523.462304] Code: 7c 24 20 48 8b b5 90 00 00 00 e8 bb ee f4 ff 48 8b 7c 24 20 41 89 c4 e8 ee eb f4 ff 45 85 e4 0f 85 2e fe ff ff e9 4c ff ff ff <0f> 0b e9 17 ff ff ff 3c 0a 0f 85 61 fe ff ff 48 8b b5 98 00 00 00 [ 523.462306] RSP: 0018:ffffaa53d4d87928 EFLAGS: 00010286 [ 523.462307] RAX: 0000000000000000 RBX: ffffaa53d4d87a90 RCX: ffffaa53d4d87bb0 [ 523.462308] RDX: ffff9e3d2ee6be80 RSI: ffffaa53d4d87a90 RDI: ffffffff920ed380 [ 523.462309] RBP: ffff9e3d2ee6be80 R08: 0000000000000064 R09: 0000000000000000 [ 523.462310] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000031 [ 523.462310] R13: 0000000000000020 R14: 0000000000000000 R15: ffff9e3d331054e0 [ 523.462311] FS: 00007f245517c1c0(0000) GS:ffff9e492ed80000(0000) knlGS:0000000000000000 [ 523.462313] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 523.462313] CR2: 000055e5dfdd8268 CR3: 00000003ef488000 CR4: 0000000000350ee0 [ 523.462315] Call Trace: [ 523.462316] [ 523.462320] fib_table_delete+0x1a9/0x310 [ 523.462323] inet_rtm_delroute+0x93/0x110 [ 523.462325] rtnetlink_rcv_msg+0x133/0x370 [ 523.462327] ? _copy_to_iter+0xb5/0x6f0 [ 523.462330] ? rtnl_calcit.isra.0+0x110/0x110 [ 523.462331] netlink_rcv_skb+0x50/0xf0 [ 523.462334] netlink_unicast+0x211/0x330 [ 523.462336] netlink_sendmsg+0x23f/0x480 [ 523.462338] sock_sendmsg+0x5e/0x60 [ 523.462340] ____sys_sendmsg+0x22c/0x270 [ 523.462341] ? import_iovec+0x17/0x20 [ 523.462343] ? sendmsg_copy_msghdr+0x59/0x90 [ 523.462344] ? __mod_lruvec_page_state+0x85/0x110 [ 523.462348] ___sys_sendmsg+0x81/0xc0 [ 523.462350] ? netlink_seq_start+0x70/0x70 [ 523.462352] ? __dentry_kill+0x13a/0x180 [ 523.462354] ? __fput+0xff/0x250 [ 523.462356] __sys_sendmsg+0x49/0x80 [ 523.462358] do_syscall_64+0x3b/0x90 [ 523.462361] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 523.462364] RIP: 0033:0x7f24552aa337 [ 523.462365] Code: 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [ 523.462366] RSP: 002b:00007fff7f05a838 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 523.462368] RAX: ffffffffffffffda RBX: 000000006245bf91 RCX: 00007f24552aa337 [ 523.462368] RDX: 0000000000000000 RSI: 00007fff7f05a8a0 RDI: 0000000000000003 [ 523.462369] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 [ 523.462370] R10: 0000000000000008 R11: 0000000000000246 R12: 0000000000000001 [ 523.462370] R13: 00007fff7f05ce08 R14: 0000000000000000 R15: 000055e5dfdd1040 [ 523.462373] [ 523.462374] ---[ end trace ba537bc16f6bf4ed ]--- [2] https://github.com/FRRouting/frr/issues/6412 Fixes: 4c7e8084fd46 ("ipv4: Plumb support for nexthop object in a fib_info") Signed-off-by: Nikolay Aleksandrov Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index cc8e84ef2ae4..ccb62038f6a4 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -889,8 +889,13 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, } if (cfg->fc_oif || cfg->fc_gw_family) { - struct fib_nh *nh = fib_info_nh(fi, 0); + struct fib_nh *nh; + /* cannot match on nexthop object attributes */ + if (fi->nh) + return 1; + + nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { if (fib_encap_match(net, cfg->fc_encap_type, cfg->fc_encap, nh, cfg, extack)) From 392baa339c6a42a2cb088e5e5df2b59b8f89be24 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 1 Apr 2022 10:33:43 +0300 Subject: [PATCH 119/708] selftests: net: add delete nexthop route warning test Add a test which causes a WARNING on kernels which treat a nexthop route like a normal route when comparing for deletion and a device is specified. That is, a route is found but we hit a warning while matching it. The warning is from fib_info_nh() in include/net/nexthop.h because we run it on a fib_info with nexthop object. The call chain is: inet_rtm_delroute -> fib_table_delete -> fib_nh_match (called with a nexthop fib_info and also with fc_oif set thus calling fib_info_nh on the fib_info and triggering the warning). Repro steps: $ ip nexthop add id 12 via 172.16.1.3 dev veth1 $ ip route add 172.16.101.1/32 nhid 12 $ ip route delete 172.16.101.1/32 dev veth1 Signed-off-by: Nikolay Aleksandrov Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_nexthops.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index d444ee6aa3cb..d8ede0c81ac1 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -1208,6 +1208,20 @@ ipv4_fcnal() set +e check_nexthop "dev veth1" "" log_test $? 0 "Nexthops removed on admin down" + + # nexthop route delete warning: route add with nhid and delete + # using device + run_cmd "$IP li set dev veth1 up" + run_cmd "$IP nexthop add id 12 via 172.16.1.3 dev veth1" + out1=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l` + run_cmd "$IP route add 172.16.101.1/32 nhid 12" + run_cmd "$IP route delete 172.16.101.1/32 dev veth1" + out2=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l` + [ $out1 -eq $out2 ] + rc=$? + log_test $rc 0 "Delete nexthop route warning" + run_cmd "$IP ip route delete 172.16.101.1/32 nhid 12" + run_cmd "$IP ip nexthop del id 12" } ipv4_grp_fcnal() From 31ac3bcee47b8628d676bf4080c56b238d0222d1 Mon Sep 17 00:00:00 2001 From: Dimitris Michailidis Date: Fri, 1 Apr 2022 16:24:11 -0700 Subject: [PATCH 120/708] net/fungible: Fix reference to __udivdi3 on 32b builds 32b builds with CONFIG_PHYS_ADDR_T_64BIT=y, such as i386 PAE, raise a linker error due to a 64b division: ld: drivers/net/ethernet/fungible/funcore/fun_dev.o: in function `fun_dev_enable': (.text+0xe1a): undefined reference to `__udivdi3' The divisor in the offendinng expression is a power of 2. Change it to use an explicit right shift. Fixes: e1ffcc66818f ("net/fungible: Add service module for Fungible drivers") Reported-by: Randy Dunlap Signed-off-by: Dimitris Michailidis Link: https://lore.kernel.org/r/20220401232411.313881-1-dmichail@fungible.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/fungible/funcore/fun_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/fungible/funcore/fun_dev.c b/drivers/net/ethernet/fungible/funcore/fun_dev.c index 5d7aef73df61..fb5120d90f26 100644 --- a/drivers/net/ethernet/fungible/funcore/fun_dev.c +++ b/drivers/net/ethernet/fungible/funcore/fun_dev.c @@ -586,8 +586,8 @@ static int fun_get_dev_limits(struct fun_dev *fdev) /* Calculate the max QID based on SQ/CQ/doorbell counts. * SQ/CQ doorbells alternate. */ - num_dbs = (pci_resource_len(pdev, 0) - NVME_REG_DBS) / - (fdev->db_stride * 4); + num_dbs = (pci_resource_len(pdev, 0) - NVME_REG_DBS) >> + (2 + NVME_CAP_STRIDE(fdev->cap_reg)); fdev->max_qid = min3(cq_count, sq_count, num_dbs / 2) - 1; fdev->kern_end_qid = fdev->max_qid + 1; return 0; From c21cabb0fd0b54b8b54235fc1ecfe1195a23bcb2 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 1 Apr 2022 02:48:32 +0800 Subject: [PATCH 121/708] net: stmmac: Fix unset max_speed difference between DT and non-DT platforms In commit 9cbadf094d9d ("net: stmmac: support max-speed device tree property"), when DT platforms don't set "max-speed", max_speed is set to -1; for non-DT platforms, it stays the default 0. Prior to commit eeef2f6b9f6e ("net: stmmac: Start adding phylink support"), the check for a valid max_speed setting was to check if it was greater than zero. This commit got it right, but subsequent patches just checked for non-zero, which is incorrect for DT platforms. In commit 92c3807b9ac3 ("net: stmmac: convert to phylink_get_linkmodes()") the conversion switched completely to checking for non-zero value as a valid value, which caused 1000base-T to stop getting advertised by default. Instead of trying to fix all the checks, simply leave max_speed alone if DT property parsing fails. Fixes: 9cbadf094d9d ("net: stmmac: support max-speed device tree property") Fixes: 92c3807b9ac3 ("net: stmmac: convert to phylink_get_linkmodes()") Signed-off-by: Chen-Yu Tsai Acked-by: Russell King (Oracle) Reviewed-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20220331184832.16316-1-wens@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 5d29f336315b..11e1055e8260 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -431,8 +431,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) plat->phylink_node = np; /* Get max speed of operation from device tree */ - if (of_property_read_u32(np, "max-speed", &plat->max_speed)) - plat->max_speed = -1; + of_property_read_u32(np, "max-speed", &plat->max_speed); plat->bus_id = of_alias_get_id(np, "ethernet"); if (plat->bus_id < 0) From 5a48b7433a5aee719ab242d2feadaf4c9e065989 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Sat, 2 Apr 2022 09:46:23 -0500 Subject: [PATCH 122/708] docs: net: dsa: fix minor grammar and punctuation issues Fix a few typos and minor grammatical issues. Signed-off-by: Bjorn Helgaas Signed-off-by: David S. Miller --- Documentation/networking/dsa/dsa.rst | 64 ++++++++++++++-------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst index 89bb4fa4c362..ddc1dd039337 100644 --- a/Documentation/networking/dsa/dsa.rst +++ b/Documentation/networking/dsa/dsa.rst @@ -10,21 +10,21 @@ in joining the effort. Design principles ================= -The Distributed Switch Architecture is a subsystem which was primarily designed -to support Marvell Ethernet switches (MV88E6xxx, a.k.a Linkstreet product line) -using Linux, but has since evolved to support other vendors as well. +The Distributed Switch Architecture subsystem was primarily designed to +support Marvell Ethernet switches (MV88E6xxx, a.k.a. Link Street product +line) using Linux, but has since evolved to support other vendors as well. The original philosophy behind this design was to be able to use unmodified Linux tools such as bridge, iproute2, ifconfig to work transparently whether they configured/queried a switch port network device or a regular network device. -An Ethernet switch is typically comprised of multiple front-panel ports, and one -or more CPU or management port. The DSA subsystem currently relies on the +An Ethernet switch typically comprises multiple front-panel ports and one +or more CPU or management ports. The DSA subsystem currently relies on the presence of a management port connected to an Ethernet controller capable of receiving Ethernet frames from the switch. This is a very common setup for all kinds of Ethernet switches found in Small Home and Office products: routers, -gateways, or even top-of-the rack switches. This host Ethernet controller will +gateways, or even top-of-rack switches. This host Ethernet controller will be later referred to as "master" and "cpu" in DSA terminology and code. The D in DSA stands for Distributed, because the subsystem has been designed @@ -33,14 +33,14 @@ using upstream and downstream Ethernet links between switches. These specific ports are referred to as "dsa" ports in DSA terminology and code. A collection of multiple switches connected to each other is called a "switch tree". -For each front-panel port, DSA will create specialized network devices which are +For each front-panel port, DSA creates specialized network devices which are used as controlling and data-flowing endpoints for use by the Linux networking stack. These specialized network interfaces are referred to as "slave" network interfaces in DSA terminology and code. The ideal case for using DSA is when an Ethernet switch supports a "switch tag" which is a hardware feature making the switch insert a specific tag for each -Ethernet frames it received to/from specific ports to help the management +Ethernet frame it receives to/from specific ports to help the management interface figure out: - what port is this frame coming from @@ -125,7 +125,7 @@ other switches from the same fabric, and in this case, the outermost switch ports must decapsulate the packet. Note that in certain cases, it might be the case that the tagging format used -by a leaf switch (not connected directly to the CPU) to not be the same as what +by a leaf switch (not connected directly to the CPU) is not the same as what the network stack sees. This can be seen with Marvell switch trees, where the CPU port can be configured to use either the DSA or the Ethertype DSA (EDSA) format, but the DSA links are configured to use the shorter (without Ethertype) @@ -270,21 +270,21 @@ These interfaces are specialized in order to: to/from specific switch ports - query the switch for ethtool operations: statistics, link state, Wake-on-LAN, register dumps... -- external/internal PHY management: link, auto-negotiation etc. +- manage external/internal PHY: link, auto-negotiation, etc. These slave network devices have custom net_device_ops and ethtool_ops function pointers which allow DSA to introduce a level of layering between the networking -stack/ethtool, and the switch driver implementation. +stack/ethtool and the switch driver implementation. Upon frame transmission from these slave network devices, DSA will look up which -switch tagging protocol is currently registered with these network devices, and +switch tagging protocol is currently registered with these network devices and invoke a specific transmit routine which takes care of adding the relevant switch tag in the Ethernet frames. These frames are then queued for transmission using the master network device -``ndo_start_xmit()`` function, since they contain the appropriate switch tag, the +``ndo_start_xmit()`` function. Since they contain the appropriate switch tag, the Ethernet switch will be able to process these incoming frames from the -management interface and delivers these frames to the physical switch port. +management interface and deliver them to the physical switch port. Graphical representation ------------------------ @@ -330,9 +330,9 @@ MDIO reads/writes towards specific PHY addresses. In most MDIO-connected switches, these functions would utilize direct or indirect PHY addressing mode to return standard MII registers from the switch builtin PHYs, allowing the PHY library and/or to return link status, link partner pages, auto-negotiation -results etc.. +results, etc. -For Ethernet switches which have both external and internal MDIO busses, the +For Ethernet switches which have both external and internal MDIO buses, the slave MII bus can be utilized to mux/demux MDIO reads and writes towards either internal or external MDIO devices this switch might be connected to: internal PHYs, external PHYs, or even external switches. @@ -349,7 +349,7 @@ DSA data structures are defined in ``include/net/dsa.h`` as well as table indication (when cascading switches) - ``dsa_platform_data``: platform device configuration data which can reference - a collection of dsa_chip_data structure if multiples switches are cascaded, + a collection of dsa_chip_data structures if multiple switches are cascaded, the master network device this switch tree is attached to needs to be referenced @@ -426,7 +426,7 @@ logic basically looks like this: "phy-handle" property, if found, this PHY device is created and registered using ``of_phy_connect()`` -- if Device Tree is used, and the PHY device is "fixed", that is, conforms to +- if Device Tree is used and the PHY device is "fixed", that is, conforms to the definition of a non-MDIO managed PHY as defined in ``Documentation/devicetree/bindings/net/fixed-link.txt``, the PHY is registered and connected transparently using the special fixed MDIO bus driver @@ -481,7 +481,7 @@ Device Tree DSA features a standardized binding which is documented in ``Documentation/devicetree/bindings/net/dsa/dsa.txt``. PHY/MDIO library helper functions such as ``of_get_phy_mode()``, ``of_phy_connect()`` are also used to query -per-port PHY specific details: interface connection, MDIO bus location etc.. +per-port PHY specific details: interface connection, MDIO bus location, etc. Driver development ================== @@ -509,7 +509,7 @@ Switch configuration - ``setup``: setup function for the switch, this function is responsible for setting up the ``dsa_switch_ops`` private structure with all it needs: register maps, - interrupts, mutexes, locks etc.. This function is also expected to properly + interrupts, mutexes, locks, etc. This function is also expected to properly configure the switch to separate all network interfaces from each other, that is, they should be isolated by the switch hardware itself, typically by creating a Port-based VLAN ID for each port and allowing only the CPU port and the @@ -526,13 +526,13 @@ PHY devices and link management - ``get_phy_flags``: Some switches are interfaced to various kinds of Ethernet PHYs, if the PHY library PHY driver needs to know about information it cannot obtain on its own (e.g.: coming from switch memory mapped registers), this function - should return a 32-bits bitmask of "flags", that is private between the switch + should return a 32-bit bitmask of "flags" that is private between the switch driver and the Ethernet PHY driver in ``drivers/net/phy/\*``. - ``phy_read``: Function invoked by the DSA slave MDIO bus when attempting to read the switch port MDIO registers. If unavailable, return 0xffff for each read. For builtin switch Ethernet PHYs, this function should allow reading the link - status, auto-negotiation results, link partner pages etc.. + status, auto-negotiation results, link partner pages, etc. - ``phy_write``: Function invoked by the DSA slave MDIO bus when attempting to write to the switch port MDIO registers. If unavailable return a negative error @@ -554,7 +554,7 @@ Ethtool operations ------------------ - ``get_strings``: ethtool function used to query the driver's strings, will - typically return statistics strings, private flags strings etc. + typically return statistics strings, private flags strings, etc. - ``get_ethtool_stats``: ethtool function used to query per-port statistics and return their values. DSA overlays slave network devices general statistics: @@ -564,7 +564,7 @@ Ethtool operations - ``get_sset_count``: ethtool function used to query the number of statistics items - ``get_wol``: ethtool function used to obtain Wake-on-LAN settings per-port, this - function may, for certain implementations also query the master network device + function may for certain implementations also query the master network device Wake-on-LAN settings if this interface needs to participate in Wake-on-LAN - ``set_wol``: ethtool function used to configure Wake-on-LAN settings per-port, @@ -607,14 +607,14 @@ Power management in a fully active state - ``port_enable``: function invoked by the DSA slave network device ndo_open - function when a port is administratively brought up, this function should be - fully enabling a given switch port. DSA takes care of marking the port with + function when a port is administratively brought up, this function should + fully enable a given switch port. DSA takes care of marking the port with ``BR_STATE_BLOCKING`` if the port is a bridge member, or ``BR_STATE_FORWARDING`` if it was not, and propagating these changes down to the hardware - ``port_disable``: function invoked by the DSA slave network device ndo_close - function when a port is administratively brought down, this function should be - fully disabling a given switch port. DSA takes care of marking the port with + function when a port is administratively brought down, this function should + fully disable a given switch port. DSA takes care of marking the port with ``BR_STATE_DISABLED`` and propagating changes to the hardware if this port is disabled while being a bridge member @@ -622,12 +622,12 @@ Bridge layer ------------ - ``port_bridge_join``: bridge layer function invoked when a given switch port is - added to a bridge, this function should be doing the necessary at the switch - level to permit the joining port from being added to the relevant logical + added to a bridge, this function should do what's necessary at the switch + level to permit the joining port to be added to the relevant logical domain for it to ingress/egress traffic with other members of the bridge. - ``port_bridge_leave``: bridge layer function invoked when a given switch port is - removed from a bridge, this function should be doing the necessary at the + removed from a bridge, this function should do what's necessary at the switch level to deny the leaving port from ingress/egress traffic from the remaining bridge members. When the port leaves the bridge, it should be aged out at the switch hardware for the switch to (re) learn MAC addresses behind @@ -663,7 +663,7 @@ Bridge layer point for drivers that need to configure the hardware for enabling this feature. -- ``port_bridge_tx_fwd_unoffload``: bridge layer function invoken when a driver +- ``port_bridge_tx_fwd_unoffload``: bridge layer function invoked when a driver leaves a bridge port which had the TX forwarding offload feature enabled. Bridge VLAN filtering From 692930cc435099580a4b9e32fa781b0688c18439 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 1 Apr 2022 18:54:27 +0300 Subject: [PATCH 123/708] selftests: net: fix nexthop warning cleanup double ip typo I made a stupid typo when adding the nexthop route warning selftest and added both $IP and ip after it (double ip) on the cleanup path. The error doesn't show up when running the test, but obviously it doesn't cleanup properly after it. Fixes: 392baa339c6a ("selftests: net: add delete nexthop route warning test") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_nexthops.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index d8ede0c81ac1..b3bf5319bb0e 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -1220,8 +1220,8 @@ ipv4_fcnal() [ $out1 -eq $out2 ] rc=$? log_test $rc 0 "Delete nexthop route warning" - run_cmd "$IP ip route delete 172.16.101.1/32 nhid 12" - run_cmd "$IP ip nexthop del id 12" + run_cmd "$IP route delete 172.16.101.1/32 nhid 12" + run_cmd "$IP nexthop del id 12" } ipv4_grp_fcnal() From 7f921a2d6c93051b6002dbb7c1781f1fa5b88cce Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 3 Apr 2022 22:12:52 +1000 Subject: [PATCH 124/708] KVM: PPC: Move kvmhv_on_pseries() into kvm_ppc.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We recently introduced a usage of kvmhv_on_pseries() in powerpc.c, which causes a build error for ppc64_book3e_allmodconfig: arch/powerpc/kvm/powerpc.c:716:8: error: implicit declaration of function ‘kvmhv_on_pseries’ 716 | if (kvmhv_on_pseries()) { | ^~~~~~~~~~~~~~~~ Fix it by moving kvmhv_on_pseries() into kvm_ppc.h so that the stub version is available for book3e builds. Fixes: f771b55731fc ("KVM: PPC: Use KVM_CAP_PPC_AIL_MODE_3") Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/kvm_book3s_64.h | 12 ------------ arch/powerpc/include/asm/kvm_ppc.h | 12 ++++++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 827038a33064..4def2bd17b9b 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -16,18 +16,6 @@ #include #include -#ifdef CONFIG_PPC_PSERIES -static inline bool kvmhv_on_pseries(void) -{ - return !cpu_has_feature(CPU_FTR_HVMODE); -} -#else -static inline bool kvmhv_on_pseries(void) -{ - return false; -} -#endif - /* * Structure for a nested guest, that is, for a guest that is managed by * one of our guests. diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c583d0c37f31..838d4cb460b7 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -586,6 +586,18 @@ static inline bool kvm_hv_mode_active(void) { return false; } #endif +#ifdef CONFIG_PPC_PSERIES +static inline bool kvmhv_on_pseries(void) +{ + return !cpu_has_feature(CPU_FTR_HVMODE); +} +#else +static inline bool kvmhv_on_pseries(void) +{ + return false; +} +#endif + #ifdef CONFIG_KVM_XICS static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) { From ec858afda857e361182ceafc3d2ba2b164b8e889 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 30 Mar 2022 11:06:02 -0600 Subject: [PATCH 125/708] io_uring: don't check req->file in io_fsync_prep() This is a leftover from the really old days where we weren't able to track and error early if we need a file and it wasn't assigned. Kill the check. Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index a8413f006417..9108c56bff5b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4513,9 +4513,6 @@ static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_ring_ctx *ctx = req->ctx; - if (!req->file) - return -EBADF; - if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || From 76ed2f61ae3ee5ca6e3ef155a703ab3eee1eb295 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 22 Mar 2022 16:01:37 +0900 Subject: [PATCH 126/708] ata: libata-sff: Fix compilation warning in ata_sff_lost_interrupt() When returning false, ata_sff_altstatus() does not return any status value, resulting in a compilation warning in ata_sff_lost_interrupt() ("uninitialized symbol 'status'"). Fix this by initializing the local variable "status" to 0. Fixes: 03c0e84f9c1e ("ata: libata-sff: refactor ata_sff_altstatus()") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal --- drivers/ata/libata-sff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index b3be7a8f5bea..b1666adc1c3a 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1634,7 +1634,7 @@ EXPORT_SYMBOL_GPL(ata_sff_interrupt); void ata_sff_lost_interrupt(struct ata_port *ap) { - u8 status; + u8 status = 0; struct ata_queued_cmd *qc; /* Only one outstanding command per SFF channel */ From 7aa8104a554713b685db729e66511b93d989dd6a Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sat, 19 Mar 2022 21:11:02 +0100 Subject: [PATCH 127/708] ata: sata_dwc_460ex: Fix crash due to OOB write the driver uses libata's "tag" values from in various arrays. Since the mentioned patch bumped the ATA_TAG_INTERNAL to 32, the value of the SATA_DWC_QCMD_MAX needs to account for that. Otherwise ATA_TAG_INTERNAL usage cause similar crashes like this as reported by Tice Rex on the OpenWrt Forum and reproduced (with symbols) here: | BUG: Kernel NULL pointer dereference at 0x00000000 | Faulting instruction address: 0xc03ed4b8 | Oops: Kernel access of bad area, sig: 11 [#1] | BE PAGE_SIZE=4K PowerPC 44x Platform | CPU: 0 PID: 362 Comm: scsi_eh_1 Not tainted 5.4.163 #0 | NIP: c03ed4b8 LR: c03d27e8 CTR: c03ed36c | REGS: cfa59950 TRAP: 0300 Not tainted (5.4.163) | MSR: 00021000 CR: 42000222 XER: 00000000 | DEAR: 00000000 ESR: 00000000 | GPR00: c03d27e8 cfa59a08 cfa55fe0 00000000 0fa46bc0 [...] | [..] | NIP [c03ed4b8] sata_dwc_qc_issue+0x14c/0x254 | LR [c03d27e8] ata_qc_issue+0x1c8/0x2dc | Call Trace: | [cfa59a08] [c003f4e0] __cancel_work_timer+0x124/0x194 (unreliable) | [cfa59a78] [c03d27e8] ata_qc_issue+0x1c8/0x2dc | [cfa59a98] [c03d2b3c] ata_exec_internal_sg+0x240/0x524 | [cfa59b08] [c03d2e98] ata_exec_internal+0x78/0xe0 | [cfa59b58] [c03d30fc] ata_read_log_page.part.38+0x1dc/0x204 | [cfa59bc8] [c03d324c] ata_identify_page_supported+0x68/0x130 | [...] This is because sata_dwc_dma_xfer_complete() NULLs the dma_pending's next neighbour "chan" (a *dma_chan struct) in this '32' case right here (line ~735): > hsdevp->dma_pending[tag] = SATA_DWC_DMA_PENDING_NONE; Then the next time, a dma gets issued; dma_dwc_xfer_setup() passes the NULL'd hsdevp->chan to the dmaengine_slave_config() which then causes the crash. With this patch, SATA_DWC_QCMD_MAX is now set to ATA_MAX_QUEUE + 1. This avoids the OOB. But please note, there was a worthwhile discussion on what ATA_TAG_INTERNAL and ATA_MAX_QUEUE is. And why there should not be a "fake" 33 command-long queue size. Ideally, the dw driver should account for the ATA_TAG_INTERNAL. In Damien Le Moal's words: "... having looked at the driver, it is a bigger change than just faking a 33rd "tag" that is in fact not a command tag at all." Fixes: 28361c403683c ("libata: add extra internal command") Cc: stable@kernel.org # 4.18+ BugLink: https://github.com/openwrt/openwrt/issues/9505 Signed-off-by: Christian Lamparter Signed-off-by: Damien Le Moal --- drivers/ata/sata_dwc_460ex.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index bec33d781ae0..e3263e961045 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -137,7 +137,11 @@ struct sata_dwc_device { #endif }; -#define SATA_DWC_QCMD_MAX 32 +/* + * Allow one extra special slot for commands and DMA management + * to account for libata internal commands. + */ +#define SATA_DWC_QCMD_MAX (ATA_MAX_QUEUE + 1) struct sata_dwc_device_port { struct sata_dwc_device *hsdev; From 5399752299396a3c9df6617f4b3c907d7aa4ded8 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sat, 19 Mar 2022 21:11:03 +0100 Subject: [PATCH 128/708] ata: libata-core: Disable READ LOG DMA EXT for Samsung 840 EVOs Samsung' 840 EVO with the latest firmware (EXT0DB6Q) locks up with the a message: "READ LOG DMA EXT failed, trying PIO" during boot. Initially this was discovered because it caused a crash with the sata_dwc_460ex controller on a WD MyBook Live DUO. The reporter "Tice Rex" which has the unique opportunity that he has two Samsung 840 EVO SSD! One with the older firmware "EXT0BB0Q" which booted fine and didn't expose "READ LOG DMA EXT". But the newer/latest firmware "EXT0DB6Q" caused the headaches. BugLink: https://github.com/openwrt/openwrt/issues/9505 Signed-off-by: Christian Lamparter Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index cceedde51126..ca64837641be 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4014,6 +4014,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Crucial_CT*MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Samsung SSD 840 EVO*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_NO_DMA_LOG | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung SSD 840*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung SSD 850*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | From 98f0d68f94ea21541e0050cc64fa108ade779839 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 24 Feb 2022 15:24:04 +0000 Subject: [PATCH 129/708] firmware: arm_scmi: Remove clear channel call on the TX channel On SCMI transports whose channels are based on a shared resource the TX channel area has to be acquired by the agent before placing the desired command into the channel and it will be then relinquished by the platform once the related reply has been made available into the channel. On an RX channel the logic is reversed with the platform acquiring the channel area and the agent reliquishing it once done by calling the scmi_clear_channel() helper. As a consequence, even in case of error, the agent must never try to clear a TX channel from its side: restrict the existing clear channel call on the the reply path only to delayed responses since they are indeed coming from the RX channel. Link: https://lore.kernel.org/r/20220224152404.12877-1-cristian.marussi@arm.com Fixes: e9b21c96181c ("firmware: arm_scmi: Make .clear_channel optional") Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 46118300a4d1..e17c6568344d 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -679,7 +679,8 @@ static void scmi_handle_response(struct scmi_chan_info *cinfo, xfer = scmi_xfer_command_acquire(cinfo, msg_hdr); if (IS_ERR(xfer)) { - scmi_clear_channel(info, cinfo); + if (MSG_XTRACT_TYPE(msg_hdr) == MSG_TYPE_DELAYED_RESP) + scmi_clear_channel(info, cinfo); return; } From b3f1dd52c991d79118f35e6d1bf4d7cb09882e38 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 31 Mar 2022 12:04:43 -0700 Subject: [PATCH 130/708] ARM: vexpress/spc: Avoid negative array index when !SMP When building multi_v7_defconfig+CONFIG_SMP=n, -Warray-bounds exposes a couple negative array index accesses: arch/arm/mach-vexpress/spc.c: In function 've_spc_clk_init': arch/arm/mach-vexpress/spc.c:583:21: warning: array subscript -1 is below array bounds of 'bool[2]' {aka '_Bool[2]'} [-Warray-bounds] 583 | if (init_opp_table[cluster]) | ~~~~~~~~~~~~~~^~~~~~~~~ arch/arm/mach-vexpress/spc.c:556:7: note: while referencing 'init_opp_table' 556 | bool init_opp_table[MAX_CLUSTERS] = { false }; | ^~~~~~~~~~~~~~ arch/arm/mach-vexpress/spc.c:592:18: warning: array subscript -1 is below array bounds of 'bool[2]' {aka '_Bool[2]'} [-Warray-bounds] 592 | init_opp_table[cluster] = true; | ~~~~~~~~~~~~~~^~~~~~~~~ arch/arm/mach-vexpress/spc.c:556:7: note: while referencing 'init_opp_table' 556 | bool init_opp_table[MAX_CLUSTERS] = { false }; | ^~~~~~~~~~~~~~ Skip this logic when built !SMP. Link: https://lore.kernel.org/r/20220331190443.851661-1-keescook@chromium.org Cc: Liviu Dudau Cc: Sudeep Holla Cc: Lorenzo Pieralisi Cc: Russell King Cc: linux-arm-kernel@lists.infradead.org Acked-by: Liviu Dudau Signed-off-by: Kees Cook Signed-off-by: Sudeep Holla --- arch/arm/mach-vexpress/spc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index 1da11bdb1dfb..1c6500c4e6a1 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -580,7 +580,7 @@ static int __init ve_spc_clk_init(void) } cluster = topology_physical_package_id(cpu_dev->id); - if (init_opp_table[cluster]) + if (cluster < 0 || init_opp_table[cluster]) continue; if (ve_init_opp_table(cpu_dev)) From 8027a9ad9b3568c5eb49c968ad6c97f279d76730 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 5 Jan 2022 15:47:29 +0800 Subject: [PATCH 131/708] drm/imx: imx-ldb: Check for null pointer after calling kmemdup As the possible failure of the allocation, kmemdup() may return NULL pointer. Therefore, it should be better to check the return value of kmemdup() and return error if fails. Fixes: dc80d7038883 ("drm/imx-ldb: Add support to drm-bridge") Signed-off-by: Jiasheng Jiang Signed-off-by: Philipp Zabel Link: https://lore.kernel.org/r/20220105074729.2363657-1-jiasheng@iscas.ac.cn --- drivers/gpu/drm/imx/imx-ldb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index e5078d03020d..fb0e951248f6 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -572,6 +572,8 @@ static int imx_ldb_panel_ddc(struct device *dev, edidp = of_get_property(child, "edid", &edid_len); if (edidp) { channel->edid = kmemdup(edidp, edid_len, GFP_KERNEL); + if (!channel->edid) + return -ENOMEM; } else if (!channel->panel) { /* fallback to display-timings node */ ret = of_get_drm_display_mode(child, From bce81feb03a20fca7bbdd1c4af16b4e9d5c0e1d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Sat, 8 Jan 2022 17:52:30 +0100 Subject: [PATCH 132/708] drm/imx: Fix memory leak in imx_pd_connector_get_modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid leaking the display mode variable if of_get_drm_display_mode fails. Fixes: 76ecd9c9fb24 ("drm/imx: parallel-display: check return code from of_get_drm_display_mode()") Addresses-Coverity-ID: 1443943 ("Resource leak") Signed-off-by: José Expósito Signed-off-by: Philipp Zabel Link: https://lore.kernel.org/r/20220108165230.44610-1-jose.exposito89@gmail.com --- drivers/gpu/drm/imx/parallel-display.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index 06cb1a59b9bc..63ba2ad84679 100644 --- a/drivers/gpu/drm/imx/parallel-display.c +++ b/drivers/gpu/drm/imx/parallel-display.c @@ -75,8 +75,10 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector) ret = of_get_drm_display_mode(np, &imxpd->mode, &imxpd->bus_flags, OF_USE_NATIVE_MODE); - if (ret) + if (ret) { + drm_mode_destroy(connector->dev, mode); return ret; + } drm_mode_copy(mode, &imxpd->mode); mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; From e8083acc3f8cc2097917018e947fd4c857f60454 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Fri, 28 Jan 2022 17:19:44 +0800 Subject: [PATCH 133/708] drm/imx: dw_hdmi-imx: Fix bailout in error cases of probe In dw_hdmi_imx_probe(), if error happens after dw_hdmi_probe() returns successfully, dw_hdmi_remove() should be called where necessary as bailout. Fixes: c805ec7eb210 ("drm/imx: dw_hdmi-imx: move initialization into probe") Cc: Philipp Zabel Cc: David Airlie Cc: Daniel Vetter Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Signed-off-by: Liu Ying Signed-off-by: Philipp Zabel Link: https://lore.kernel.org/r/20220128091944.3831256-1-victor.liu@nxp.com --- drivers/gpu/drm/imx/dw_hdmi-imx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/imx/dw_hdmi-imx.c b/drivers/gpu/drm/imx/dw_hdmi-imx.c index 87428fb23d9f..a2277a0d6d06 100644 --- a/drivers/gpu/drm/imx/dw_hdmi-imx.c +++ b/drivers/gpu/drm/imx/dw_hdmi-imx.c @@ -222,6 +222,7 @@ static int dw_hdmi_imx_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; const struct of_device_id *match = of_match_node(dw_hdmi_imx_dt_ids, np); struct imx_hdmi *hdmi; + int ret; hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL); if (!hdmi) @@ -243,10 +244,15 @@ static int dw_hdmi_imx_probe(struct platform_device *pdev) hdmi->bridge = of_drm_find_bridge(np); if (!hdmi->bridge) { dev_err(hdmi->dev, "Unable to find bridge\n"); + dw_hdmi_remove(hdmi->hdmi); return -ENODEV; } - return component_add(&pdev->dev, &dw_hdmi_imx_ops); + ret = component_add(&pdev->dev, &dw_hdmi_imx_ops); + if (ret) + dw_hdmi_remove(hdmi->hdmi); + + return ret; } static int dw_hdmi_imx_remove(struct platform_device *pdev) From 070a88fd4a03f921b73a2059e97d55faaa447dab Mon Sep 17 00:00:00 2001 From: Leo Ruan Date: Mon, 7 Feb 2022 16:14:11 +0100 Subject: [PATCH 134/708] gpu: ipu-v3: Fix dev_dbg frequency output This commit corrects the printing of the IPU clock error percentage if it is between -0.1% to -0.9%. For example, if the pixel clock requested is 27.2 MHz but only 27.0 MHz can be achieved the deviation is -0.8%. But the fixed point math had a flaw and calculated error of 0.2%. Before: Clocks: IPU 270000000Hz DI 24716667Hz Needed 27200000Hz IPU clock can give 27000000 with divider 10, error 0.2% Want 27200000Hz IPU 270000000Hz DI 24716667Hz using IPU, 27000000Hz After: Clocks: IPU 270000000Hz DI 24716667Hz Needed 27200000Hz IPU clock can give 27000000 with divider 10, error -0.8% Want 27200000Hz IPU 270000000Hz DI 24716667Hz using IPU, 27000000Hz Signed-off-by: Leo Ruan Signed-off-by: Mark Jonas Reviewed-by: Philipp Zabel Signed-off-by: Philipp Zabel Link: https://lore.kernel.org/r/20220207151411.5009-1-mark.jonas@de.bosch.com --- drivers/gpu/ipu-v3/ipu-di.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-di.c b/drivers/gpu/ipu-v3/ipu-di.c index 666223c6bec4..0a34e0ab4fe6 100644 --- a/drivers/gpu/ipu-v3/ipu-di.c +++ b/drivers/gpu/ipu-v3/ipu-di.c @@ -447,8 +447,9 @@ static void ipu_di_config_clock(struct ipu_di *di, error = rate / (sig->mode.pixelclock / 1000); - dev_dbg(di->ipu->dev, " IPU clock can give %lu with divider %u, error %d.%u%%\n", - rate, div, (signed)(error - 1000) / 10, error % 10); + dev_dbg(di->ipu->dev, " IPU clock can give %lu with divider %u, error %c%d.%d%%\n", + rate, div, error < 1000 ? '-' : '+', + abs(error - 1000) / 10, abs(error - 1000) % 10); /* Allow a 1% error */ if (error < 1010 && error >= 990) { From 92d96b603738ec4f35cde7198c303ae264dd47cb Mon Sep 17 00:00:00 2001 From: Jonathan Bakker Date: Sun, 27 Mar 2022 18:01:54 -0700 Subject: [PATCH 135/708] regulator: wm8994: Add an off-on delay for WM8994 variant As per Table 130 of the wm8994 datasheet at [1], there is an off-on delay for LDO1 and LDO2. In the wm8958 datasheet [2], I could not find any reference to it. I could not find a wm1811 datasheet to double-check there, but as no one has complained presumably it works without it. This solves the issue on Samsung Aries boards with a wm8994 where register writes fail when the device is powered off and back-on quickly. [1] https://statics.cirrus.com/pubs/proDatasheet/WM8994_Rev4.6.pdf [2] https://statics.cirrus.com/pubs/proDatasheet/WM8958_v3.5.pdf Signed-off-by: Jonathan Bakker Acked-by: Charles Keepax Link: https://lore.kernel.org/r/CY4PR04MB056771CFB80DC447C30D5A31CB1D9@CY4PR04MB0567.namprd04.prod.outlook.com Signed-off-by: Mark Brown --- drivers/regulator/wm8994-regulator.c | 42 ++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/wm8994-regulator.c b/drivers/regulator/wm8994-regulator.c index cadea0344486..40befdd9dfa9 100644 --- a/drivers/regulator/wm8994-regulator.c +++ b/drivers/regulator/wm8994-regulator.c @@ -71,6 +71,35 @@ static const struct regulator_ops wm8994_ldo2_ops = { }; static const struct regulator_desc wm8994_ldo_desc[] = { + { + .name = "LDO1", + .id = 1, + .type = REGULATOR_VOLTAGE, + .n_voltages = WM8994_LDO1_MAX_SELECTOR + 1, + .vsel_reg = WM8994_LDO_1, + .vsel_mask = WM8994_LDO1_VSEL_MASK, + .ops = &wm8994_ldo1_ops, + .min_uV = 2400000, + .uV_step = 100000, + .enable_time = 3000, + .off_on_delay = 36000, + .owner = THIS_MODULE, + }, + { + .name = "LDO2", + .id = 2, + .type = REGULATOR_VOLTAGE, + .n_voltages = WM8994_LDO2_MAX_SELECTOR + 1, + .vsel_reg = WM8994_LDO_2, + .vsel_mask = WM8994_LDO2_VSEL_MASK, + .ops = &wm8994_ldo2_ops, + .enable_time = 3000, + .off_on_delay = 36000, + .owner = THIS_MODULE, + }, +}; + +static const struct regulator_desc wm8958_ldo_desc[] = { { .name = "LDO1", .id = 1, @@ -172,9 +201,16 @@ static int wm8994_ldo_probe(struct platform_device *pdev) * regulator core and we need not worry about it on the * error path. */ - ldo->regulator = devm_regulator_register(&pdev->dev, - &wm8994_ldo_desc[id], - &config); + if (ldo->wm8994->type == WM8994) { + ldo->regulator = devm_regulator_register(&pdev->dev, + &wm8994_ldo_desc[id], + &config); + } else { + ldo->regulator = devm_regulator_register(&pdev->dev, + &wm8958_ldo_desc[id], + &config); + } + if (IS_ERR(ldo->regulator)) { ret = PTR_ERR(ldo->regulator); dev_err(wm8994->dev, "Failed to register LDO%d: %d\n", From 17049bf9de55a42ee96fd34520aff8a484677675 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 4 Apr 2022 10:25:14 +0800 Subject: [PATCH 136/708] regulator: rtq2134: Fix missing active_discharge_on setting The active_discharge_on setting was missed, so output discharge resistor is always disabled. Fix it. Fixes: 0555d41497de ("regulator: rtq2134: Add support for Richtek RTQ2134 SubPMIC") Signed-off-by: Axel Lin Link: https://lore.kernel.org/r/20220404022514.449231-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/rtq2134-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/rtq2134-regulator.c b/drivers/regulator/rtq2134-regulator.c index f21e3f8b21f2..8e13dea354a2 100644 --- a/drivers/regulator/rtq2134-regulator.c +++ b/drivers/regulator/rtq2134-regulator.c @@ -285,6 +285,7 @@ static const unsigned int rtq2134_buck_ramp_delay_table[] = { .enable_mask = RTQ2134_VOUTEN_MASK, \ .active_discharge_reg = RTQ2134_REG_BUCK##_id##_CFG0, \ .active_discharge_mask = RTQ2134_ACTDISCHG_MASK, \ + .active_discharge_on = RTQ2134_ACTDISCHG_MASK, \ .ramp_reg = RTQ2134_REG_BUCK##_id##_RSPCFG, \ .ramp_mask = RTQ2134_RSPUP_MASK, \ .ramp_delay_table = rtq2134_buck_ramp_delay_table, \ From 2f8cf5f642e80f8b6b0e660a9c86924a1f41cd80 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 29 Mar 2022 16:00:39 +0200 Subject: [PATCH 137/708] spi: rpc-if: Fix RPM imbalance in probe error path If rpcif_hw_init() fails, Runtime PM is left enabled. Fixes: b04cc0d912eb80d3 ("memory: renesas-rpc-if: Add support for RZ/G2L") Signed-off-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Link: https://lore.kernel.org/r/1c78a1f447d019bb66b6e7787f520ae78821e2ae.1648562287.git.geert+renesas@glider.be Signed-off-by: Mark Brown --- drivers/spi/spi-rpc-if.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-rpc-if.c b/drivers/spi/spi-rpc-if.c index fe82f3575df4..24ec1c83f379 100644 --- a/drivers/spi/spi-rpc-if.c +++ b/drivers/spi/spi-rpc-if.c @@ -158,14 +158,18 @@ static int rpcif_spi_probe(struct platform_device *pdev) error = rpcif_hw_init(rpc, false); if (error) - return error; + goto out_disable_rpm; error = spi_register_controller(ctlr); if (error) { dev_err(&pdev->dev, "spi_register_controller failed\n"); - rpcif_disable_rpm(rpc); + goto out_disable_rpm; } + return 0; + +out_disable_rpm: + rpcif_disable_rpm(rpc); return error; } From 35d516bdcd92fde46202d06b68df1166760208fd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 3 Apr 2022 12:11:13 +0200 Subject: [PATCH 138/708] spi: mxic: Fix an error handling path in mxic_spi_probe() If spi_register_master() fails, we must undo a previous mxic_spi_mem_ecc_probe() call, as already done in the remove function. Fixes: 00360ebae483 ("spi: mxic: Add support for pipelined ECC operations") Signed-off-by: Christophe JAILLET Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/09c81f751241f6ec0bac7a48d4ec814a742e0d17.1648980664.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- drivers/spi/spi-mxic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-mxic.c b/drivers/spi/spi-mxic.c index 55c092069301..65be8e085ab8 100644 --- a/drivers/spi/spi-mxic.c +++ b/drivers/spi/spi-mxic.c @@ -813,6 +813,7 @@ static int mxic_spi_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "spi_register_master failed\n"); pm_runtime_disable(&pdev->dev); + mxic_spi_mem_ecc_remove(mxic); } return ret; From 2316f0fc0ad2aa87a568ceaf3d76be983ee555c3 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 3 Apr 2022 21:22:35 +0800 Subject: [PATCH 139/708] regulator: atc260x: Fix missing active_discharge_on setting Without active_discharge_on setting, the SWITCH1 discharge enable control is always disabled. Fix it. Fixes: 3b15ccac161a ("regulator: Add regulator driver for ATC260x PMICs") Signed-off-by: Axel Lin Link: https://lore.kernel.org/r/20220403132235.123727-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/atc260x-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/atc260x-regulator.c b/drivers/regulator/atc260x-regulator.c index 05147d2c3842..485e58b264c0 100644 --- a/drivers/regulator/atc260x-regulator.c +++ b/drivers/regulator/atc260x-regulator.c @@ -292,6 +292,7 @@ enum atc2603c_reg_ids { .bypass_mask = BIT(5), \ .active_discharge_reg = ATC2603C_PMU_SWITCH_CTL, \ .active_discharge_mask = BIT(1), \ + .active_discharge_on = BIT(1), \ .owner = THIS_MODULE, \ } From dbf3f09322141b6f04a33949453b7626f62d9e0b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 4 Apr 2022 07:51:22 +0200 Subject: [PATCH 140/708] tty: serial: mpc52xx_uart: make rx/tx hooks return unsigned, part II. The below commit changed types of some hooks in struct psc_ops. It also changed the types of the functions which are referenced in the instances of the above struct. However the commit did so only for CONFIG_PPC_MPC52xx, but not for CONFIG_PPC_MPC512x. This results in build errors like: mpc52xx_uart.c:static unsigned int mpc52xx_psc_raw_tx_rdy(struct uart_port *port) mpc52xx_uart.c:static int mpc512x_psc_raw_tx_rdy(struct uart_port *port) ^^^ mpc52xx_uart.c:static int mpc5125_psc_raw_tx_rdy(struct uart_port *port) ^^^ Therefore, fix the latter case now too. Fixes: 18662a1d8f35 (tty: serial: mpc52xx_uart: make rx/tx hooks return unsigned) Cc: Linus Torvalds Reported-by: Guenter Roeck Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20220404055122.31194-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mpc52xx_uart.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/tty/serial/mpc52xx_uart.c b/drivers/tty/serial/mpc52xx_uart.c index 8a6958377764..3acc0f185762 100644 --- a/drivers/tty/serial/mpc52xx_uart.c +++ b/drivers/tty/serial/mpc52xx_uart.c @@ -436,31 +436,31 @@ static void mpc512x_psc_fifo_init(struct uart_port *port) out_be32(&FIFO_512x(port)->rximr, MPC512x_PSC_FIFO_ALARM); } -static int mpc512x_psc_raw_rx_rdy(struct uart_port *port) +static unsigned int mpc512x_psc_raw_rx_rdy(struct uart_port *port) { return !(in_be32(&FIFO_512x(port)->rxsr) & MPC512x_PSC_FIFO_EMPTY); } -static int mpc512x_psc_raw_tx_rdy(struct uart_port *port) +static unsigned int mpc512x_psc_raw_tx_rdy(struct uart_port *port) { return !(in_be32(&FIFO_512x(port)->txsr) & MPC512x_PSC_FIFO_FULL); } -static int mpc512x_psc_rx_rdy(struct uart_port *port) +static unsigned int mpc512x_psc_rx_rdy(struct uart_port *port) { return in_be32(&FIFO_512x(port)->rxsr) & in_be32(&FIFO_512x(port)->rximr) & MPC512x_PSC_FIFO_ALARM; } -static int mpc512x_psc_tx_rdy(struct uart_port *port) +static unsigned int mpc512x_psc_tx_rdy(struct uart_port *port) { return in_be32(&FIFO_512x(port)->txsr) & in_be32(&FIFO_512x(port)->tximr) & MPC512x_PSC_FIFO_ALARM; } -static int mpc512x_psc_tx_empty(struct uart_port *port) +static unsigned int mpc512x_psc_tx_empty(struct uart_port *port) { return in_be32(&FIFO_512x(port)->txsr) & MPC512x_PSC_FIFO_EMPTY; @@ -780,29 +780,29 @@ static void mpc5125_psc_fifo_init(struct uart_port *port) out_be32(&FIFO_5125(port)->rximr, MPC512x_PSC_FIFO_ALARM); } -static int mpc5125_psc_raw_rx_rdy(struct uart_port *port) +static unsigned int mpc5125_psc_raw_rx_rdy(struct uart_port *port) { return !(in_be32(&FIFO_5125(port)->rxsr) & MPC512x_PSC_FIFO_EMPTY); } -static int mpc5125_psc_raw_tx_rdy(struct uart_port *port) +static unsigned int mpc5125_psc_raw_tx_rdy(struct uart_port *port) { return !(in_be32(&FIFO_5125(port)->txsr) & MPC512x_PSC_FIFO_FULL); } -static int mpc5125_psc_rx_rdy(struct uart_port *port) +static unsigned int mpc5125_psc_rx_rdy(struct uart_port *port) { return in_be32(&FIFO_5125(port)->rxsr) & in_be32(&FIFO_5125(port)->rximr) & MPC512x_PSC_FIFO_ALARM; } -static int mpc5125_psc_tx_rdy(struct uart_port *port) +static unsigned int mpc5125_psc_tx_rdy(struct uart_port *port) { return in_be32(&FIFO_5125(port)->txsr) & in_be32(&FIFO_5125(port)->tximr) & MPC512x_PSC_FIFO_ALARM; } -static int mpc5125_psc_tx_empty(struct uart_port *port) +static unsigned int mpc5125_psc_tx_empty(struct uart_port *port) { return in_be32(&FIFO_5125(port)->txsr) & MPC512x_PSC_FIFO_EMPTY; } From a0ab7e5bc9651d65637f50ee9c09e083919677ed Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 4 Apr 2022 09:44:32 +0100 Subject: [PATCH 141/708] Revert "arm64: Change elfcore for_each_mte_vma() to use VMA iterator" This reverts commit 3a4f7ef4bed5bdc77a1ac8132f9f0650bbcb3eae. Revert this temporary bodge. It only existed to ease integration with the maple tree work for the 5.18 merge window and that doesn't appear to have landed in any case. Signed-off-by: Will Deacon --- arch/arm64/kernel/elfcore.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 3ed39c61a510..3455ee4acc04 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -8,16 +8,9 @@ #include #include -#ifndef VMA_ITERATOR -#define VMA_ITERATOR(name, mm, addr) \ - struct mm_struct *name = mm -#define for_each_vma(vmi, vma) \ - for (vma = vmi->mmap; vma; vma = vma->vm_next) -#endif - -#define for_each_mte_vma(vmi, vma) \ +#define for_each_mte_vma(tsk, vma) \ if (system_supports_mte()) \ - for_each_vma(vmi, vma) \ + for (vma = tsk->mm->mmap; vma; vma = vma->vm_next) \ if (vma->vm_flags & VM_MTE) static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) @@ -72,9 +65,8 @@ Elf_Half elf_core_extra_phdrs(void) { struct vm_area_struct *vma; int vma_count = 0; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) + for_each_mte_vma(current, vma) vma_count++; return vma_count; @@ -83,9 +75,8 @@ Elf_Half elf_core_extra_phdrs(void) int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) { struct vm_area_struct *vma; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) { + for_each_mte_vma(current, vma) { struct elf_phdr phdr; phdr.p_type = PT_ARM_MEMTAG_MTE; @@ -109,9 +100,8 @@ size_t elf_core_extra_data_size(void) { struct vm_area_struct *vma; size_t data_size = 0; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) + for_each_mte_vma(current, vma) data_size += mte_vma_tag_dump_size(vma); return data_size; @@ -120,9 +110,8 @@ size_t elf_core_extra_data_size(void) int elf_core_write_extra_data(struct coredump_params *cprm) { struct vm_area_struct *vma; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) { + for_each_mte_vma(current, vma) { if (vma->vm_flags & VM_DONTDUMP) continue; From 16decce22efa0813beafbc9084181e299b69a1a1 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 1 Apr 2022 16:13:56 +0100 Subject: [PATCH 142/708] arm64: mte: Fix the stack frame size warning in mte_dump_tag_range() With 64K page configurations, the tags array stored on the stack of the mte_dump_tag_range() function is 2048 bytes, triggering a compiler warning when CONFIG_FRAME_WARN is enabled. Switch to a kmalloc() allocation via mte_allocate_tag_storage(). Signed-off-by: Catalin Marinas Fixes: 6dd8b1a0b6cb ("arm64: mte: Dump the MTE tags in the core file") Reported-by: kernel test robot Cc: Will Deacon Link: https://lore.kernel.org/r/20220401151356.1674232-1-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/elfcore.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 3455ee4acc04..2b3f3d0544b9 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -25,10 +25,11 @@ static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) static int mte_dump_tag_range(struct coredump_params *cprm, unsigned long start, unsigned long end) { + int ret = 1; unsigned long addr; + void *tags = NULL; for (addr = start; addr < end; addr += PAGE_SIZE) { - char tags[MTE_PAGE_TAG_STORAGE]; struct page *page = get_dump_page(addr); /* @@ -52,13 +53,28 @@ static int mte_dump_tag_range(struct coredump_params *cprm, continue; } + if (!tags) { + tags = mte_allocate_tag_storage(); + if (!tags) { + put_page(page); + ret = 0; + break; + } + } + mte_save_page_tags(page_address(page), tags); put_page(page); - if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) - return 0; + if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) { + mte_free_tag_storage(tags); + ret = 0; + break; + } } - return 1; + if (tags) + mte_free_tag_storage(tags); + + return ret; } Elf_Half elf_core_extra_phdrs(void) From 8362f5217bc69c3cd30da73cd2d2ae3af4cc8117 Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Wed, 8 Dec 2021 19:41:49 +0100 Subject: [PATCH 143/708] dt-bindings: reset: document deprecated HiSilicon property Documenting deprecated property prevents dt-schema validation errors. Signed-off-by: David Heidelberg Acked-by: Rob Herring Link: https://lore.kernel.org/r/20211208184149.99537-1-david@ixit.cz Signed-off-by: Philipp Zabel --- .../devicetree/bindings/reset/hisilicon,hi3660-reset.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/reset/hisilicon,hi3660-reset.yaml b/Documentation/devicetree/bindings/reset/hisilicon,hi3660-reset.yaml index b0c41ab1a746..cdfcf32c53fa 100644 --- a/Documentation/devicetree/bindings/reset/hisilicon,hi3660-reset.yaml +++ b/Documentation/devicetree/bindings/reset/hisilicon,hi3660-reset.yaml @@ -24,6 +24,11 @@ properties: - const: hisilicon,hi3670-reset - const: hisilicon,hi3660-reset + hisi,rst-syscon: + deprecated: true + description: phandle of the reset's syscon, use hisilicon,rst-syscon instead + $ref: /schemas/types.yaml#/definitions/phandle + hisilicon,rst-syscon: description: phandle of the reset's syscon. $ref: /schemas/types.yaml#/definitions/phandle From da18980a855edf44270f05455e0ec3f2472f64cc Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 15 Dec 2021 11:25:46 +0100 Subject: [PATCH 144/708] reset: renesas: Check return value of reset_control_deassert() Deasserting the reset is vital, therefore bail out in case of error. Suggested-by: Biju Das Signed-off-by: Heiner Kallweit Reviewed-by: Biju Das Link: https://lore.kernel.org/r/b2131908-0110-006b-862f-080517f3e2d8@gmail.com Signed-off-by: Philipp Zabel --- drivers/reset/reset-rzg2l-usbphy-ctrl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/reset/reset-rzg2l-usbphy-ctrl.c b/drivers/reset/reset-rzg2l-usbphy-ctrl.c index 1e8315038850..a8dde4606360 100644 --- a/drivers/reset/reset-rzg2l-usbphy-ctrl.c +++ b/drivers/reset/reset-rzg2l-usbphy-ctrl.c @@ -121,7 +121,9 @@ static int rzg2l_usbphy_ctrl_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(priv->rstc), "failed to get reset\n"); - reset_control_deassert(priv->rstc); + error = reset_control_deassert(priv->rstc); + if (error) + return error; priv->rcdev.ops = &rzg2l_usbphy_ctrl_reset_ops; priv->rcdev.of_reset_n_cells = 1; From d1da1052ffad63aa5181b69f20a6952e31f339c2 Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Wed, 12 Jan 2022 19:26:46 +0530 Subject: [PATCH 145/708] reset: tegra-bpmp: Restore Handle errors in BPMP response This reverts following commit 69125b4b9440 ("reset: tegra-bpmp: Revert Handle errors in BPMP response"). The Tegra194 HDA reset failure is fixed by commit d278dc9151a0 ("ALSA: hda/tegra: Fix Tegra194 HDA reset failure"). The temporary revert of original commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP response") can be removed now. Signed-off-by: Sameer Pujar Tested-by: Jon Hunter Reviewed-by: Jon Hunter Acked-by: Thierry Reding Signed-off-by: Philipp Zabel Link: https://lore.kernel.org/r/1641995806-15245-1-git-send-email-spujar@nvidia.com --- drivers/reset/tegra/reset-bpmp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/reset/tegra/reset-bpmp.c b/drivers/reset/tegra/reset-bpmp.c index 24d3395964cc..4c5bba52b105 100644 --- a/drivers/reset/tegra/reset-bpmp.c +++ b/drivers/reset/tegra/reset-bpmp.c @@ -20,6 +20,7 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc, struct tegra_bpmp *bpmp = to_tegra_bpmp(rstc); struct mrq_reset_request request; struct tegra_bpmp_message msg; + int err; memset(&request, 0, sizeof(request)); request.cmd = command; @@ -30,7 +31,13 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc, msg.tx.data = &request; msg.tx.size = sizeof(request); - return tegra_bpmp_transfer(bpmp, &msg); + err = tegra_bpmp_transfer(bpmp, &msg); + if (err) + return err; + if (msg.rx.ret) + return -EINVAL; + + return 0; } static int tegra_bpmp_reset_module(struct reset_controller_dev *rstc, From 5524cbb1bfcdff0cad0aaa9f94e6092002a07259 Mon Sep 17 00:00:00 2001 From: Phil Auld Date: Thu, 31 Mar 2022 11:39:26 -0400 Subject: [PATCH 146/708] arch/arm64: Fix topology initialization for core scheduling Arm64 systems rely on store_cpu_topology() to call update_siblings_masks() to transfer the toplogy to the various cpu masks. This needs to be done before the call to notify_cpu_starting() which tells the scheduler about each cpu found, otherwise the core scheduling data structures are setup in a way that does not match the actual topology. With smt_mask not setup correctly we bail on `cpumask_weight(smt_mask) == 1` for !leaders in: notify_cpu_starting() cpuhp_invoke_callback_range() sched_cpu_starting() sched_core_cpu_starting() which leads to rq->core not being correctly set for !leader-rq's. Without this change stress-ng (which enables core scheduling in its prctl tests in newer versions -- i.e. with PR_SCHED_CORE support) causes a warning and then a crash (trimmed for legibility): [ 1853.805168] ------------[ cut here ]------------ [ 1853.809784] task_rq(b)->core != rq->core [ 1853.809792] WARNING: CPU: 117 PID: 0 at kernel/sched/fair.c:11102 cfs_prio_less+0x1b4/0x1c4 ... [ 1854.015210] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 ... [ 1854.231256] Call trace: [ 1854.233689] pick_next_task+0x3dc/0x81c [ 1854.237512] __schedule+0x10c/0x4cc [ 1854.240988] schedule_idle+0x34/0x54 Fixes: 9edeaea1bc45 ("sched: Core-wide rq->lock") Signed-off-by: Phil Auld Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Link: https://lore.kernel.org/r/20220331153926.25742-1-pauld@redhat.com Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 27df5c1e6baa..3b46041f2b97 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -234,6 +234,7 @@ asmlinkage notrace void secondary_start_kernel(void) * Log the CPU info before it is marked online and might get read. */ cpuinfo_store_cpu(); + store_cpu_topology(cpu); /* * Enable GIC and timers. @@ -242,7 +243,6 @@ asmlinkage notrace void secondary_start_kernel(void) ipi_setup(cpu); - store_cpu_topology(cpu); numa_add_cpu(cpu); /* From dd671f16b1cdb188aa64d740a408f7d00e281444 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 18 Mar 2022 11:37:05 +0100 Subject: [PATCH 147/708] arm64: fix typos in comments Various spelling mistakes in comments. Detected with the help of Coccinelle. Signed-off-by: Julia Lawall Link: https://lore.kernel.org/r/20220318103729.157574-10-Julia.Lawall@inria.fr [will: Squashed in 20220318103729.157574-28-Julia.Lawall@inria.fr] Signed-off-by: Will Deacon --- arch/arm64/kernel/hw_breakpoint.c | 2 +- arch/arm64/kernel/module-plts.c | 2 +- arch/arm64/kernel/suspend.c | 2 +- arch/arm64/mm/init.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 712e97c03e54..cd868084e724 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -701,7 +701,7 @@ NOKPROBE_SYMBOL(breakpoint_handler); * addresses. There is no straight-forward way, short of disassembling the * offending instruction, to map that address back to the watchpoint. This * function computes the distance of the memory access from the watchpoint as a - * heuristic for the likelyhood that a given access triggered the watchpoint. + * heuristic for the likelihood that a given access triggered the watchpoint. * * See Section D2.10.5 "Determining the memory location that caused a Watchpoint * exception" of ARMv8 Architecture Reference Manual for details. diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index e53493d8b208..a3d0494f25a9 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -220,7 +220,7 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, * increasing the section's alignment so that the * resulting address of this instruction is guaranteed * to equal the offset in that particular bit (as well - * as all less signficant bits). This ensures that the + * as all less significant bits). This ensures that the * address modulo 4 KB != 0xfff8 or 0xfffc (which would * have all ones in bits [11:3]) */ diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 19ee7c33769d..2b0887e58a7c 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -140,7 +140,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) /* * Restore pstate flags. OS lock and mdscr have been already * restored, so from this point onwards, debugging is fully - * renabled if it was enabled when core started shutdown. + * reenabled if it was enabled when core started shutdown. */ local_daif_restore(flags); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 8ac25f19084e..1e7b1550e2fc 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -73,7 +73,7 @@ EXPORT_SYMBOL(memstart_addr); * In this scheme a comparatively quicker boot is observed. * * If ZONE_DMA configs are defined, crash kernel memory reservation - * is delayed until DMA zone memory range size initilazation performed in + * is delayed until DMA zone memory range size initialization performed in * zone_sizes_init(). The defer is necessary to steer clear of DMA zone * memory range to avoid overlap allocation. So crash kernel memory boundaries * are not known when mapping all bank memory ranges, which otherwise means @@ -81,7 +81,7 @@ EXPORT_SYMBOL(memstart_addr); * so page-granularity mappings are created for the entire memory range. * Hence a slightly slower boot is observed. * - * Note: Page-granularity mapppings are necessary for crash kernel memory + * Note: Page-granularity mappings are necessary for crash kernel memory * range for shrinking its size via /sys/kernel/kexec_crash_size interface. */ #if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32) From 4dfa1f3657a0d4fb556d4440322d35bdcf5e4970 Mon Sep 17 00:00:00 2001 From: Zhiyuan Dai Date: Mon, 21 Mar 2022 10:56:27 +0800 Subject: [PATCH 148/708] arm64: Fix comments in macro __init_el2_gicv3 Fix typo in comment. Signed-off-by: Zhiyuan Dai Link: https://lore.kernel.org/r/1647831387-3686-1-git-send-email-daizhiyuan@phytium.com.cn Signed-off-by: Will Deacon --- arch/arm64/include/asm/el2_setup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 7f3c87f7a0ce..c31be7eda9df 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -107,7 +107,7 @@ isb // Make sure SRE is now set mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks - msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + msr_s SYS_ICH_HCR_EL2, xzr // Reset ICH_HCR_EL2 to defaults .Lskip_gicv3_\@: .endm From 03cb66463b5547b289099a95ac4ea591cca88ca9 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Wed, 30 Mar 2022 14:11:18 +0900 Subject: [PATCH 149/708] dt-bindings: reset: Add parent "resets" property as optional LD11 mio reset controller has a reset lines from system controller. Add parent "resets" property to fix the following warning. uniphier-ld11-global.dt.yaml: reset: 'resets' does not match any of the regexes: 'pinctrl-[0-9]+' From schema: Documentation/devicetree/bindings/reset/socionext,uniphier-reset.yaml Signed-off-by: Kunihiko Hayashi Reviewed-by: Krzysztof Kozlowski Signed-off-by: Philipp Zabel Link: https://lore.kernel.org/r/1648617078-8312-1-git-send-email-hayashi.kunihiko@socionext.com --- .../devicetree/bindings/reset/socionext,uniphier-reset.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/reset/socionext,uniphier-reset.yaml b/Documentation/devicetree/bindings/reset/socionext,uniphier-reset.yaml index 377a7d242323..6566804ec567 100644 --- a/Documentation/devicetree/bindings/reset/socionext,uniphier-reset.yaml +++ b/Documentation/devicetree/bindings/reset/socionext,uniphier-reset.yaml @@ -55,6 +55,9 @@ properties: "#reset-cells": const: 1 + resets: + maxItems: 1 + additionalProperties: false required: From 2012a9e279013933885983cbe0a5fe828052563b Mon Sep 17 00:00:00 2001 From: Xiaomeng Tong Date: Sun, 27 Mar 2022 13:57:33 +0800 Subject: [PATCH 150/708] perf: qcom_l2_pmu: fix an incorrect NULL check on list iterator The bug is here: return cluster; The list iterator value 'cluster' will *always* be set and non-NULL by list_for_each_entry(), so it is incorrect to assume that the iterator value will be NULL if the list is empty or no element is found. To fix the bug, return 'cluster' when found, otherwise return NULL. Cc: stable@vger.kernel.org Fixes: 21bdbb7102ed ("perf: add qcom l2 cache perf events driver") Signed-off-by: Xiaomeng Tong Link: https://lore.kernel.org/r/20220327055733.4070-1-xiam0nd.tong@gmail.com Signed-off-by: Will Deacon --- drivers/perf/qcom_l2_pmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index 7640491aab12..30234c261b05 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -736,7 +736,7 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster( { u64 mpidr; int cpu_cluster_id; - struct cluster_pmu *cluster = NULL; + struct cluster_pmu *cluster; /* * This assumes that the cluster_id is in MPIDR[aff1] for @@ -758,10 +758,10 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster( cluster->cluster_id); cpumask_set_cpu(cpu, &cluster->cluster_cpus); *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster; - break; + return cluster; } - return cluster; + return NULL; } static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) From 1d8e926a04b948f03b3c98aabf7e0033ac12ffbc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 29 Mar 2022 15:10:10 +0200 Subject: [PATCH 151/708] perf: MARVELL_CN10K_DDR_PMU should depend on ARCH_THUNDER The Marvell CN10K DRAM Subsystem (DSS) performance monitor is only present on Marvell CN10K SoCs. Hence add a dependency on ARCH_THUNDER, to prevent asking the user about this driver when configuring a kernel without Cavium Thunder (incl. Marvell CN10K) SoC support, Fixes: 68fa55f0e05c ("perf/marvell: cn10k DDR perf event core ownership") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/18bfd6e1bcf67db7ea656d684a8bbb68261eeb54.1648559364.git.geert+renesas@glider.be Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index afdcb91601d2..1e2d69453771 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -187,7 +187,7 @@ source "drivers/perf/hisilicon/Kconfig" config MARVELL_CN10K_DDR_PMU tristate "Enable MARVELL CN10K DRAM Subsystem(DSS) PMU Support" - depends on ARM64 || (COMPILE_TEST && 64BIT) + depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT) help Enable perf support for Marvell DDR Performance monitoring event on CN10K platform. From 7e2646ed47542123168d43916b84b954532e5386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Fri, 18 Mar 2022 15:14:41 +0100 Subject: [PATCH 152/708] Revert "mmc: sdhci-xenon: fix annoying 1.8V regulator warning" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit bb32e1987bc55ce1db400faf47d85891da3c9b9f. Commit 1a3ed0dc3594 ("mmc: sdhci-xenon: fix 1.8v regulator stabilization") contains proper fix for the issue described in commit bb32e1987bc5 ("mmc: sdhci-xenon: fix annoying 1.8V regulator warning"). Fixes: 8d876bf472db ("mmc: sdhci-xenon: wait 5ms after set 1.8V signal enable") Cc: stable@vger.kernel.org # 1a3ed0dc3594 ("mmc: sdhci-xenon: fix 1.8v regulator stabilization") Signed-off-by: Pali Rohár Reviewed-by: Marek Behún Reviewed-by: Marcin Wojtas Link: https://lore.kernel.org/r/20220318141441.32329-1-pali@kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-xenon.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index 666cee4c7f7c..08e838400b52 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -241,16 +241,6 @@ static void xenon_voltage_switch(struct sdhci_host *host) { /* Wait for 5ms after set 1.8V signal enable bit */ usleep_range(5000, 5500); - - /* - * For some reason the controller's Host Control2 register reports - * the bit representing 1.8V signaling as 0 when read after it was - * written as 1. Subsequent read reports 1. - * - * Since this may cause some issues, do an empty read of the Host - * Control2 register here to circumvent this. - */ - sdhci_readw(host, SDHCI_HOST_CONTROL2); } static unsigned int xenon_get_max_clock(struct sdhci_host *host) From 0d319dd5a27183b75d984e3dc495248e59f99334 Mon Sep 17 00:00:00 2001 From: Yann Gautier Date: Thu, 17 Mar 2022 12:19:43 +0100 Subject: [PATCH 153/708] mmc: mmci: stm32: correctly check all elements of sg list Use sg and not data->sg when checking sg list elements. Else only the first element alignment is checked. The last element should be checked the same way, for_each_sg already set sg to sg_next(sg). Fixes: 46b723dd867d ("mmc: mmci: add stm32 sdmmc variant") Cc: stable@vger.kernel.org Signed-off-by: Yann Gautier Link: https://lore.kernel.org/r/20220317111944.116148-2-yann.gautier@foss.st.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci_stm32_sdmmc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index 9c13f2c31365..4566d7fc9055 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -62,8 +62,8 @@ static int sdmmc_idma_validate_data(struct mmci_host *host, * excepted the last element which has no constraint on idmasize */ for_each_sg(data->sg, sg, data->sg_len - 1, i) { - if (!IS_ALIGNED(data->sg->offset, sizeof(u32)) || - !IS_ALIGNED(data->sg->length, SDMMC_IDMA_BURST)) { + if (!IS_ALIGNED(sg->offset, sizeof(u32)) || + !IS_ALIGNED(sg->length, SDMMC_IDMA_BURST)) { dev_err(mmc_dev(host->mmc), "unaligned scatterlist: ofst:%x length:%d\n", data->sg->offset, data->sg->length); @@ -71,7 +71,7 @@ static int sdmmc_idma_validate_data(struct mmci_host *host, } } - if (!IS_ALIGNED(data->sg->offset, sizeof(u32))) { + if (!IS_ALIGNED(sg->offset, sizeof(u32))) { dev_err(mmc_dev(host->mmc), "unaligned last scatterlist: ofst:%x length:%d\n", data->sg->offset, data->sg->length); From 5d435933376962b107bd76970912e7e80247dcc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20L=C3=B6hle?= Date: Thu, 24 Mar 2022 14:18:41 +0000 Subject: [PATCH 154/708] mmc: block: Check for errors after write on SPI Introduce a SEND_STATUS check for writes through SPI to not mark an unsuccessful write as successful. Since SPI SD/MMC does not have states, after a write, the card will just hold the line LOW until it is ready again. The driver marks the write therefore as completed as soon as it reads something other than all zeroes. The driver does not distinguish from a card no longer signalling busy and it being disconnected (and the line being pulled-up by the host). This lead to writes being marked as successful when disconnecting a busy card. Now the card is ensured to be still connected by an additional CMD13, just like non-SPI is ensured to go back to TRAN state. While at it and since we already poll for the post-write status anyway, we might as well check for SPIs error bits (any of them). The disconnecting card problem is reproducable for me after continuous write activity and randomly disconnecting, around every 20-50 tries on SPI DS for some card. Fixes: 7213d175e3b6f ("MMC/SD card driver learns SPI") Cc: stable@vger.kernel.org Signed-off-by: Christian Loehle Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/76f6f5d2b35543bab3dfe438f268609c@hyperstone.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 4e67c1403cc9..be2078684417 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1880,6 +1880,31 @@ static inline bool mmc_blk_rq_error(struct mmc_blk_request *brq) brq->data.error || brq->cmd.resp[0] & CMD_ERRORS; } +static int mmc_spi_err_check(struct mmc_card *card) +{ + u32 status = 0; + int err; + + /* + * SPI does not have a TRAN state we have to wait on, instead the + * card is ready again when it no longer holds the line LOW. + * We still have to ensure two things here before we know the write + * was successful: + * 1. The card has not disconnected during busy and we actually read our + * own pull-up, thinking it was still connected, so ensure it + * still responds. + * 2. Check for any error bits, in particular R1_SPI_IDLE to catch a + * just reconnected card after being disconnected during busy. + */ + err = __mmc_send_status(card, &status, 0); + if (err) + return err; + /* All R1 and R2 bits of SPI are errors in our case */ + if (status) + return -EIO; + return 0; +} + static int mmc_blk_busy_cb(void *cb_data, bool *busy) { struct mmc_blk_busy_data *data = cb_data; @@ -1903,9 +1928,16 @@ static int mmc_blk_card_busy(struct mmc_card *card, struct request *req) struct mmc_blk_busy_data cb_data; int err; - if (mmc_host_is_spi(card->host) || rq_data_dir(req) == READ) + if (rq_data_dir(req) == READ) return 0; + if (mmc_host_is_spi(card->host)) { + err = mmc_spi_err_check(card); + if (err) + mqrq->brq.data.bytes_xfered = 0; + return err; + } + cb_data.card = card; cb_data.status = 0; err = __mmc_poll_for_busy(card->host, 0, MMC_BLK_TIMEOUT_MS, From 08ebf903af57cda6d773f3dd1671b64f73b432b8 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Thu, 31 Mar 2022 15:32:23 +0800 Subject: [PATCH 155/708] mmc: core: Fixup support for writeback-cache for eMMC and SD During the card initialization process, the mmc core checks whether the eMMC/SD card supports an internal writeback-cache and then enables it inside the card. Unfortunately, this isn't according to what the mmc core reports to the upper block layer. Instead, the writeback-cache support with REQ_FLUSH and REQ_FUA, are being enabled depending on whether the host supports the CMD23 (MMC_CAP_CMD23) and whether an eMMC supports the reliable-write command. This is wrong and it may also sound awkward. In fact, it's a remnant from when both eMMC/SD cards didn't have dedicated commands/support to control the internal writeback-cache. In other words, it was the best we could do at that point in time. To fix the problem, but also without breaking backwards compatibility, let's align the REQ_FLUSH support with whether the writeback-cache became successfully enabled - for both eMMC and SD cards. Cc: stable@kernel.org Fixes: 881d1c25f765 ("mmc: core: Add cache control for eMMC4.5 device") Fixes: 130206a615a9 ("mmc: core: Add support for cache ctrl for SD cards") Depends-on: 97fce126e279 ("mmc: block: Issue a cache flush only when it's enabled") Reviewed-by: Avri Altman Signed-off-by: Michael Wu Link: https://lore.kernel.org/r/20220331073223.106415-1-michael@allwinnertech.com [Ulf: Re-wrote the commit message] Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index be2078684417..db99882c95d8 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2382,6 +2382,8 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, struct mmc_blk_data *md; int devidx, ret; char cap_str[10]; + bool cache_enabled = false; + bool fua_enabled = false; devidx = ida_simple_get(&mmc_blk_ida, 0, max_devices, GFP_KERNEL); if (devidx < 0) { @@ -2461,13 +2463,17 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, md->flags |= MMC_BLK_CMD23; } - if (mmc_card_mmc(card) && - md->flags & MMC_BLK_CMD23 && + if (md->flags & MMC_BLK_CMD23 && ((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) || card->ext_csd.rel_sectors)) { md->flags |= MMC_BLK_REL_WR; - blk_queue_write_cache(md->queue.queue, true, true); + fua_enabled = true; + cache_enabled = true; } + if (mmc_cache_enabled(card->host)) + cache_enabled = true; + + blk_queue_write_cache(md->queue.queue, cache_enabled, fua_enabled); string_get_size((u64)size, 512, STRING_UNITS_2, cap_str, sizeof(cap_str)); From b117c88df0e3b48903c36f97be92bac6a9e03df7 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 1 Apr 2022 13:05:20 +0200 Subject: [PATCH 156/708] dt-bindings: net: micrel: Revert latency support and timestamping check Revert latency support from binding. Based on the discussion[1], the DT is the wrong place to have the lantecies for the PHY. [1] https://lkml.org/lkml/2022/3/4/325 Fixes: 2358dd3fd325fc ("dt-bindings: net: micrel: Configure latency values and timestamping check for LAN8814 phy") Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- .../devicetree/bindings/net/micrel.txt | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/Documentation/devicetree/bindings/net/micrel.txt b/Documentation/devicetree/bindings/net/micrel.txt index c5ab62c39133..8d157f0295a5 100644 --- a/Documentation/devicetree/bindings/net/micrel.txt +++ b/Documentation/devicetree/bindings/net/micrel.txt @@ -45,20 +45,3 @@ Optional properties: In fiber mode, auto-negotiation is disabled and the PHY can only work in 100base-fx (full and half duplex) modes. - - - lan8814,ignore-ts: If present the PHY will not support timestamping. - - This option acts as check whether Timestamping is supported by - hardware or not. LAN8814 phy support hardware tmestamping. - - - lan8814,latency_rx_10: Configures Latency value of phy in ingress at 10 Mbps. - - - lan8814,latency_tx_10: Configures Latency value of phy in egress at 10 Mbps. - - - lan8814,latency_rx_100: Configures Latency value of phy in ingress at 100 Mbps. - - - lan8814,latency_tx_100: Configures Latency value of phy in egress at 100 Mbps. - - - lan8814,latency_rx_1000: Configures Latency value of phy in ingress at 1000 Mbps. - - - lan8814,latency_tx_1000: Configures Latency value of phy in egress at 1000 Mbps. From b814403a8cd8b28a2c0497e211f029786394531d Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 1 Apr 2022 13:05:21 +0200 Subject: [PATCH 157/708] net: phy: micrel: Remove latency from driver Based on the discussions here[1], the PHY driver is the wrong place to set the latencies, therefore remove them. [1] https://lkml.org/lkml/2022/3/4/325 Fixes: ece19502834d84 ("net: phy: micrel: 1588 support for LAN8814 phy") Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 102 +-------------------------------------- 1 file changed, 1 insertion(+), 101 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 19b11e896460..a873df07ad24 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -99,15 +99,6 @@ #define PTP_TIMESTAMP_EN_PDREQ_ BIT(2) #define PTP_TIMESTAMP_EN_PDRES_ BIT(3) -#define PTP_RX_LATENCY_1000 0x0224 -#define PTP_TX_LATENCY_1000 0x0225 - -#define PTP_RX_LATENCY_100 0x0222 -#define PTP_TX_LATENCY_100 0x0223 - -#define PTP_RX_LATENCY_10 0x0220 -#define PTP_TX_LATENCY_10 0x0221 - #define PTP_TX_PARSE_L2_ADDR_EN 0x0284 #define PTP_RX_PARSE_L2_ADDR_EN 0x0244 @@ -268,15 +259,6 @@ struct lan8814_ptp_rx_ts { u16 seq_id; }; -struct kszphy_latencies { - u16 rx_10; - u16 tx_10; - u16 rx_100; - u16 tx_100; - u16 rx_1000; - u16 tx_1000; -}; - struct kszphy_ptp_priv { struct mii_timestamper mii_ts; struct phy_device *phydev; @@ -296,7 +278,6 @@ struct kszphy_ptp_priv { struct kszphy_priv { struct kszphy_ptp_priv ptp_priv; - struct kszphy_latencies latencies; const struct kszphy_type *type; int led_mode; bool rmii_ref_clk_sel; @@ -304,14 +285,6 @@ struct kszphy_priv { u64 stats[ARRAY_SIZE(kszphy_hw_stats)]; }; -static struct kszphy_latencies lan8814_latencies = { - .rx_10 = 0x22AA, - .tx_10 = 0x2E4A, - .rx_100 = 0x092A, - .tx_100 = 0x02C1, - .rx_1000 = 0x01AD, - .tx_1000 = 0x00C9, -}; static const struct kszphy_type ksz8021_type = { .led_mode_reg = MII_KSZPHY_CTRL_2, .has_broadcast_disable = true, @@ -2618,55 +2591,6 @@ static int lan8814_ptp_probe_once(struct phy_device *phydev) return 0; } -static int lan8814_read_status(struct phy_device *phydev) -{ - struct kszphy_priv *priv = phydev->priv; - struct kszphy_latencies *latencies = &priv->latencies; - int err; - int regval; - - err = genphy_read_status(phydev); - if (err) - return err; - - switch (phydev->speed) { - case SPEED_1000: - lanphy_write_page_reg(phydev, 5, PTP_RX_LATENCY_1000, - latencies->rx_1000); - lanphy_write_page_reg(phydev, 5, PTP_TX_LATENCY_1000, - latencies->tx_1000); - break; - case SPEED_100: - lanphy_write_page_reg(phydev, 5, PTP_RX_LATENCY_100, - latencies->rx_100); - lanphy_write_page_reg(phydev, 5, PTP_TX_LATENCY_100, - latencies->tx_100); - break; - case SPEED_10: - lanphy_write_page_reg(phydev, 5, PTP_RX_LATENCY_10, - latencies->rx_10); - lanphy_write_page_reg(phydev, 5, PTP_TX_LATENCY_10, - latencies->tx_10); - break; - default: - break; - } - - /* Make sure the PHY is not broken. Read idle error count, - * and reset the PHY if it is maxed out. - */ - regval = phy_read(phydev, MII_STAT1000); - if ((regval & 0xFF) == 0xFF) { - phy_init_hw(phydev); - phydev->link = 0; - if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev)) - phydev->drv->config_intr(phydev); - return genphy_config_aneg(phydev); - } - - return 0; -} - static int lan8814_config_init(struct phy_device *phydev) { int val; @@ -2690,27 +2614,6 @@ static int lan8814_config_init(struct phy_device *phydev) return 0; } -static void lan8814_parse_latency(struct phy_device *phydev) -{ - const struct device_node *np = phydev->mdio.dev.of_node; - struct kszphy_priv *priv = phydev->priv; - struct kszphy_latencies *latency = &priv->latencies; - u32 val; - - if (!of_property_read_u32(np, "lan8814,latency_rx_10", &val)) - latency->rx_10 = val; - if (!of_property_read_u32(np, "lan8814,latency_tx_10", &val)) - latency->tx_10 = val; - if (!of_property_read_u32(np, "lan8814,latency_rx_100", &val)) - latency->rx_100 = val; - if (!of_property_read_u32(np, "lan8814,latency_tx_100", &val)) - latency->tx_100 = val; - if (!of_property_read_u32(np, "lan8814,latency_rx_1000", &val)) - latency->rx_1000 = val; - if (!of_property_read_u32(np, "lan8814,latency_tx_1000", &val)) - latency->tx_1000 = val; -} - static int lan8814_probe(struct phy_device *phydev) { const struct device_node *np = phydev->mdio.dev.of_node; @@ -2724,8 +2627,6 @@ static int lan8814_probe(struct phy_device *phydev) priv->led_mode = -1; - priv->latencies = lan8814_latencies; - phydev->priv = priv; if (!IS_ENABLED(CONFIG_PTP_1588_CLOCK) || @@ -2746,7 +2647,6 @@ static int lan8814_probe(struct phy_device *phydev) return err; } - lan8814_parse_latency(phydev); lan8814_ptp_init(phydev); return 0; @@ -2928,7 +2828,7 @@ static struct phy_driver ksphy_driver[] = { .config_init = lan8814_config_init, .probe = lan8814_probe, .soft_reset = genphy_soft_reset, - .read_status = lan8814_read_status, + .read_status = ksz9031_read_status, .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, From 76e9ccd6894377bc3cf7fbdea90b0af2cb4eb12a Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 1 Apr 2022 13:05:22 +0200 Subject: [PATCH 158/708] net: phy: micrel: Remove DT option lan8814,ignore-ts When the PHY and the MAC are capable of doing timestamping, the PHY has priority. Therefore the DT option lan8814,ignore-ts was added such that the PHY will not expose a PHC so then the timestamping was done in the MAC. This is not the correct approach of doing it, therefore remove this. Fixes: ece19502834d84 ("net: phy: micrel: 1588 support for LAN8814 phy") Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index a873df07ad24..fc53b71dc872 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2616,7 +2616,6 @@ static int lan8814_config_init(struct phy_device *phydev) static int lan8814_probe(struct phy_device *phydev) { - const struct device_node *np = phydev->mdio.dev.of_node; struct kszphy_priv *priv; u16 addr; int err; @@ -2630,8 +2629,7 @@ static int lan8814_probe(struct phy_device *phydev) phydev->priv = priv; if (!IS_ENABLED(CONFIG_PTP_1588_CLOCK) || - !IS_ENABLED(CONFIG_NETWORK_PHY_TIMESTAMPING) || - of_property_read_bool(np, "lan8814,ignore-ts")) + !IS_ENABLED(CONFIG_NETWORK_PHY_TIMESTAMPING)) return 0; /* Strap-in value for PHY address, below register read gives starting From 20921c0c86092b4082c91bd7c88305da74e5520b Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Fri, 1 Apr 2022 11:53:04 -0700 Subject: [PATCH 159/708] qed: fix ethtool register dump To fix a coverity complain, commit d5ac07dfbd2b ("qed: Initialize debug string array") removed "sw-platform" (one of the common global parameters) from the dump as this was used in the dump with an uninitialized string, however it did not reduce the number of common global parameters which caused the incorrect (unable to parse) register dump this patch fixes it with reducing NUM_COMMON_GLOBAL_PARAMS bye one. Cc: stable@vger.kernel.org Cc: Tim Gardner Cc: "David S. Miller" Fixes: d5ac07dfbd2b ("qed: Initialize debug string array") Signed-off-by: Prabhakar Kushwaha Signed-off-by: Alok Prasad Signed-off-by: Ariel Elior Signed-off-by: Manish Chopra Reviewed-by: Tim Gardner Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index e3edca187ddf..5250d1d1e49c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -489,7 +489,7 @@ struct split_type_defs { #define STATIC_DEBUG_LINE_DWORDS 9 -#define NUM_COMMON_GLOBAL_PARAMS 11 +#define NUM_COMMON_GLOBAL_PARAMS 10 #define MAX_RECURSION_DEPTH 10 From 4f81def272de17dc4bbd89ac38f49b2676c9b3d2 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Fri, 1 Apr 2022 20:21:10 -0400 Subject: [PATCH 160/708] bnxt_en: Synchronize tx when xdp redirects happen on same ring If there are more CPUs than the number of TX XDP rings, multiple XDP redirects can select the same TX ring based on the CPU on which XDP redirect is called. Add locking when needed and use static key to decide whether to take the lock. Fixes: f18c2b77b2e4 ("bnxt_en: optimized XDP_REDIRECT support") Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 +++++++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 ++ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 8 ++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 2 ++ 4 files changed, 19 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1c28495875cf..874fad0a5cf8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3253,6 +3253,7 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp) } qidx = bp->tc_to_qidx[j]; ring->queue_id = bp->q_info[qidx].queue_id; + spin_lock_init(&txr->xdp_tx_lock); if (i < bp->tx_nr_rings_xdp) continue; if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1)) @@ -10338,6 +10339,12 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) if (irq_re_init) udp_tunnel_nic_reset_ntf(bp->dev); + if (bp->tx_nr_rings_xdp < num_possible_cpus()) { + if (!static_key_enabled(&bnxt_xdp_locking_key)) + static_branch_enable(&bnxt_xdp_locking_key); + } else if (static_key_enabled(&bnxt_xdp_locking_key)) { + static_branch_disable(&bnxt_xdp_locking_key); + } set_bit(BNXT_STATE_OPEN, &bp->state); bnxt_enable_int(bp); /* Enable TX queues */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 61aa3e8c5952..b4d3d051463b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -800,6 +800,8 @@ struct bnxt_tx_ring_info { u32 dev_state; struct bnxt_ring_struct tx_ring_struct; + /* Synchronize simultaneous xdp_xmit on same ring */ + spinlock_t xdp_tx_lock; }; #define BNXT_LEGACY_COAL_CMPL_PARAMS \ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 52fad0fdeacf..c0541ff00ac8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -20,6 +20,8 @@ #include "bnxt.h" #include "bnxt_xdp.h" +DEFINE_STATIC_KEY_FALSE(bnxt_xdp_locking_key); + struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, struct bnxt_tx_ring_info *txr, dma_addr_t mapping, u32 len) @@ -227,6 +229,9 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, ring = smp_processor_id() % bp->tx_nr_rings_xdp; txr = &bp->tx_ring[ring]; + if (static_branch_unlikely(&bnxt_xdp_locking_key)) + spin_lock(&txr->xdp_tx_lock); + for (i = 0; i < num_frames; i++) { struct xdp_frame *xdp = frames[i]; @@ -250,6 +255,9 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, bnxt_db_write(bp, &txr->tx_db, txr->tx_prod); } + if (static_branch_unlikely(&bnxt_xdp_locking_key)) + spin_unlock(&txr->xdp_tx_lock); + return nxmit; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 0df40c3beb05..067bb5e821f5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -10,6 +10,8 @@ #ifndef BNXT_XDP_H #define BNXT_XDP_H +DECLARE_STATIC_KEY_FALSE(bnxt_xdp_locking_key); + struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, struct bnxt_tx_ring_info *txr, dma_addr_t mapping, u32 len); From facc173cf700e55b2ad249ecbd3a7537f7315691 Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Fri, 1 Apr 2022 20:21:11 -0400 Subject: [PATCH 161/708] bnxt_en: reserve space inside receive page for skb_shared_info Insufficient space was being reserved in the page used for packet reception, so the interface MTU could be set too large to still have room for the contents of the packet when doing XDP redirect. This resulted in the following message when redirecting a packet between 3520 and 3822 bytes with an MTU of 3822: [311815.561880] XDP_WARN: xdp_update_frame_from_buff(line:200): Driver BUG: missing reserved tailroom Fixes: f18c2b77b2e4 ("bnxt_en: optimized XDP_REDIRECT support") Reviewed-by: Somnath Kotur Reviewed-by: Pavan Chebbi Signed-off-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index b4d3d051463b..98453a78cbd0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -593,7 +593,8 @@ struct nqe_cn { #define BNXT_MAX_MTU 9500 #define BNXT_MAX_PAGE_MODE_MTU \ ((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN - \ - XDP_PACKET_HEADROOM) + XDP_PACKET_HEADROOM - \ + SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info))) #define BNXT_MIN_PKT_SIZE 52 From 27d4073f8d9af0340362554414f4961643a4f4de Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Fri, 1 Apr 2022 20:21:12 -0400 Subject: [PATCH 162/708] bnxt_en: Prevent XDP redirect from running when stopping TX queue Add checks in the XDP redirect callback to prevent XDP from running when the TX ring is undergoing shutdown. Also remove redundant checks in the XDP redirect callback to validate the txr and the flag that indicates the ring supports XDP. The modulo arithmetic on 'tx_nr_rings_xdp' already guarantees the derived TX ring is an XDP ring. txr is also guaranteed to be valid after checking BNXT_STATE_OPEN and within RCU grace period. Fixes: f18c2b77b2e4 ("bnxt_en: optimized XDP_REDIRECT support") Reviewed-by: Vladimir Olovyannikov Signed-off-by: Ray Jui Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index c0541ff00ac8..03b1d6c04504 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -229,14 +229,16 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, ring = smp_processor_id() % bp->tx_nr_rings_xdp; txr = &bp->tx_ring[ring]; + if (READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING) + return -EINVAL; + if (static_branch_unlikely(&bnxt_xdp_locking_key)) spin_lock(&txr->xdp_tx_lock); for (i = 0; i < num_frames; i++) { struct xdp_frame *xdp = frames[i]; - if (!txr || !bnxt_tx_avail(bp, txr) || - !(bp->bnapi[ring]->flags & BNXT_NAPI_FLAG_XDP)) + if (!bnxt_tx_avail(bp, txr)) break; mapping = dma_map_single(&pdev->dev, xdp->data, xdp->len, From 46d4820f949a3030b19ee482c68a50b06dd27590 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 4 Apr 2022 12:05:08 +0200 Subject: [PATCH 163/708] mmc: renesas_sdhi: special 4tap settings only apply to HS400 Previous documentation was vague, so we included SDR104 for slow SDnH clock settings. It turns out now, that it is only needed for HS400. Fixes: bb6d3fa98a41 ("clk: renesas: rcar-gen3: Switch to new SD clock handling") Cc: stable@vger.kernel.org Reported-by: Yoshihiro Shimoda Signed-off-by: Wolfram Sang Reviewed-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20220404100508.3209-1-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 2797a9c0f17d..2a4d314aa027 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -144,9 +144,9 @@ static unsigned int renesas_sdhi_clk_update(struct tmio_mmc_host *host, return clk_get_rate(priv->clk); if (priv->clkh) { + /* HS400 with 4TAP needs different clock settings */ bool use_4tap = priv->quirks && priv->quirks->hs400_4taps; - bool need_slow_clkh = (host->mmc->ios.timing == MMC_TIMING_UHS_SDR104) || - (host->mmc->ios.timing == MMC_TIMING_MMC_HS400); + bool need_slow_clkh = host->mmc->ios.timing == MMC_TIMING_MMC_HS400; clkh_shift = use_4tap && need_slow_clkh ? 1 : 2; ref_clk = priv->clkh; } From 2baed4f9b085724a8a34add832d4763f3d83f877 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sun, 3 Apr 2022 10:02:02 -0400 Subject: [PATCH 164/708] stmmac: dwmac-loongson: change loongson_dwmac_driver from global to static Smatch reports this issue dwmac-loongson.c:208:19: warning: symbol 'loongson_dwmac_driver' was not declared. Should it be static? loongson_dwmac_driver is only used in dwmac-loongson.c. File scope variables used only in one file should be static. Change loongson_dwmac_driver's storage-class-specifier from global to static. Signed-off-by: Tom Rix Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index ecf759ee1c9f..017dbbda0c1c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -205,7 +205,7 @@ static const struct pci_device_id loongson_dwmac_id_table[] = { }; MODULE_DEVICE_TABLE(pci, loongson_dwmac_id_table); -struct pci_driver loongson_dwmac_driver = { +static struct pci_driver loongson_dwmac_driver = { .name = "dwmac-loongson-pci", .id_table = loongson_dwmac_id_table, .probe = loongson_dwmac_probe, From 458f5d92df4807e2a7c803ed928369129996bf96 Mon Sep 17 00:00:00 2001 From: Martin Habets Date: Mon, 4 Apr 2022 11:48:51 +0100 Subject: [PATCH 165/708] sfc: Do not free an empty page_ring When the page_ring is not used page_ptr_mask is 0. Do not dereference page_ring[0] in this case. Fixes: 2768935a4660 ("sfc: reuse pages to avoid DMA mapping/unmapping costs") Reported-by: Taehee Yoo Signed-off-by: Martin Habets Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/rx_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index 1b22c7be0088..fa8b9aacca11 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -150,6 +150,9 @@ static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue) struct efx_nic *efx = rx_queue->efx; int i; + if (unlikely(!rx_queue->page_ring)) + return; + /* Unmap and release the pages in the recycle ring. Remove the ring. */ for (i = 0; i <= rx_queue->page_ptr_mask; i++) { struct page *page = rx_queue->page_ring[i]; From 5467801f1fcbdc46bc7298a84dbf3ca1ff2a7320 Mon Sep 17 00:00:00 2001 From: Shreeya Patel Date: Mon, 21 Mar 2022 19:02:41 +0530 Subject: [PATCH 166/708] gpio: Restrict usage of GPIO chip irq members before initialization GPIO chip irq members are exposed before they could be completely initialized and this leads to race conditions. One such issue was observed for the gc->irq.domain variable which was accessed through the I2C interface in gpiochip_to_irq() before it could be initialized by gpiochip_add_irqchip(). This resulted in Kernel NULL pointer dereference. Following are the logs for reference :- kernel: Call Trace: kernel: gpiod_to_irq+0x53/0x70 kernel: acpi_dev_gpio_irq_get_by+0x113/0x1f0 kernel: i2c_acpi_get_irq+0xc0/0xd0 kernel: i2c_device_probe+0x28a/0x2a0 kernel: really_probe+0xf2/0x460 kernel: RIP: 0010:gpiochip_to_irq+0x47/0xc0 To avoid such scenarios, restrict usage of GPIO chip irq members before they are completely initialized. Signed-off-by: Shreeya Patel Cc: stable@vger.kernel.org Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 19 +++++++++++++++++++ include/linux/gpio/driver.h | 9 +++++++++ 2 files changed, 28 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index e59884cc12a7..085348e08986 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1404,6 +1404,16 @@ static int gpiochip_to_irq(struct gpio_chip *gc, unsigned int offset) { struct irq_domain *domain = gc->irq.domain; +#ifdef CONFIG_GPIOLIB_IRQCHIP + /* + * Avoid race condition with other code, which tries to lookup + * an IRQ before the irqchip has been properly registered, + * i.e. while gpiochip is still being brought up. + */ + if (!gc->irq.initialized) + return -EPROBE_DEFER; +#endif + if (!gpiochip_irqchip_irq_valid(gc, offset)) return -ENXIO; @@ -1593,6 +1603,15 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc, acpi_gpiochip_request_interrupts(gc); + /* + * Using barrier() here to prevent compiler from reordering + * gc->irq.initialized before initialization of above + * GPIO chip irq members. + */ + barrier(); + + gc->irq.initialized = true; + return 0; } diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 98c93510640e..874aabd270c9 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -221,6 +221,15 @@ struct gpio_irq_chip { */ bool per_parent_data; + /** + * @initialized: + * + * Flag to track GPIO chip irq member's initialization. + * This flag will make sure GPIO chip irq members are not used + * before they are initialized. + */ + bool initialized; + /** * @init_hw: optional routine to initialize hardware before * an IRQ chip will be added. This is quite useful when From 36560efeab3232aa18d1190f7202eb42ff29e0f4 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Mon, 21 Mar 2022 14:06:24 -0400 Subject: [PATCH 167/708] platform/x86: think-lmi: certificate support clean ups Complete some clean-ups as reqested from the last review as follow-ups - Remove certificate from structure as no need to store it any more - Clean up return code handling - Moved freeing of signature to before admin object released (issue seen in testing when unloading module) - Minor code flow improvements Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20220321180624.4761-1-markpearson@lenovo.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/think-lmi.c | 44 +++++++++++--------------------- drivers/platform/x86/think-lmi.h | 1 - 2 files changed, 15 insertions(+), 30 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index bce17ca97947..a01a92769c1a 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -740,16 +740,8 @@ static ssize_t certificate_store(struct kobject *kobj, if (!tlmi_priv.certificate_support) return -EOPNOTSUPP; - new_cert = kstrdup(buf, GFP_KERNEL); - if (!new_cert) - return -ENOMEM; - /* Strip out CR if one is present */ - strip_cr(new_cert); - /* If empty then clear installed certificate */ - if (new_cert[0] == '\0') { /* Clear installed certificate */ - kfree(new_cert); - + if ((buf[0] == '\0') || (buf[0] == '\n')) { /* Clear installed certificate */ /* Check that signature is set */ if (!setting->signature || !setting->signature[0]) return -EACCES; @@ -763,14 +755,16 @@ static ssize_t certificate_store(struct kobject *kobj, ret = tlmi_simple_call(LENOVO_CLEAR_BIOS_CERT_GUID, auth_str); kfree(auth_str); - if (ret) - return ret; - kfree(setting->certificate); - setting->certificate = NULL; - return count; + return ret ?: count; } + new_cert = kstrdup(buf, GFP_KERNEL); + if (!new_cert) + return -ENOMEM; + /* Strip out CR if one is present */ + strip_cr(new_cert); + if (setting->cert_installed) { /* Certificate is installed so this is an update */ if (!setting->signature || !setting->signature[0]) { @@ -792,21 +786,14 @@ static ssize_t certificate_store(struct kobject *kobj, auth_str = kasprintf(GFP_KERNEL, "%s,%s", new_cert, setting->password); } - if (!auth_str) { - kfree(new_cert); + kfree(new_cert); + if (!auth_str) return -ENOMEM; - } ret = tlmi_simple_call(guid, auth_str); kfree(auth_str); - if (ret) { - kfree(new_cert); - return ret; - } - kfree(setting->certificate); - setting->certificate = new_cert; - return count; + return ret ?: count; } static struct kobj_attribute auth_certificate = __ATTR_WO(certificate); @@ -1194,6 +1181,10 @@ static void tlmi_release_attr(void) kset_unregister(tlmi_priv.attribute_kset); + /* Free up any saved signatures */ + kfree(tlmi_priv.pwd_admin->signature); + kfree(tlmi_priv.pwd_admin->save_signature); + /* Authentication structures */ sysfs_remove_group(&tlmi_priv.pwd_admin->kobj, &auth_attr_group); kobject_put(&tlmi_priv.pwd_admin->kobj); @@ -1210,11 +1201,6 @@ static void tlmi_release_attr(void) } kset_unregister(tlmi_priv.authentication_kset); - - /* Free up any saved certificates/signatures */ - kfree(tlmi_priv.pwd_admin->certificate); - kfree(tlmi_priv.pwd_admin->signature); - kfree(tlmi_priv.pwd_admin->save_signature); } static int tlmi_sysfs_init(void) diff --git a/drivers/platform/x86/think-lmi.h b/drivers/platform/x86/think-lmi.h index 4f69df6eed07..4daba6151cd6 100644 --- a/drivers/platform/x86/think-lmi.h +++ b/drivers/platform/x86/think-lmi.h @@ -63,7 +63,6 @@ struct tlmi_pwd_setting { int index; /*Used for HDD and NVME auth */ enum level_option level; bool cert_installed; - char *certificate; char *signature; char *save_signature; }; From 442b8b250c41050d26353eb158514f3d91df3455 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 24 Mar 2022 14:53:19 -0300 Subject: [PATCH 168/708] MAINTAINERS: Add Leon Romanovsky to RDMA maintainers Welcome Leon to the maintainer list so we continue to have two people on a medium sized subsystem. Link: https://lore.kernel.org/r/0-v1-64175bea3d24+13436-leon_maint_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index fd768d43e048..7f308fb9faa9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9598,6 +9598,7 @@ F: drivers/iio/pressure/dps310.c INFINIBAND SUBSYSTEM M: Jason Gunthorpe +M: Leon Romanovsky L: linux-rdma@vger.kernel.org S: Supported W: https://github.com/linux-rdma/rdma-core From abcc160e4c2bc320f0d94e1b77f272a12fe90a0e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 29 Mar 2022 14:42:21 -0400 Subject: [PATCH 169/708] MAINTAINERS: Update qib and hfi1 related drivers Remove Mike's contact from maintainers file. Link: https://lore.kernel.org/r/20220329184221.182061.69846.stgit@awfm-01.cornelisnetworks.com Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 4 ---- 1 file changed, 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7f308fb9faa9..9b1c91d9fb8a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8675,7 +8675,6 @@ F: include/linux/cciss*.h F: include/uapi/linux/cciss*.h HFI1 DRIVER -M: Mike Marciniszyn M: Dennis Dalessandro L: linux-rdma@vger.kernel.org S: Supported @@ -14657,7 +14656,6 @@ F: drivers/rtc/rtc-optee.c OPA-VNIC DRIVER M: Dennis Dalessandro -M: Mike Marciniszyn L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/ulp/opa_vnic @@ -16099,7 +16097,6 @@ F: include/uapi/linux/qemu_fw_cfg.h QIB DRIVER M: Dennis Dalessandro -M: Mike Marciniszyn L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/qib/ @@ -16617,7 +16614,6 @@ F: drivers/net/ethernet/rdc/r6040.c RDMAVT - RDMA verbs software M: Dennis Dalessandro -M: Mike Marciniszyn L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/sw/rdmavt From 84c2362fb65d69c721fec0974556378cbb36a62b Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Mon, 4 Apr 2022 11:58:03 +0300 Subject: [PATCH 170/708] RDMA/mlx5: Don't remove cache MRs when a delay is needed Don't remove MRs from the cache if need to delay the removal. Fixes: b9358bdbc713 ("RDMA/mlx5: Fix locking in MR cache work queue") Link: https://lore.kernel.org/r/c3087a90ff362c8796c7eaa2715128743ce36722.1649062436.git.leonro@nvidia.com Signed-off-by: Aharon Landau Reviewed-by: Shay Drory Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 956f8e875daa..45b0680377ec 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -574,8 +574,10 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) spin_lock_irq(&ent->lock); if (ent->disabled) goto out; - if (need_delay) + if (need_delay) { queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); + goto out; + } remove_cache_mr_locked(ent); queue_adjust_cache_locked(ent); } From 1d735eeee63a0beb65180ca0224f239cc0c9f804 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Mon, 4 Apr 2022 11:58:04 +0300 Subject: [PATCH 171/708] RDMA/mlx5: Add a missing update of cache->last_add Update cache->last_add when returning an MR to the cache so that the cache work won't remove it. Fixes: b9358bdbc713 ("RDMA/mlx5: Fix locking in MR cache work queue") Link: https://lore.kernel.org/r/c99f076fce4b44829d434936bbcd3b5fc4c95020.1649062436.git.leonro@nvidia.com Signed-off-by: Aharon Landau Reviewed-by: Shay Drory Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 45b0680377ec..32ef67e9a6a7 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -627,6 +627,7 @@ static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_cache_ent *ent = mr->cache_ent; + WRITE_ONCE(dev->cache.last_add, jiffies); spin_lock_irq(&ent->lock); list_add_tail(&mr->list, &ent->head); ent->available_mrs++; From 107dd7beba403a363adfeb3ffe3734fe38a05cce Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 4 Apr 2022 11:58:05 +0300 Subject: [PATCH 172/708] IB/cm: Cancel mad on the DREQ event when the state is MRA_REP_RCVD On the passive side when the disconnectReq event comes, if the current state is MRA_REP_RCVD, it needs to cancel the MAD before entering the DREQ_RCVD and TIMEWAIT states, otherwise the destroy_id may block until this mad will reach timeout. Fixes: a977049dacde ("[PATCH] IB: Add the kernel CM implementation") Link: https://lore.kernel.org/r/75261c00c1d82128b1d981af9ff46e994186e621.1649062436.git.leonro@nvidia.com Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 35f0d5e7533d..1c107d6d03b9 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -2824,6 +2824,7 @@ static int cm_dreq_handler(struct cm_work *work) switch (cm_id_priv->id.state) { case IB_CM_REP_SENT: case IB_CM_DREQ_SENT: + case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->msg); break; case IB_CM_ESTABLISHED: @@ -2831,8 +2832,6 @@ static int cm_dreq_handler(struct cm_work *work) cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) ib_cancel_mad(cm_id_priv->msg); break; - case IB_CM_MRA_REP_RCVD: - break; case IB_CM_TIMEWAIT: atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] [CM_DREQ_COUNTER]); From 8a1e6bb3f78f06432e095758476358d8cb63c03d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 30 Mar 2022 09:40:15 +0200 Subject: [PATCH 173/708] dt-bindings: update Krzysztof Kozlowski's email Krzysztof Kozlowski's @canonical.com email stopped working, so switch to generic @kernel.org account for all Devicetree bindings. Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20220330074016.12896-2-krzysztof.kozlowski@linaro.org --- .../devicetree/bindings/clock/samsung,exynos-audss-clock.yaml | 2 +- .../devicetree/bindings/clock/samsung,exynos-clock.yaml | 2 +- .../devicetree/bindings/clock/samsung,exynos-ext-clock.yaml | 2 +- .../devicetree/bindings/clock/samsung,exynos4412-isp-clock.yaml | 2 +- .../devicetree/bindings/clock/samsung,exynos5260-clock.yaml | 2 +- .../devicetree/bindings/clock/samsung,exynos5410-clock.yaml | 2 +- .../devicetree/bindings/clock/samsung,exynos5433-clock.yaml | 2 +- .../devicetree/bindings/clock/samsung,exynos7-clock.yaml | 2 +- .../devicetree/bindings/clock/samsung,exynos7885-clock.yaml | 2 +- .../devicetree/bindings/clock/samsung,exynos850-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/samsung,s2mps11.yaml | 2 +- .../devicetree/bindings/clock/samsung,s5pv210-audss-clock.yaml | 2 +- .../devicetree/bindings/clock/samsung,s5pv210-clock.yaml | 2 +- .../devicetree/bindings/devfreq/event/samsung,exynos-nocp.yaml | 2 +- .../devicetree/bindings/devfreq/event/samsung,exynos-ppmu.yaml | 2 +- .../bindings/display/samsung/samsung,exynos-hdmi-ddc.yaml | 2 +- .../bindings/display/samsung/samsung,exynos-hdmi.yaml | 2 +- .../bindings/display/samsung/samsung,exynos-mixer.yaml | 2 +- .../bindings/display/samsung/samsung,exynos5433-decon.yaml | 2 +- .../bindings/display/samsung/samsung,exynos5433-mic.yaml | 2 +- .../bindings/display/samsung/samsung,exynos7-decon.yaml | 2 +- .../devicetree/bindings/display/samsung/samsung,fimd.yaml | 2 +- Documentation/devicetree/bindings/extcon/maxim,max77843.yaml | 2 +- Documentation/devicetree/bindings/hwmon/lltc,ltc4151.yaml | 2 +- Documentation/devicetree/bindings/hwmon/microchip,mcp3021.yaml | 2 +- Documentation/devicetree/bindings/hwmon/sensirion,sht15.yaml | 2 +- Documentation/devicetree/bindings/hwmon/ti,tmp102.yaml | 2 +- Documentation/devicetree/bindings/hwmon/ti,tmp108.yaml | 2 +- Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml | 2 +- Documentation/devicetree/bindings/i2c/samsung,s3c2410-i2c.yaml | 2 +- .../interrupt-controller/samsung,exynos4210-combiner.yaml | 2 +- Documentation/devicetree/bindings/leds/maxim,max77693.yaml | 2 +- .../devicetree/bindings/memory-controllers/brcm,dpfe-cpu.yaml | 2 +- .../bindings/memory-controllers/ddr/jedec,lpddr2-timings.yaml | 2 +- .../bindings/memory-controllers/ddr/jedec,lpddr2.yaml | 2 +- .../bindings/memory-controllers/ddr/jedec,lpddr3-timings.yaml | 2 +- .../bindings/memory-controllers/ddr/jedec,lpddr3.yaml | 2 +- .../memory-controllers/marvell,mvebu-sdram-controller.yaml | 2 +- .../bindings/memory-controllers/qca,ath79-ddr-controller.yaml | 2 +- .../bindings/memory-controllers/renesas,h8300-bsc.yaml | 2 +- .../bindings/memory-controllers/samsung,exynos5422-dmc.yaml | 2 +- .../bindings/memory-controllers/synopsys,ddrc-ecc.yaml | 2 +- .../devicetree/bindings/memory-controllers/ti,da8xx-ddrctl.yaml | 2 +- Documentation/devicetree/bindings/mfd/maxim,max14577.yaml | 2 +- Documentation/devicetree/bindings/mfd/maxim,max77686.yaml | 2 +- Documentation/devicetree/bindings/mfd/maxim,max77693.yaml | 2 +- Documentation/devicetree/bindings/mfd/maxim,max77802.yaml | 2 +- Documentation/devicetree/bindings/mfd/maxim,max77843.yaml | 2 +- .../devicetree/bindings/mfd/samsung,exynos5433-lpass.yaml | 2 +- Documentation/devicetree/bindings/mfd/samsung,s2mpa01.yaml | 2 +- Documentation/devicetree/bindings/mfd/samsung,s2mps11.yaml | 2 +- Documentation/devicetree/bindings/mfd/samsung,s5m8767.yaml | 2 +- Documentation/devicetree/bindings/net/nfc/marvell,nci.yaml | 2 +- Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml | 2 +- Documentation/devicetree/bindings/net/nfc/nxp,pn532.yaml | 2 +- Documentation/devicetree/bindings/net/nfc/nxp,pn544.yaml | 2 +- Documentation/devicetree/bindings/net/nfc/st,st-nci.yaml | 2 +- Documentation/devicetree/bindings/net/nfc/st,st21nfca.yaml | 2 +- Documentation/devicetree/bindings/net/nfc/st,st95hf.yaml | 2 +- Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml | 2 +- Documentation/devicetree/bindings/phy/samsung,dp-video-phy.yaml | 2 +- .../devicetree/bindings/phy/samsung,exynos-hdmi-phy.yaml | 2 +- .../devicetree/bindings/phy/samsung,exynos5250-sata-phy.yaml | 2 +- .../devicetree/bindings/phy/samsung,mipi-video-phy.yaml | 2 +- Documentation/devicetree/bindings/phy/samsung,usb2-phy.yaml | 2 +- Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml | 2 +- .../devicetree/bindings/pinctrl/samsung,pinctrl-gpio-bank.yaml | 2 +- .../devicetree/bindings/pinctrl/samsung,pinctrl-pins-cfg.yaml | 2 +- .../bindings/pinctrl/samsung,pinctrl-wakeup-interrupt.yaml | 2 +- Documentation/devicetree/bindings/pinctrl/samsung,pinctrl.yaml | 2 +- .../devicetree/bindings/power/supply/maxim,max14577.yaml | 2 +- .../devicetree/bindings/power/supply/maxim,max77693.yaml | 2 +- Documentation/devicetree/bindings/regulator/maxim,max14577.yaml | 2 +- Documentation/devicetree/bindings/regulator/maxim,max77686.yaml | 2 +- Documentation/devicetree/bindings/regulator/maxim,max77693.yaml | 2 +- Documentation/devicetree/bindings/regulator/maxim,max77802.yaml | 2 +- Documentation/devicetree/bindings/regulator/maxim,max77843.yaml | 2 +- Documentation/devicetree/bindings/regulator/maxim,max8952.yaml | 2 +- Documentation/devicetree/bindings/regulator/maxim,max8973.yaml | 2 +- Documentation/devicetree/bindings/regulator/maxim,max8997.yaml | 2 +- .../devicetree/bindings/regulator/samsung,s2mpa01.yaml | 2 +- .../devicetree/bindings/regulator/samsung,s2mps11.yaml | 2 +- .../devicetree/bindings/regulator/samsung,s2mps13.yaml | 2 +- .../devicetree/bindings/regulator/samsung,s2mps14.yaml | 2 +- .../devicetree/bindings/regulator/samsung,s2mps15.yaml | 2 +- .../devicetree/bindings/regulator/samsung,s2mpu02.yaml | 2 +- .../devicetree/bindings/regulator/samsung,s5m8767.yaml | 2 +- .../devicetree/bindings/rng/samsung,exynos5250-trng.yaml | 2 +- Documentation/devicetree/bindings/rng/timeriomem_rng.yaml | 2 +- Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml | 2 +- Documentation/devicetree/bindings/sound/samsung,arndale.yaml | 2 +- Documentation/devicetree/bindings/sound/samsung,smdk5250.yaml | 2 +- Documentation/devicetree/bindings/sound/samsung,snow.yaml | 2 +- Documentation/devicetree/bindings/sound/samsung,tm2.yaml | 2 +- .../devicetree/bindings/spi/samsung,spi-peripheral-props.yaml | 2 +- Documentation/devicetree/bindings/spi/samsung,spi.yaml | 2 +- .../devicetree/bindings/thermal/samsung,exynos-thermal.yaml | 2 +- Documentation/devicetree/bindings/usb/samsung,exynos-dwc3.yaml | 2 +- Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml | 2 +- 99 files changed, 99 insertions(+), 99 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos-audss-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos-audss-clock.yaml index f14f1d39da36..d819dfaafff9 100644 --- a/Documentation/devicetree/bindings/clock/samsung,exynos-audss-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,exynos-audss-clock.yaml @@ -8,7 +8,7 @@ title: Samsung Exynos SoC Audio SubSystem clock controller maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki - Tomasz Figa diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos-clock.yaml index 4e8062860986..0589a63e273a 100644 --- a/Documentation/devicetree/bindings/clock/samsung,exynos-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,exynos-clock.yaml @@ -8,7 +8,7 @@ title: Samsung Exynos SoC clock controller maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki - Tomasz Figa diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos-ext-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos-ext-clock.yaml index 64d027dbe3b2..c98eff64f2b5 100644 --- a/Documentation/devicetree/bindings/clock/samsung,exynos-ext-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,exynos-ext-clock.yaml @@ -8,7 +8,7 @@ title: Samsung SoC external/osc/XXTI/XusbXTI clock maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki - Tomasz Figa diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos4412-isp-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos4412-isp-clock.yaml index 1ed64add4355..b644bbd0df38 100644 --- a/Documentation/devicetree/bindings/clock/samsung,exynos4412-isp-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,exynos4412-isp-clock.yaml @@ -8,7 +8,7 @@ title: Samsung Exynos4412 SoC ISP clock controller maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki - Tomasz Figa diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos5260-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos5260-clock.yaml index a3fac5c6809d..b05f83533e3d 100644 --- a/Documentation/devicetree/bindings/clock/samsung,exynos5260-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,exynos5260-clock.yaml @@ -8,7 +8,7 @@ title: Samsung Exynos5260 SoC clock controller maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki - Tomasz Figa diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos5410-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos5410-clock.yaml index 032862e9f55b..b737c9d35a1c 100644 --- a/Documentation/devicetree/bindings/clock/samsung,exynos5410-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,exynos5410-clock.yaml @@ -8,7 +8,7 @@ title: Samsung Exynos5410 SoC clock controller maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki - Tomasz Figa diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos5433-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos5433-clock.yaml index edd1b4ac4334..3f9326e09f79 100644 --- a/Documentation/devicetree/bindings/clock/samsung,exynos5433-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,exynos5433-clock.yaml @@ -8,7 +8,7 @@ title: Samsung Exynos5433 SoC clock controller maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki - Tomasz Figa diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos7-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos7-clock.yaml index 599baf0b7231..c137c6744ef9 100644 --- a/Documentation/devicetree/bindings/clock/samsung,exynos7-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,exynos7-clock.yaml @@ -8,7 +8,7 @@ title: Samsung Exynos7 SoC clock controller maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki - Tomasz Figa diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos7885-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos7885-clock.yaml index 7e5a9cac2fd2..5073e569a47f 100644 --- a/Documentation/devicetree/bindings/clock/samsung,exynos7885-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,exynos7885-clock.yaml @@ -9,7 +9,7 @@ title: Samsung Exynos7885 SoC clock controller maintainers: - Dávid Virág - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki - Tomasz Figa diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos850-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos850-clock.yaml index 80ba60838f2b..aa11815ad3a3 100644 --- a/Documentation/devicetree/bindings/clock/samsung,exynos850-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,exynos850-clock.yaml @@ -9,7 +9,7 @@ title: Samsung Exynos850 SoC clock controller maintainers: - Sam Protsenko - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki - Tomasz Figa diff --git a/Documentation/devicetree/bindings/clock/samsung,s2mps11.yaml b/Documentation/devicetree/bindings/clock/samsung,s2mps11.yaml index 1410c51e0e7d..9248bfc16d48 100644 --- a/Documentation/devicetree/bindings/clock/samsung,s2mps11.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,s2mps11.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S2M and S5M family clock generator block maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for S2M and S5M family of Power diff --git a/Documentation/devicetree/bindings/clock/samsung,s5pv210-audss-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,s5pv210-audss-clock.yaml index ae8f8fc93233..2659854ea1c0 100644 --- a/Documentation/devicetree/bindings/clock/samsung,s5pv210-audss-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,s5pv210-audss-clock.yaml @@ -8,7 +8,7 @@ title: Samsung S5Pv210 SoC Audio SubSystem clock controller maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki - Tomasz Figa diff --git a/Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.yaml index dcb29a2d1159..67a33665cf00 100644 --- a/Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.yaml @@ -8,7 +8,7 @@ title: Samsung S5P6442/S5PC110/S5PV210 SoC clock controller maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki - Tomasz Figa diff --git a/Documentation/devicetree/bindings/devfreq/event/samsung,exynos-nocp.yaml b/Documentation/devicetree/bindings/devfreq/event/samsung,exynos-nocp.yaml index d318fccf78f1..2bdd05af6079 100644 --- a/Documentation/devicetree/bindings/devfreq/event/samsung,exynos-nocp.yaml +++ b/Documentation/devicetree/bindings/devfreq/event/samsung,exynos-nocp.yaml @@ -8,7 +8,7 @@ title: Samsung Exynos NoC (Network on Chip) Probe maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | The Samsung Exynos542x SoC has a NoC (Network on Chip) Probe for NoC bus. diff --git a/Documentation/devicetree/bindings/devfreq/event/samsung,exynos-ppmu.yaml b/Documentation/devicetree/bindings/devfreq/event/samsung,exynos-ppmu.yaml index c9a8cb5fd555..e300df4b47f3 100644 --- a/Documentation/devicetree/bindings/devfreq/event/samsung,exynos-ppmu.yaml +++ b/Documentation/devicetree/bindings/devfreq/event/samsung,exynos-ppmu.yaml @@ -8,7 +8,7 @@ title: Samsung Exynos SoC PPMU (Platform Performance Monitoring Unit) maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | The Samsung Exynos SoC has PPMU (Platform Performance Monitoring Unit) for diff --git a/Documentation/devicetree/bindings/display/samsung/samsung,exynos-hdmi-ddc.yaml b/Documentation/devicetree/bindings/display/samsung/samsung,exynos-hdmi-ddc.yaml index f998a3a5b71f..919734c05c0b 100644 --- a/Documentation/devicetree/bindings/display/samsung/samsung,exynos-hdmi-ddc.yaml +++ b/Documentation/devicetree/bindings/display/samsung/samsung,exynos-hdmi-ddc.yaml @@ -11,7 +11,7 @@ maintainers: - Joonyoung Shim - Seung-Woo Kim - Kyungmin Park - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/display/samsung/samsung,exynos-hdmi.yaml b/Documentation/devicetree/bindings/display/samsung/samsung,exynos-hdmi.yaml index cb8e735ce3bd..63379fae3636 100644 --- a/Documentation/devicetree/bindings/display/samsung/samsung,exynos-hdmi.yaml +++ b/Documentation/devicetree/bindings/display/samsung/samsung,exynos-hdmi.yaml @@ -11,7 +11,7 @@ maintainers: - Joonyoung Shim - Seung-Woo Kim - Kyungmin Park - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml b/Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml index ba40284ac66f..00e325a19cb1 100644 --- a/Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml +++ b/Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml @@ -11,7 +11,7 @@ maintainers: - Joonyoung Shim - Seung-Woo Kim - Kyungmin Park - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: Samsung Exynos SoC Mixer is responsible for mixing and blending multiple data diff --git a/Documentation/devicetree/bindings/display/samsung/samsung,exynos5433-decon.yaml b/Documentation/devicetree/bindings/display/samsung/samsung,exynos5433-decon.yaml index 6f796835ea03..7c37470bd329 100644 --- a/Documentation/devicetree/bindings/display/samsung/samsung,exynos5433-decon.yaml +++ b/Documentation/devicetree/bindings/display/samsung/samsung,exynos5433-decon.yaml @@ -11,7 +11,7 @@ maintainers: - Joonyoung Shim - Seung-Woo Kim - Kyungmin Park - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | DECON (Display and Enhancement Controller) is the Display Controller for the diff --git a/Documentation/devicetree/bindings/display/samsung/samsung,exynos5433-mic.yaml b/Documentation/devicetree/bindings/display/samsung/samsung,exynos5433-mic.yaml index 01fccb138ebd..c5c6239c28d0 100644 --- a/Documentation/devicetree/bindings/display/samsung/samsung,exynos5433-mic.yaml +++ b/Documentation/devicetree/bindings/display/samsung/samsung,exynos5433-mic.yaml @@ -11,7 +11,7 @@ maintainers: - Joonyoung Shim - Seung-Woo Kim - Kyungmin Park - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | MIC (Mobile Image Compressor) resides between DECON and MIPI DSI. MIPI DSI is diff --git a/Documentation/devicetree/bindings/display/samsung/samsung,exynos7-decon.yaml b/Documentation/devicetree/bindings/display/samsung/samsung,exynos7-decon.yaml index afa137d47922..320eedc61a5b 100644 --- a/Documentation/devicetree/bindings/display/samsung/samsung,exynos7-decon.yaml +++ b/Documentation/devicetree/bindings/display/samsung/samsung,exynos7-decon.yaml @@ -11,7 +11,7 @@ maintainers: - Joonyoung Shim - Seung-Woo Kim - Kyungmin Park - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | DECON (Display and Enhancement Controller) is the Display Controller for the diff --git a/Documentation/devicetree/bindings/display/samsung/samsung,fimd.yaml b/Documentation/devicetree/bindings/display/samsung/samsung,fimd.yaml index 9cf5f120d516..c62ea9d22843 100644 --- a/Documentation/devicetree/bindings/display/samsung/samsung,fimd.yaml +++ b/Documentation/devicetree/bindings/display/samsung/samsung,fimd.yaml @@ -11,7 +11,7 @@ maintainers: - Joonyoung Shim - Seung-Woo Kim - Kyungmin Park - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/extcon/maxim,max77843.yaml b/Documentation/devicetree/bindings/extcon/maxim,max77843.yaml index f9ffe3d6f957..03d6b8dbbdd3 100644 --- a/Documentation/devicetree/bindings/extcon/maxim,max77843.yaml +++ b/Documentation/devicetree/bindings/extcon/maxim,max77843.yaml @@ -8,7 +8,7 @@ title: Maxim MAX77843 MicroUSB and Companion Power Management IC Extcon maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for Maxim MAX77843 MicroUSB diff --git a/Documentation/devicetree/bindings/hwmon/lltc,ltc4151.yaml b/Documentation/devicetree/bindings/hwmon/lltc,ltc4151.yaml index 4b5851c326f7..b1a4c235376e 100644 --- a/Documentation/devicetree/bindings/hwmon/lltc,ltc4151.yaml +++ b/Documentation/devicetree/bindings/hwmon/lltc,ltc4151.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: LTC4151 High Voltage I2C Current and Voltage Monitor maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/hwmon/microchip,mcp3021.yaml b/Documentation/devicetree/bindings/hwmon/microchip,mcp3021.yaml index c42051f8a191..028d6e570131 100644 --- a/Documentation/devicetree/bindings/hwmon/microchip,mcp3021.yaml +++ b/Documentation/devicetree/bindings/hwmon/microchip,mcp3021.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Microchip MCP3021 A/D converter maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/hwmon/sensirion,sht15.yaml b/Documentation/devicetree/bindings/hwmon/sensirion,sht15.yaml index 4669217d01e1..80df7182ea28 100644 --- a/Documentation/devicetree/bindings/hwmon/sensirion,sht15.yaml +++ b/Documentation/devicetree/bindings/hwmon/sensirion,sht15.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Sensirion SHT15 humidity and temperature sensor maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp102.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp102.yaml index d3eff4fac107..c5a889e3e27b 100644 --- a/Documentation/devicetree/bindings/hwmon/ti,tmp102.yaml +++ b/Documentation/devicetree/bindings/hwmon/ti,tmp102.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: TMP102 temperature sensor maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp108.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp108.yaml index eda55bbc172d..dcbc6fbc3b48 100644 --- a/Documentation/devicetree/bindings/hwmon/ti,tmp108.yaml +++ b/Documentation/devicetree/bindings/hwmon/ti,tmp108.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: TMP108 temperature sensor maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml b/Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml index 19874e8b73b9..3e52a0db6c41 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung's High Speed I2C controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | The Samsung's High Speed I2C controller is used to interface with I2C devices diff --git a/Documentation/devicetree/bindings/i2c/samsung,s3c2410-i2c.yaml b/Documentation/devicetree/bindings/i2c/samsung,s3c2410-i2c.yaml index 84051b0129c2..c26230518957 100644 --- a/Documentation/devicetree/bindings/i2c/samsung,s3c2410-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/samsung,s3c2410-i2c.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S3C/S5P/Exynos SoC I2C Controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/interrupt-controller/samsung,exynos4210-combiner.yaml b/Documentation/devicetree/bindings/interrupt-controller/samsung,exynos4210-combiner.yaml index d631b7589d50..72456a07dac9 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/samsung,exynos4210-combiner.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/samsung,exynos4210-combiner.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung Exynos SoC Interrupt Combiner Controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | Samsung's Exynos4 architecture includes a interrupt combiner controller which diff --git a/Documentation/devicetree/bindings/leds/maxim,max77693.yaml b/Documentation/devicetree/bindings/leds/maxim,max77693.yaml index 86a0005cf156..e27f57bb52ae 100644 --- a/Documentation/devicetree/bindings/leds/maxim,max77693.yaml +++ b/Documentation/devicetree/bindings/leds/maxim,max77693.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Maxim MAX77693 MicroUSB and Companion Power Management IC LEDs maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for Maxim MAX77693 MicroUSB Integrated diff --git a/Documentation/devicetree/bindings/memory-controllers/brcm,dpfe-cpu.yaml b/Documentation/devicetree/bindings/memory-controllers/brcm,dpfe-cpu.yaml index 769f13250047..08cbdcddfead 100644 --- a/Documentation/devicetree/bindings/memory-controllers/brcm,dpfe-cpu.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/brcm,dpfe-cpu.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: DDR PHY Front End (DPFE) for Broadcom STB maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Markus Mayer properties: diff --git a/Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr2-timings.yaml b/Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr2-timings.yaml index f3e62ee07126..1daa66592477 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr2-timings.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr2-timings.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: LPDDR2 SDRAM AC timing parameters for a given speed-bin maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr2.yaml b/Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr2.yaml index dd2141cad866..9d78f140609b 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr2.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr2.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: LPDDR2 SDRAM compliant to JEDEC JESD209-2 maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr3-timings.yaml b/Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr3-timings.yaml index 97c3e988af5f..5c6512c1e1e3 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr3-timings.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr3-timings.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: LPDDR3 SDRAM AC timing parameters for a given speed-bin maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr3.yaml b/Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr3.yaml index c542f32c39fa..48908a19473c 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr3.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr3.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: LPDDR3 SDRAM compliant to JEDEC JESD209-3 maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/memory-controllers/marvell,mvebu-sdram-controller.yaml b/Documentation/devicetree/bindings/memory-controllers/marvell,mvebu-sdram-controller.yaml index 14a6bc8f421f..9249624c4fa0 100644 --- a/Documentation/devicetree/bindings/memory-controllers/marvell,mvebu-sdram-controller.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/marvell,mvebu-sdram-controller.yaml @@ -8,7 +8,7 @@ title: Marvell MVEBU SDRAM controller maintainers: - Jan Luebbe - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/memory-controllers/qca,ath79-ddr-controller.yaml b/Documentation/devicetree/bindings/memory-controllers/qca,ath79-ddr-controller.yaml index 9566b3421f03..0c511ab906bf 100644 --- a/Documentation/devicetree/bindings/memory-controllers/qca,ath79-ddr-controller.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/qca,ath79-ddr-controller.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Atheros AR7xxx/AR9xxx DDR controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | The DDR controller of the AR7xxx and AR9xxx families provides an interface to diff --git a/Documentation/devicetree/bindings/memory-controllers/renesas,h8300-bsc.yaml b/Documentation/devicetree/bindings/memory-controllers/renesas,h8300-bsc.yaml index 2b18cef99511..514b2c5f8858 100644 --- a/Documentation/devicetree/bindings/memory-controllers/renesas,h8300-bsc.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/renesas,h8300-bsc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: H8/300 bus controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Yoshinori Sato properties: diff --git a/Documentation/devicetree/bindings/memory-controllers/samsung,exynos5422-dmc.yaml b/Documentation/devicetree/bindings/memory-controllers/samsung,exynos5422-dmc.yaml index f152243f6b18..098348b2b815 100644 --- a/Documentation/devicetree/bindings/memory-controllers/samsung,exynos5422-dmc.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/samsung,exynos5422-dmc.yaml @@ -9,7 +9,7 @@ title: | Controller device maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Lukasz Luba description: | diff --git a/Documentation/devicetree/bindings/memory-controllers/synopsys,ddrc-ecc.yaml b/Documentation/devicetree/bindings/memory-controllers/synopsys,ddrc-ecc.yaml index fb7ae38a9c86..06812512e9b2 100644 --- a/Documentation/devicetree/bindings/memory-controllers/synopsys,ddrc-ecc.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/synopsys,ddrc-ecc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Synopsys IntelliDDR Multi Protocol memory controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Manish Narani - Michal Simek diff --git a/Documentation/devicetree/bindings/memory-controllers/ti,da8xx-ddrctl.yaml b/Documentation/devicetree/bindings/memory-controllers/ti,da8xx-ddrctl.yaml index 9ed51185ff99..382ddab60fbd 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ti,da8xx-ddrctl.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/ti,da8xx-ddrctl.yaml @@ -8,7 +8,7 @@ title: Texas Instruments da8xx DDR2/mDDR memory controller maintainers: - Bartosz Golaszewski - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | Documentation: diff --git a/Documentation/devicetree/bindings/mfd/maxim,max14577.yaml b/Documentation/devicetree/bindings/mfd/maxim,max14577.yaml index 27870b8760a6..52edd1bf549f 100644 --- a/Documentation/devicetree/bindings/mfd/maxim,max14577.yaml +++ b/Documentation/devicetree/bindings/mfd/maxim,max14577.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Maxim MAX14577/MAX77836 MicroUSB and Companion Power Management IC maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for Maxim MAX14577/MAX77836 MicroUSB diff --git a/Documentation/devicetree/bindings/mfd/maxim,max77686.yaml b/Documentation/devicetree/bindings/mfd/maxim,max77686.yaml index 859655a789c3..d027aabe453b 100644 --- a/Documentation/devicetree/bindings/mfd/maxim,max77686.yaml +++ b/Documentation/devicetree/bindings/mfd/maxim,max77686.yaml @@ -8,7 +8,7 @@ title: Maxim MAX77686 Power Management IC maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for Maxim MAX77686 Power Management diff --git a/Documentation/devicetree/bindings/mfd/maxim,max77693.yaml b/Documentation/devicetree/bindings/mfd/maxim,max77693.yaml index 906101197e11..1b06a77ec798 100644 --- a/Documentation/devicetree/bindings/mfd/maxim,max77693.yaml +++ b/Documentation/devicetree/bindings/mfd/maxim,max77693.yaml @@ -8,7 +8,7 @@ title: Maxim MAX77693 MicroUSB and Companion Power Management IC maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for Maxim MAX77693 MicroUSB diff --git a/Documentation/devicetree/bindings/mfd/maxim,max77802.yaml b/Documentation/devicetree/bindings/mfd/maxim,max77802.yaml index baa1346ac5d5..ad2013900b03 100644 --- a/Documentation/devicetree/bindings/mfd/maxim,max77802.yaml +++ b/Documentation/devicetree/bindings/mfd/maxim,max77802.yaml @@ -8,7 +8,7 @@ title: Maxim MAX77802 Power Management IC maintainers: - Javier Martinez Canillas - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for Maxim MAX77802 Power Management diff --git a/Documentation/devicetree/bindings/mfd/maxim,max77843.yaml b/Documentation/devicetree/bindings/mfd/maxim,max77843.yaml index 61a0f9dcb983..f30f96bbff43 100644 --- a/Documentation/devicetree/bindings/mfd/maxim,max77843.yaml +++ b/Documentation/devicetree/bindings/mfd/maxim,max77843.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Maxim MAX77843 MicroUSB and Companion Power Management IC maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for Maxim MAX77843 MicroUSB diff --git a/Documentation/devicetree/bindings/mfd/samsung,exynos5433-lpass.yaml b/Documentation/devicetree/bindings/mfd/samsung,exynos5433-lpass.yaml index bae55c98961c..f7bb67d10eff 100644 --- a/Documentation/devicetree/bindings/mfd/samsung,exynos5433-lpass.yaml +++ b/Documentation/devicetree/bindings/mfd/samsung,exynos5433-lpass.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung Exynos SoC Low Power Audio Subsystem (LPASS) maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki properties: diff --git a/Documentation/devicetree/bindings/mfd/samsung,s2mpa01.yaml b/Documentation/devicetree/bindings/mfd/samsung,s2mpa01.yaml index 017befdf8adb..055dfc337c2f 100644 --- a/Documentation/devicetree/bindings/mfd/samsung,s2mpa01.yaml +++ b/Documentation/devicetree/bindings/mfd/samsung,s2mpa01.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S2MPA01 Power Management IC maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for S2M and S5M family of Power diff --git a/Documentation/devicetree/bindings/mfd/samsung,s2mps11.yaml b/Documentation/devicetree/bindings/mfd/samsung,s2mps11.yaml index 771b3f16da96..5ff6546c72b7 100644 --- a/Documentation/devicetree/bindings/mfd/samsung,s2mps11.yaml +++ b/Documentation/devicetree/bindings/mfd/samsung,s2mps11.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S2MPS11/13/14/15 and S2MPU02 Power Management IC maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for S2M and S5M family of Power diff --git a/Documentation/devicetree/bindings/mfd/samsung,s5m8767.yaml b/Documentation/devicetree/bindings/mfd/samsung,s5m8767.yaml index 5531718abdf0..10c7b408f33a 100644 --- a/Documentation/devicetree/bindings/mfd/samsung,s5m8767.yaml +++ b/Documentation/devicetree/bindings/mfd/samsung,s5m8767.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S5M8767 Power Management IC maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for S2M and S5M family of Power diff --git a/Documentation/devicetree/bindings/net/nfc/marvell,nci.yaml b/Documentation/devicetree/bindings/net/nfc/marvell,nci.yaml index 15a45db3899a..1bcaf6ba822c 100644 --- a/Documentation/devicetree/bindings/net/nfc/marvell,nci.yaml +++ b/Documentation/devicetree/bindings/net/nfc/marvell,nci.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Marvell International Ltd. NCI NFC controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml b/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml index 7465aea2e1c0..e381a3c14836 100644 --- a/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml +++ b/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml @@ -8,7 +8,7 @@ title: NXP Semiconductors NCI NFC controller maintainers: - Charles Gorand - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/net/nfc/nxp,pn532.yaml b/Documentation/devicetree/bindings/net/nfc/nxp,pn532.yaml index d8ba5a18db98..0509e0166345 100644 --- a/Documentation/devicetree/bindings/net/nfc/nxp,pn532.yaml +++ b/Documentation/devicetree/bindings/net/nfc/nxp,pn532.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: NXP Semiconductors PN532 NFC controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/net/nfc/nxp,pn544.yaml b/Documentation/devicetree/bindings/net/nfc/nxp,pn544.yaml index d520414de463..18b3a7d819df 100644 --- a/Documentation/devicetree/bindings/net/nfc/nxp,pn544.yaml +++ b/Documentation/devicetree/bindings/net/nfc/nxp,pn544.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: NXP Semiconductors PN544 NFC Controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/net/nfc/st,st-nci.yaml b/Documentation/devicetree/bindings/net/nfc/st,st-nci.yaml index a6a1bc788d29..ef1155038a2f 100644 --- a/Documentation/devicetree/bindings/net/nfc/st,st-nci.yaml +++ b/Documentation/devicetree/bindings/net/nfc/st,st-nci.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: STMicroelectronics ST NCI NFC controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/net/nfc/st,st21nfca.yaml b/Documentation/devicetree/bindings/net/nfc/st,st21nfca.yaml index 4356eacde8aa..8a7274357b46 100644 --- a/Documentation/devicetree/bindings/net/nfc/st,st21nfca.yaml +++ b/Documentation/devicetree/bindings/net/nfc/st,st21nfca.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: STMicroelectronics SAS ST21NFCA NFC controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/net/nfc/st,st95hf.yaml b/Documentation/devicetree/bindings/net/nfc/st,st95hf.yaml index d3bca376039e..963d9531a856 100644 --- a/Documentation/devicetree/bindings/net/nfc/st,st95hf.yaml +++ b/Documentation/devicetree/bindings/net/nfc/st,st95hf.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: STMicroelectronics ST95HF NFC controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml b/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml index 40da2ac98978..404c8df99364 100644 --- a/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml +++ b/Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Texas Instruments TRF7970A RFID/NFC/15693 Transceiver maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Mark Greer properties: diff --git a/Documentation/devicetree/bindings/phy/samsung,dp-video-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,dp-video-phy.yaml index 838c6d480ce6..b03b2f00cc5b 100644 --- a/Documentation/devicetree/bindings/phy/samsung,dp-video-phy.yaml +++ b/Documentation/devicetree/bindings/phy/samsung,dp-video-phy.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung Exynos SoC DisplayPort PHY maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Marek Szyprowski - Sylwester Nawrocki diff --git a/Documentation/devicetree/bindings/phy/samsung,exynos-hdmi-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,exynos-hdmi-phy.yaml index c61574e10b2a..3e5f035de2e9 100644 --- a/Documentation/devicetree/bindings/phy/samsung,exynos-hdmi-phy.yaml +++ b/Documentation/devicetree/bindings/phy/samsung,exynos-hdmi-phy.yaml @@ -11,7 +11,7 @@ maintainers: - Joonyoung Shim - Seung-Woo Kim - Kyungmin Park - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/phy/samsung,exynos5250-sata-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,exynos5250-sata-phy.yaml index 62b39bb46585..8751e559484f 100644 --- a/Documentation/devicetree/bindings/phy/samsung,exynos5250-sata-phy.yaml +++ b/Documentation/devicetree/bindings/phy/samsung,exynos5250-sata-phy.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung Exynos5250 SoC SATA PHY maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Marek Szyprowski - Sylwester Nawrocki diff --git a/Documentation/devicetree/bindings/phy/samsung,mipi-video-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,mipi-video-phy.yaml index 54aa056b224d..415440aaad89 100644 --- a/Documentation/devicetree/bindings/phy/samsung,mipi-video-phy.yaml +++ b/Documentation/devicetree/bindings/phy/samsung,mipi-video-phy.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S5P/Exynos SoC MIPI CSIS/DSIM DPHY maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Marek Szyprowski - Sylwester Nawrocki diff --git a/Documentation/devicetree/bindings/phy/samsung,usb2-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,usb2-phy.yaml index 056e270a4e88..d9f22a801cbf 100644 --- a/Documentation/devicetree/bindings/phy/samsung,usb2-phy.yaml +++ b/Documentation/devicetree/bindings/phy/samsung,usb2-phy.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S5P/Exynos SoC USB 2.0 PHY maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Marek Szyprowski - Sylwester Nawrocki diff --git a/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml index f83f0f8135b9..5ba55f9f20cc 100644 --- a/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml +++ b/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung Exynos SoC USB 3.0 DRD PHY USB 2.0 PHY maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Marek Szyprowski - Sylwester Nawrocki diff --git a/Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-gpio-bank.yaml b/Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-gpio-bank.yaml index f73348c54748..8cf3c47ab86b 100644 --- a/Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-gpio-bank.yaml +++ b/Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-gpio-bank.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S3C/S5P/Exynos SoC pin controller - gpio bank maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki - Tomasz Figa diff --git a/Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-pins-cfg.yaml b/Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-pins-cfg.yaml index c71939ac8b63..9869d4dceddb 100644 --- a/Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-pins-cfg.yaml +++ b/Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-pins-cfg.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S3C/S5P/Exynos SoC pin controller - pins configuration maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki - Tomasz Figa diff --git a/Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-wakeup-interrupt.yaml b/Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-wakeup-interrupt.yaml index a822f70f5702..1de91a51234d 100644 --- a/Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-wakeup-interrupt.yaml +++ b/Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-wakeup-interrupt.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S3C/S5P/Exynos SoC pin controller - wake-up interrupt controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki - Tomasz Figa diff --git a/Documentation/devicetree/bindings/pinctrl/samsung,pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/samsung,pinctrl.yaml index 989e48c051cf..3a65c66ca71d 100644 --- a/Documentation/devicetree/bindings/pinctrl/samsung,pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/samsung,pinctrl.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S3C/S5P/Exynos SoC pin controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki - Tomasz Figa diff --git a/Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml b/Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml index 3978b48299de..4d3a1d09036f 100644 --- a/Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml +++ b/Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Maxim MAX14577/MAX77836 MicroUSB and Companion Power Management IC Charger maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for Maxim MAX14577/MAX77836 MicroUSB diff --git a/Documentation/devicetree/bindings/power/supply/maxim,max77693.yaml b/Documentation/devicetree/bindings/power/supply/maxim,max77693.yaml index a21dc1a8890f..f5fd53debbc8 100644 --- a/Documentation/devicetree/bindings/power/supply/maxim,max77693.yaml +++ b/Documentation/devicetree/bindings/power/supply/maxim,max77693.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Maxim MAX77693 MicroUSB and Companion Power Management IC Charger maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for Maxim MAX77693 MicroUSB Integrated diff --git a/Documentation/devicetree/bindings/regulator/maxim,max14577.yaml b/Documentation/devicetree/bindings/regulator/maxim,max14577.yaml index 16f01886a601..285dc7122977 100644 --- a/Documentation/devicetree/bindings/regulator/maxim,max14577.yaml +++ b/Documentation/devicetree/bindings/regulator/maxim,max14577.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Maxim MAX14577/MAX77836 MicroUSB and Companion Power Management IC regulators maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for Maxim MAX14577/MAX77836 MicroUSB diff --git a/Documentation/devicetree/bindings/regulator/maxim,max77686.yaml b/Documentation/devicetree/bindings/regulator/maxim,max77686.yaml index bb64b679f765..0e7cd4b3ace0 100644 --- a/Documentation/devicetree/bindings/regulator/maxim,max77686.yaml +++ b/Documentation/devicetree/bindings/regulator/maxim,max77686.yaml @@ -8,7 +8,7 @@ title: Maxim MAX77686 Power Management IC regulators maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for Maxim MAX77686 Power Management diff --git a/Documentation/devicetree/bindings/regulator/maxim,max77693.yaml b/Documentation/devicetree/bindings/regulator/maxim,max77693.yaml index 20d8559bdc2b..945a539749e8 100644 --- a/Documentation/devicetree/bindings/regulator/maxim,max77693.yaml +++ b/Documentation/devicetree/bindings/regulator/maxim,max77693.yaml @@ -8,7 +8,7 @@ title: Maxim MAX77693 MicroUSB and Companion Power Management IC regulators maintainers: - Chanwoo Choi - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for Maxim MAX77693 MicroUSB Integrated diff --git a/Documentation/devicetree/bindings/regulator/maxim,max77802.yaml b/Documentation/devicetree/bindings/regulator/maxim,max77802.yaml index f2b4dd15a0f3..236348c4710c 100644 --- a/Documentation/devicetree/bindings/regulator/maxim,max77802.yaml +++ b/Documentation/devicetree/bindings/regulator/maxim,max77802.yaml @@ -8,7 +8,7 @@ title: Maxim MAX77802 Power Management IC regulators maintainers: - Javier Martinez Canillas - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for Maxim MAX77802 Power Management diff --git a/Documentation/devicetree/bindings/regulator/maxim,max77843.yaml b/Documentation/devicetree/bindings/regulator/maxim,max77843.yaml index a963025e96c1..9695e7242882 100644 --- a/Documentation/devicetree/bindings/regulator/maxim,max77843.yaml +++ b/Documentation/devicetree/bindings/regulator/maxim,max77843.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Maxim MAX77843 MicroUSB and Companion Power Management IC regulators maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for Maxim MAX77843 MicroUSB Integrated diff --git a/Documentation/devicetree/bindings/regulator/maxim,max8952.yaml b/Documentation/devicetree/bindings/regulator/maxim,max8952.yaml index e4e8c58f6046..3ff0d7d980e9 100644 --- a/Documentation/devicetree/bindings/regulator/maxim,max8952.yaml +++ b/Documentation/devicetree/bindings/regulator/maxim,max8952.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Maxim MAX8952 voltage regulator maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski allOf: - $ref: regulator.yaml# diff --git a/Documentation/devicetree/bindings/regulator/maxim,max8973.yaml b/Documentation/devicetree/bindings/regulator/maxim,max8973.yaml index 5898dcf10f06..b92eef68c19f 100644 --- a/Documentation/devicetree/bindings/regulator/maxim,max8973.yaml +++ b/Documentation/devicetree/bindings/regulator/maxim,max8973.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Maxim MAX8973/MAX77621 voltage regulator maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski allOf: - $ref: regulator.yaml# diff --git a/Documentation/devicetree/bindings/regulator/maxim,max8997.yaml b/Documentation/devicetree/bindings/regulator/maxim,max8997.yaml index d5a44ca3df04..4321f061a7f6 100644 --- a/Documentation/devicetree/bindings/regulator/maxim,max8997.yaml +++ b/Documentation/devicetree/bindings/regulator/maxim,max8997.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Maxim MAX8997 Power Management IC maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | The Maxim MAX8997 is a Power Management IC which includes voltage and current diff --git a/Documentation/devicetree/bindings/regulator/samsung,s2mpa01.yaml b/Documentation/devicetree/bindings/regulator/samsung,s2mpa01.yaml index 0627dec513da..0f9eb317ba9a 100644 --- a/Documentation/devicetree/bindings/regulator/samsung,s2mpa01.yaml +++ b/Documentation/devicetree/bindings/regulator/samsung,s2mpa01.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S2MPA01 Power Management IC regulators maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for S2M and S5M family of Power diff --git a/Documentation/devicetree/bindings/regulator/samsung,s2mps11.yaml b/Documentation/devicetree/bindings/regulator/samsung,s2mps11.yaml index e3b780715f44..f1c50dcd0b04 100644 --- a/Documentation/devicetree/bindings/regulator/samsung,s2mps11.yaml +++ b/Documentation/devicetree/bindings/regulator/samsung,s2mps11.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S2MPS11 Power Management IC regulators maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for S2M and S5M family of Power diff --git a/Documentation/devicetree/bindings/regulator/samsung,s2mps13.yaml b/Documentation/devicetree/bindings/regulator/samsung,s2mps13.yaml index 579d77aefc3f..53b105a4ead1 100644 --- a/Documentation/devicetree/bindings/regulator/samsung,s2mps13.yaml +++ b/Documentation/devicetree/bindings/regulator/samsung,s2mps13.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S2MPS13 Power Management IC regulators maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for S2M and S5M family of Power diff --git a/Documentation/devicetree/bindings/regulator/samsung,s2mps14.yaml b/Documentation/devicetree/bindings/regulator/samsung,s2mps14.yaml index fdea290b3e94..01f9d4e236e9 100644 --- a/Documentation/devicetree/bindings/regulator/samsung,s2mps14.yaml +++ b/Documentation/devicetree/bindings/regulator/samsung,s2mps14.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S2MPS14 Power Management IC regulators maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for S2M and S5M family of Power diff --git a/Documentation/devicetree/bindings/regulator/samsung,s2mps15.yaml b/Documentation/devicetree/bindings/regulator/samsung,s2mps15.yaml index b3a883c94628..9576c2df45a6 100644 --- a/Documentation/devicetree/bindings/regulator/samsung,s2mps15.yaml +++ b/Documentation/devicetree/bindings/regulator/samsung,s2mps15.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S2MPS15 Power Management IC regulators maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for S2M and S5M family of Power diff --git a/Documentation/devicetree/bindings/regulator/samsung,s2mpu02.yaml b/Documentation/devicetree/bindings/regulator/samsung,s2mpu02.yaml index 0ded6953e3b6..39b652c3c3c4 100644 --- a/Documentation/devicetree/bindings/regulator/samsung,s2mpu02.yaml +++ b/Documentation/devicetree/bindings/regulator/samsung,s2mpu02.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S2MPU02 Power Management IC regulators maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for S2M and S5M family of Power diff --git a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml index 3c1617b66861..172631ca3c25 100644 --- a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml +++ b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S5M8767 Power Management IC regulators maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | This is a part of device tree bindings for S2M and S5M family of Power diff --git a/Documentation/devicetree/bindings/rng/samsung,exynos5250-trng.yaml b/Documentation/devicetree/bindings/rng/samsung,exynos5250-trng.yaml index a50c34d5d199..765d9f9edd6e 100644 --- a/Documentation/devicetree/bindings/rng/samsung,exynos5250-trng.yaml +++ b/Documentation/devicetree/bindings/rng/samsung,exynos5250-trng.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung Exynos SoC True Random Number Generator maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Łukasz Stelmach properties: diff --git a/Documentation/devicetree/bindings/rng/timeriomem_rng.yaml b/Documentation/devicetree/bindings/rng/timeriomem_rng.yaml index 84bf518a5549..4754174e9849 100644 --- a/Documentation/devicetree/bindings/rng/timeriomem_rng.yaml +++ b/Documentation/devicetree/bindings/rng/timeriomem_rng.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: TimerIO Random Number Generator maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml b/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml index a98ed66d092e..0cabb773c397 100644 --- a/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml +++ b/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml @@ -8,7 +8,7 @@ title: Samsung's Exynos USI (Universal Serial Interface) binding maintainers: - Sam Protsenko - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | USI IP-core provides selectable serial protocol (UART, SPI or High-Speed I2C). diff --git a/Documentation/devicetree/bindings/sound/samsung,arndale.yaml b/Documentation/devicetree/bindings/sound/samsung,arndale.yaml index cea2bf3544f0..9bc4585bb6e5 100644 --- a/Documentation/devicetree/bindings/sound/samsung,arndale.yaml +++ b/Documentation/devicetree/bindings/sound/samsung,arndale.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Insignal Arndale boards audio complex maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki properties: diff --git a/Documentation/devicetree/bindings/sound/samsung,smdk5250.yaml b/Documentation/devicetree/bindings/sound/samsung,smdk5250.yaml index cb51af90435e..ac151d3c1d77 100644 --- a/Documentation/devicetree/bindings/sound/samsung,smdk5250.yaml +++ b/Documentation/devicetree/bindings/sound/samsung,smdk5250.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung SMDK5250 audio complex with WM8994 codec maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki properties: diff --git a/Documentation/devicetree/bindings/sound/samsung,snow.yaml b/Documentation/devicetree/bindings/sound/samsung,snow.yaml index 0c3b3302b842..51a83d3c7274 100644 --- a/Documentation/devicetree/bindings/sound/samsung,snow.yaml +++ b/Documentation/devicetree/bindings/sound/samsung,snow.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Google Snow audio complex with MAX9809x codec maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki properties: diff --git a/Documentation/devicetree/bindings/sound/samsung,tm2.yaml b/Documentation/devicetree/bindings/sound/samsung,tm2.yaml index 74712d6f3ef4..491e08019c04 100644 --- a/Documentation/devicetree/bindings/sound/samsung,tm2.yaml +++ b/Documentation/devicetree/bindings/sound/samsung,tm2.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung Exynos5433 TM2(E) audio complex with WM5110 codec maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski - Sylwester Nawrocki properties: diff --git a/Documentation/devicetree/bindings/spi/samsung,spi-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/samsung,spi-peripheral-props.yaml index f0db3fb3d688..25b1b6c12d4d 100644 --- a/Documentation/devicetree/bindings/spi/samsung,spi-peripheral-props.yaml +++ b/Documentation/devicetree/bindings/spi/samsung,spi-peripheral-props.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Peripheral-specific properties for Samsung S3C/S5P/Exynos SoC SPI controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: See spi-peripheral-props.yaml for more info. diff --git a/Documentation/devicetree/bindings/spi/samsung,spi.yaml b/Documentation/devicetree/bindings/spi/samsung,spi.yaml index bf9a76d931d2..a50f24f9359d 100644 --- a/Documentation/devicetree/bindings/spi/samsung,spi.yaml +++ b/Documentation/devicetree/bindings/spi/samsung,spi.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung S3C/S5P/Exynos SoC SPI controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: All the SPI controller nodes should be represented in the aliases node using diff --git a/Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml b/Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml index 17129f75d962..1344df708e2d 100644 --- a/Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung Exynos SoC Thermal Management Unit (TMU) maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski description: | For multi-instance tmu each instance should have an alias correctly numbered diff --git a/Documentation/devicetree/bindings/usb/samsung,exynos-dwc3.yaml b/Documentation/devicetree/bindings/usb/samsung,exynos-dwc3.yaml index 22b91a27d776..6b9a3bcb3926 100644 --- a/Documentation/devicetree/bindings/usb/samsung,exynos-dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/samsung,exynos-dwc3.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung Exynos SoC USB 3.0 DWC3 Controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: diff --git a/Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml b/Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml index fbf07d6e707a..340dff8d19c3 100644 --- a/Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml +++ b/Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung Exynos SoC USB 2.0 EHCI/OHCI Controller maintainers: - - Krzysztof Kozlowski + - Krzysztof Kozlowski properties: compatible: From 1a9f338f9cf96f8338d5592dee5fce222929e4f7 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 30 Mar 2022 09:40:16 +0200 Subject: [PATCH 174/708] MAINTAINERS: update Krzysztof Kozlowski's email to Linaro Use Krzysztof Kozlowski's @linaro.org account in maintainer entries. Signed-off-by: Krzysztof Kozlowski Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20220330074016.12896-3-krzysztof.kozlowski@linaro.org --- MAINTAINERS | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index fd768d43e048..edf0bf37de8a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2636,7 +2636,7 @@ F: sound/soc/rockchip/ N: rockchip ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski R: Alim Akhtar L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org @@ -11905,7 +11905,7 @@ F: drivers/iio/proximity/mb1232.c MAXIM MAX17040 FAMILY FUEL GAUGE DRIVERS R: Iskren Chernev -R: Krzysztof Kozlowski +R: Krzysztof Kozlowski R: Marek Szyprowski R: Matheus Castello L: linux-pm@vger.kernel.org @@ -11915,7 +11915,7 @@ F: drivers/power/supply/max17040_battery.c MAXIM MAX17042 FAMILY FUEL GAUGE DRIVERS R: Hans de Goede -R: Krzysztof Kozlowski +R: Krzysztof Kozlowski R: Marek Szyprowski R: Sebastian Krzyszkowiak R: Purism Kernel Team @@ -11967,7 +11967,7 @@ F: Documentation/devicetree/bindings/power/supply/maxim,max77976.yaml F: drivers/power/supply/max77976_charger.c MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski M: Bartlomiej Zolnierkiewicz L: linux-pm@vger.kernel.org S: Supported @@ -11978,7 +11978,7 @@ F: drivers/power/supply/max77693_charger.c MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS M: Chanwoo Choi -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski M: Bartlomiej Zolnierkiewicz L: linux-kernel@vger.kernel.org S: Supported @@ -12672,7 +12672,7 @@ F: mm/memblock.c F: tools/testing/memblock/ MEMORY CONTROLLER DRIVERS -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl.git @@ -13816,7 +13816,7 @@ F: include/uapi/linux/nexthop.h F: net/ipv4/nexthop.c NFC SUBSYSTEM -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski L: linux-nfc@lists.01.org (subscribers-only) L: netdev@vger.kernel.org S: Maintained @@ -14133,7 +14133,7 @@ F: Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml F: drivers/regulator/pf8x00-regulator.c NXP PTN5150A CC LOGIC AND EXTCON DRIVER -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/extcon/extcon-ptn5150.yaml @@ -14687,7 +14687,7 @@ F: scripts/dtc/ OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS M: Rob Herring -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski L: devicetree@vger.kernel.org S: Maintained C: irc://irc.libera.chat/devicetree @@ -15599,7 +15599,7 @@ F: drivers/pinctrl/renesas/ PIN CONTROLLER - SAMSUNG M: Tomasz Figa -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski M: Sylwester Nawrocki R: Alim Akhtar L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -17278,7 +17278,7 @@ W: http://www.ibm.com/developerworks/linux/linux390/ F: drivers/s390/scsi/zfcp_* S3C ADC BATTERY DRIVER -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski L: linux-samsung-soc@vger.kernel.org S: Odd Fixes F: drivers/power/supply/s3c_adc_battery.c @@ -17323,7 +17323,7 @@ F: Documentation/admin-guide/LSM/SafeSetID.rst F: security/safesetid/ SAMSUNG AUDIO (ASoC) DRIVERS -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski M: Sylwester Nawrocki L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported @@ -17331,7 +17331,7 @@ F: Documentation/devicetree/bindings/sound/samsung* F: sound/soc/samsung/ SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski L: linux-crypto@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Maintained @@ -17366,7 +17366,7 @@ S: Maintained F: drivers/platform/x86/samsung-laptop.c SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski M: Bartlomiej Zolnierkiewicz L: linux-kernel@vger.kernel.org L: linux-samsung-soc@vger.kernel.org @@ -17392,7 +17392,7 @@ F: drivers/media/platform/samsung/s3c-camif/ F: include/media/drv-intf/s3c_camif.h SAMSUNG S3FWRN5 NFC DRIVER -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski M: Krzysztof Opasiak L: linux-nfc@lists.01.org (subscribers-only) S: Maintained @@ -17414,7 +17414,7 @@ S: Supported F: drivers/media/i2c/s5k5baf.c SAMSUNG S5P Security SubSystem (SSS) DRIVER -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski M: Vladimir Zapolskiy L: linux-crypto@vger.kernel.org L: linux-samsung-soc@vger.kernel.org @@ -17449,7 +17449,7 @@ F: include/linux/clk/samsung.h F: include/linux/platform_data/clk-s3c2410.h SAMSUNG SPI DRIVERS -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski M: Andi Shyti L: linux-spi@vger.kernel.org L: linux-samsung-soc@vger.kernel.org @@ -17467,7 +17467,7 @@ F: drivers/net/ethernet/samsung/sxgbe/ SAMSUNG THERMAL DRIVER M: Bartlomiej Zolnierkiewicz -M: Krzysztof Kozlowski +M: Krzysztof Kozlowski L: linux-pm@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Maintained From 0284d4d1be753f648f28b77bdfbe6a959212af5c Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 22 Mar 2022 14:18:30 +0800 Subject: [PATCH 175/708] platform/x86: samsung-laptop: Fix an unsigned comparison which can never be negative Eliminate the follow smatch warnings: drivers/platform/x86/samsung-laptop.c:1124 kbd_led_set() warn: unsigned 'value' is never less than zero. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Link: https://lore.kernel.org/r/20220322061830.105579-1-jiapeng.chong@linux.alibaba.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/samsung-laptop.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c index c1d9ed9b7b67..19f6b456234f 100644 --- a/drivers/platform/x86/samsung-laptop.c +++ b/drivers/platform/x86/samsung-laptop.c @@ -1121,8 +1121,6 @@ static void kbd_led_set(struct led_classdev *led_cdev, if (value > samsung->kbd_led.max_brightness) value = samsung->kbd_led.max_brightness; - else if (value < 0) - value = 0; samsung->kbd_led_wk = value; queue_work(samsung->led_workqueue, &samsung->kbd_led_work); From 3f2a3c79a4536fba6a8eee8a4f49218467216300 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Wed, 23 Mar 2022 15:50:25 +0800 Subject: [PATCH 176/708] platform/x86: barco-p50-gpio: Fix duplicate included linux/io.h Clean up the following includecheck warning: drivers/platform/x86/barco-p50-gpio.c: linux/io.h is included more than once. No functional change. Signed-off-by: Haowen Bai Acked-by: Peter Korsgaard Link: https://lore.kernel.org/r/1648021825-6182-1-git-send-email-baihaowen@meizu.com Signed-off-by: Hans de Goede --- drivers/platform/x86/barco-p50-gpio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/platform/x86/barco-p50-gpio.c b/drivers/platform/x86/barco-p50-gpio.c index f5c72e33f9ae..05534287bc26 100644 --- a/drivers/platform/x86/barco-p50-gpio.c +++ b/drivers/platform/x86/barco-p50-gpio.c @@ -10,7 +10,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include #include #include #include From c5547574797b254ba9c98c7da417bc5de71cd198 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 24 Mar 2022 17:47:34 +0100 Subject: [PATCH 177/708] Documentation/ABI: sysfs-driver-intel_sdsi: Fix sphinx warnings Fix the following warnings from "make htmldocs": Documentation/ABI/testing/sysfs-driver-intel_sdsi:2: WARNING: Unexpected indentation. WARNING: Block quote ends without a blank line; unexpected unindent. WARNING: Definition list ends without a blank line; unexpected unindent. By turning the error-code table into a proper ReST table. While at it also fix the error-code table mixing tab and spaces for indentation (switch to all tabs). Reported-by: Stephen Rothwell Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220324164737.21765-2-hdegoede@redhat.com --- .../ABI/testing/sysfs-driver-intel_sdsi | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel_sdsi b/Documentation/ABI/testing/sysfs-driver-intel_sdsi index ab122125ff9a..96b92c105ec4 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel_sdsi +++ b/Documentation/ABI/testing/sysfs-driver-intel_sdsi @@ -13,17 +13,19 @@ Description: Should the operation fail, one of the following error codes may be returned: + ========== ===== Error Code Cause - ---------- ----- - EIO General mailbox failure. Log may indicate cause. - EBUSY Mailbox is owned by another agent. - EPERM SDSI capability is not enabled in hardware. - EPROTO Failure in mailbox protocol detected by driver. + ========== ===== + EIO General mailbox failure. Log may indicate cause. + EBUSY Mailbox is owned by another agent. + EPERM SDSI capability is not enabled in hardware. + EPROTO Failure in mailbox protocol detected by driver. See log for details. - EOVERFLOW For provision commands, the size of the data + EOVERFLOW For provision commands, the size of the data exceeds what may be written. - ESPIPE Seeking is not allowed. - ETIMEDOUT Failure to complete mailbox transaction in time. + ESPIPE Seeking is not allowed. + ETIMEDOUT Failure to complete mailbox transaction in time. + ========== ===== What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/guid Date: Feb 2022 From 45440a1d79eed68bcb8db236a6967a1d5c37a8ce Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 24 Mar 2022 17:47:35 +0100 Subject: [PATCH 178/708] Documentation/ABI: sysfs-class-firmware-attributes: Fix Sphinx errors Fix the following warnings from "make htmldocs": Documentation/ABI/testing/sysfs-class-firmware-attributes:130: ERROR: Unexpected indentation. ERROR: Unexpected indentation. ERROR: Unexpected indentation. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220324164737.21765-3-hdegoede@redhat.com --- .../testing/sysfs-class-firmware-attributes | 42 ++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-firmware-attributes b/Documentation/ABI/testing/sysfs-class-firmware-attributes index 05820365f1ec..5356ff2ed6c8 100644 --- a/Documentation/ABI/testing/sysfs-class-firmware-attributes +++ b/Documentation/ABI/testing/sysfs-class-firmware-attributes @@ -246,9 +246,7 @@ Description: that is being referenced (e.g hdd0, hdd1 etc) This attribute defaults to device 0. - certificate: - signature: - save_signature: + certificate, signature, save_signature: These attributes are used for certificate based authentication. This is used in conjunction with a signing server as an alternative to password based authentication. @@ -257,22 +255,27 @@ Description: The attributes can be displayed to check the stored value. Some usage examples: - Installing a certificate to enable feature: - echo authentication/Admin/current_password - echo > authentication/Admin/certificate - Updating the installed certificate: - echo > authentication/Admin/signature - echo > authentication/Admin/certificate + Installing a certificate to enable feature:: - Removing the installed certificate: - echo > authentication/Admin/signature - echo '' > authentication/Admin/certificate + echo "supervisor password" > authentication/Admin/current_password + echo "signed certificate" > authentication/Admin/certificate - Changing a BIOS setting: - echo > authentication/Admin/signature - echo > authentication/Admin/save_signature - echo Enable > attribute/PasswordBeep/current_value + Updating the installed certificate:: + + echo "signature" > authentication/Admin/signature + echo "signed certificate" > authentication/Admin/certificate + + Removing the installed certificate:: + + echo "signature" > authentication/Admin/signature + echo "" > authentication/Admin/certificate + + Changing a BIOS setting:: + + echo "signature" > authentication/Admin/signature + echo "save signature" > authentication/Admin/save_signature + echo Enable > attribute/PasswordBeep/current_value You cannot enable certificate authentication if a supervisor password has not been set. @@ -288,9 +291,10 @@ Description: certificate_to_password: Write only attribute used to switch from certificate based authentication back to password based. - Usage: - echo > authentication/Admin/signature - echo > authentication/Admin/certificate_to_password + Usage:: + + echo "signature" > authentication/Admin/signature + echo "password" > authentication/Admin/certificate_to_password What: /sys/class/firmware-attributes/*/attributes/pending_reboot From 9aa6471419dc904c4f182295dbe00edfe4c92a29 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 24 Mar 2022 17:47:36 +0100 Subject: [PATCH 179/708] Documentation/ABI: sysfs-class-firmware-attributes: Misc. cleanups Cleanup / fix some minor issues. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220324164737.21765-4-hdegoede@redhat.com --- Documentation/ABI/testing/sysfs-class-firmware-attributes | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-firmware-attributes b/Documentation/ABI/testing/sysfs-class-firmware-attributes index 5356ff2ed6c8..4cdba3477176 100644 --- a/Documentation/ABI/testing/sysfs-class-firmware-attributes +++ b/Documentation/ABI/testing/sysfs-class-firmware-attributes @@ -116,7 +116,7 @@ Description: [ForceIf:=] [ForceIfNot:=] - For example: + For example:: LegacyOrom/dell_value_modifier has value: Disabled[ForceIf:SecureBoot=Enabled] @@ -212,7 +212,7 @@ Description: the next boot. Lenovo specific class extensions - ------------------------------ + -------------------------------- On Lenovo systems the following additional settings are available: @@ -349,7 +349,7 @@ Description: # echo "factory" > /sys/class/firmware-attributes/*/device/attributes/reset_bios # cat /sys/class/firmware-attributes/*/device/attributes/reset_bios - # builtinsafe lastknowngood [factory] custom + builtinsafe lastknowngood [factory] custom Note that any changes to this attribute requires a reboot for changes to take effect. From 487532ec20c1a0b9fc85c1265fa81f04a151f007 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Sat, 26 Mar 2022 10:02:49 +0800 Subject: [PATCH 180/708] platform/x86: acerhdf: Cleanup str_starts_with() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since there is already a generic function strstarts() that check if a string starts with a given prefix, cleanup str_starts_with(). Signed-off-by: Wei Li Acked-by: Peter Kästle Link: https://lore.kernel.org/r/20220326020249.3266561-1-liwei391@huawei.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/acerhdf.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c index 6b8b3ab8db48..3463629f8764 100644 --- a/drivers/platform/x86/acerhdf.c +++ b/drivers/platform/x86/acerhdf.c @@ -584,21 +584,6 @@ static struct platform_driver acerhdf_driver = { .remove = acerhdf_remove, }; -/* checks if str begins with start */ -static int str_starts_with(const char *str, const char *start) -{ - unsigned long str_len = 0, start_len = 0; - - str_len = strlen(str); - start_len = strlen(start); - - if (str_len >= start_len && - !strncmp(str, start, start_len)) - return 1; - - return 0; -} - /* check hardware */ static int __init acerhdf_check_hardware(void) { @@ -651,9 +636,9 @@ static int __init acerhdf_check_hardware(void) * check if actual hardware BIOS vendor, product and version * IDs start with the strings of BIOS table entry */ - if (str_starts_with(vendor, bt->vendor) && - str_starts_with(product, bt->product) && - str_starts_with(version, bt->version)) { + if (strstarts(vendor, bt->vendor) && + strstarts(product, bt->product) && + strstarts(version, bt->version)) { found = 1; break; } From 753ee989f7cf0c0a76a7f56956827a8863a60f97 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 2 Apr 2022 18:11:22 -0500 Subject: [PATCH 181/708] platform/x86: amd-pmc: Fix compilation without CONFIG_SUSPEND Since commit b1f66033cd4e ("platform/x86: amd-pmc: Move to later in the suspend process") amd-pmc doesn't use traditional suspend resume callback anymore but relies on functions only created declared when CONFIG_SUSPEND is set. Check for CONFIG_SUSPEND and only use those functions in those circumstances. Fixes: commit b1f66033cd4e ("platform/x86: amd-pmc: Move to later in the suspend process") Reported-by: Randy Dunlap Signed-off-by: Mario Limonciello Acked-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://lore.kernel.org/r/20220402231122.3877-1-mario.limonciello@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index e9d0dbbb2887..fa4123dbdf7f 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -160,8 +160,10 @@ MODULE_PARM_DESC(enable_stb, "Enable the STB debug mechanism"); static struct amd_pmc_dev pmc; static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, u32 arg, u32 *data, u8 msg, bool ret); -static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data); static int amd_pmc_read_stb(struct amd_pmc_dev *dev, u32 *buf); +#ifdef CONFIG_SUSPEND +static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data); +#endif static inline u32 amd_pmc_reg_read(struct amd_pmc_dev *dev, int reg_offset) { @@ -325,6 +327,7 @@ static int get_metrics_table(struct amd_pmc_dev *pdev, struct smu_metrics *table return 0; } +#ifdef CONFIG_SUSPEND static void amd_pmc_validate_deepest(struct amd_pmc_dev *pdev) { struct smu_metrics table; @@ -338,6 +341,7 @@ static void amd_pmc_validate_deepest(struct amd_pmc_dev *pdev) dev_dbg(pdev->dev, "Last suspend in deepest state for %lluus\n", table.timein_s0i3_lastcapture); } +#endif #ifdef CONFIG_DEBUG_FS static int smu_fw_info_show(struct seq_file *s, void *unused) @@ -569,6 +573,7 @@ out_unlock: return rc; } +#ifdef CONFIG_SUSPEND static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) { switch (dev->cpu_id) { @@ -694,6 +699,7 @@ static struct acpi_s2idle_dev_ops amd_pmc_s2idle_dev_ops = { .prepare = amd_pmc_s2idle_prepare, .restore = amd_pmc_s2idle_restore, }; +#endif static const struct pci_device_id pmc_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_YC) }, @@ -733,6 +739,7 @@ static int amd_pmc_s2d_init(struct amd_pmc_dev *dev) return 0; } +#ifdef CONFIG_SUSPEND static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data) { int err; @@ -753,6 +760,7 @@ static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data) return 0; } +#endif static int amd_pmc_read_stb(struct amd_pmc_dev *dev, u32 *buf) { @@ -859,9 +867,11 @@ static int amd_pmc_probe(struct platform_device *pdev) amd_pmc_get_smu_version(dev); platform_set_drvdata(pdev, dev); +#ifdef CONFIG_SUSPEND err = acpi_register_lps0_dev(&amd_pmc_s2idle_dev_ops); if (err) dev_warn(dev->dev, "failed to register LPS0 sleep handler, expect increased power consumption\n"); +#endif amd_pmc_dbgfs_register(dev); return 0; @@ -875,7 +885,9 @@ static int amd_pmc_remove(struct platform_device *pdev) { struct amd_pmc_dev *dev = platform_get_drvdata(pdev); +#ifdef CONFIG_SUSPEND acpi_unregister_lps0_dev(&amd_pmc_s2idle_dev_ops); +#endif amd_pmc_dbgfs_unregister(dev); pci_dev_put(dev->rdev); mutex_destroy(&dev->lock); From 83a1cde5c74bfb44b49cb2a940d044bb2380f4ea Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 23 Dec 2021 15:21:41 -0700 Subject: [PATCH 182/708] ARM: davinci: da850-evm: Avoid NULL pointer dereference With newer versions of GCC, there is a panic in da850_evm_config_emac() when booting multi_v5_defconfig in QEMU under the palmetto-bmc machine: Unable to handle kernel NULL pointer dereference at virtual address 00000020 pgd = (ptrval) [00000020] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 5.15.0 #1 Hardware name: Generic DT based system PC is at da850_evm_config_emac+0x1c/0x120 LR is at do_one_initcall+0x50/0x1e0 The emac_pdata pointer in soc_info is NULL because davinci_soc_info only gets populated on davinci machines but da850_evm_config_emac() is called on all machines via device_initcall(). Move the rmii_en assignment below the machine check so that it is only dereferenced when running on a supported SoC. Fixes: bae105879f2f ("davinci: DA850/OMAP-L138 EVM: implement autodetect of RMII PHY") Signed-off-by: Nathan Chancellor Reviewed-by: Arnd Bergmann Reviewed-by: Bartosz Golaszewski Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/YcS4xVWs6bQlQSPC@archlinux-ax161/ Signed-off-by: Arnd Bergmann --- arch/arm/mach-davinci/board-da850-evm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index 428012687a80..7f7f6bae21c2 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -1101,11 +1101,13 @@ static int __init da850_evm_config_emac(void) int ret; u32 val; struct davinci_soc_info *soc_info = &davinci_soc_info; - u8 rmii_en = soc_info->emac_pdata->rmii_en; + u8 rmii_en; if (!machine_is_davinci_da850_evm()) return 0; + rmii_en = soc_info->emac_pdata->rmii_en; + cfg_chip3_base = DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP3_REG); val = __raw_readl(cfg_chip3_base); From 20314bacd2f9b1b8fc10895417e6db0dc85f8248 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 4 Apr 2022 06:43:38 -0700 Subject: [PATCH 183/708] staging: r8188eu: Fix PPPoE tag insertion on little endian systems In __nat25_add_pppoe_tag(), the tag length is read from the tag data structure. The value is kept in network format, but read as raw value. With -Warray-bounds, this results in the following gcc error/warning when building the driver on alpha. In function '__nat25_add_pppoe_tag', inlined from 'nat25_db_handle' at drivers/staging/r8188eu/core/rtw_br_ext.c:479:11: arch/alpha/include/asm/string.h:22:16: error: '__builtin_memcpy' forming offset [40, 2051] is out of the bounds [0, 40] of object 'tag_buf' with type 'unsigned char[40]' Add the missing be16_to_cpu() to fix the compile error. It should be noted, however, that this fix means that the code did probably not work on any little endian systems and/or that the driver has other endiannes related issues. A build with C=1 suggests that this is indeed the case. This patch does not attempt to fix any of those other issues. Fixes: 15865124feed ("staging: r8188eu: introduce new core dir for RTL8188eu driver") Cc: Phillip Potter Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20220404134338.3276991-1-linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/core/rtw_br_ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/r8188eu/core/rtw_br_ext.c b/drivers/staging/r8188eu/core/rtw_br_ext.c index d68611ef22f8..f056204c0fdb 100644 --- a/drivers/staging/r8188eu/core/rtw_br_ext.c +++ b/drivers/staging/r8188eu/core/rtw_br_ext.c @@ -70,7 +70,7 @@ static int __nat25_add_pppoe_tag(struct sk_buff *skb, struct pppoe_tag *tag) struct pppoe_hdr *ph = (struct pppoe_hdr *)(skb->data + ETH_HLEN); int data_len; - data_len = tag->tag_len + TAG_HDR_LEN; + data_len = be16_to_cpu(tag->tag_len) + TAG_HDR_LEN; if (skb_tailroom(skb) < data_len) return -1; From 94865e2dcb46c1c852c881cfa769cec4947d8f28 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 4 Apr 2022 06:48:59 -0700 Subject: [PATCH 184/708] habanalabs: Fix test build failures allmodconfig builds on 32-bit architectures fail with the following error. drivers/misc/habanalabs/common/memory.c: In function 'alloc_device_memory': drivers/misc/habanalabs/common/memory.c:153:49: error: cast from pointer to integer of different size Fix the typecast. While at it, drop other unnecessary typecasts associated with the same commit. Fixes: e8458e20e0a3c ("habanalabs: make sure device mem alloc is page aligned") Cc: Ohad Sharabi Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20220404134859.3278599-1-linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman --- drivers/misc/habanalabs/common/memory.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index e008d82e4ba3..a13506dd8119 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -111,10 +111,10 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, if (contiguous) { if (is_power_of_2(page_size)) - paddr = (u64) (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool, - total_size, NULL, page_size); + paddr = (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool, + total_size, NULL, page_size); else - paddr = (u64) (uintptr_t) gen_pool_alloc(vm->dram_pg_pool, total_size); + paddr = gen_pool_alloc(vm->dram_pg_pool, total_size); if (!paddr) { dev_err(hdev->dev, "failed to allocate %llu contiguous pages with total size of %llu\n", @@ -150,12 +150,12 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, for (i = 0 ; i < num_pgs ; i++) { if (is_power_of_2(page_size)) phys_pg_pack->pages[i] = - (u64) gen_pool_dma_alloc_align(vm->dram_pg_pool, - page_size, NULL, - page_size); + (uintptr_t)gen_pool_dma_alloc_align(vm->dram_pg_pool, + page_size, NULL, + page_size); else - phys_pg_pack->pages[i] = (u64) gen_pool_alloc(vm->dram_pg_pool, - page_size); + phys_pg_pack->pages[i] = gen_pool_alloc(vm->dram_pg_pool, + page_size); if (!phys_pg_pack->pages[i]) { dev_err(hdev->dev, "Failed to allocate device memory (out of memory)\n"); From d10f4b22e912d9771493f71d05337362538eec07 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Apr 2022 17:30:44 +0200 Subject: [PATCH 185/708] ARM: iop32x: include iop3xx.h header where needed Building with 'make W=1' shows a warning about a missing prototype: arch/arm/mach-iop32x/cp6.c:10:6: warning: no previous prototype for 'iop_enable_cp6' [-Wmissing-prototypes] Include the header that contains the declaration. Fixes: 6f5d248d05db ("ARM: iop32x: use GENERIC_IRQ_MULTI_HANDLER") Reported-by: kernel test robot Signed-off-by: Arnd Bergmann --- arch/arm/mach-iop32x/cp6.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-iop32x/cp6.c b/arch/arm/mach-iop32x/cp6.c index 2882674a1c39..7135a0ac9949 100644 --- a/arch/arm/mach-iop32x/cp6.c +++ b/arch/arm/mach-iop32x/cp6.c @@ -7,6 +7,8 @@ #include #include +#include "iop3xx.h" + void iop_enable_cp6(void) { u32 temp; From b452dbf24d7d9a990d70118462925f6ee287d135 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 3 Mar 2022 19:06:32 +0100 Subject: [PATCH 186/708] memory: renesas-rpc-if: fix platform-device leak in error path Make sure to free the flash platform device in the event that registration fails during probe. Fixes: ca7d8b980b67 ("memory: add Renesas RPC-IF driver") Cc: stable@vger.kernel.org # 5.8 Cc: Sergei Shtylyov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220303180632.3194-1-johan@kernel.org Signed-off-by: Krzysztof Kozlowski --- drivers/memory/renesas-rpc-if.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/memory/renesas-rpc-if.c b/drivers/memory/renesas-rpc-if.c index e4cc64f56019..2e545f473cc6 100644 --- a/drivers/memory/renesas-rpc-if.c +++ b/drivers/memory/renesas-rpc-if.c @@ -651,6 +651,7 @@ static int rpcif_probe(struct platform_device *pdev) struct platform_device *vdev; struct device_node *flash; const char *name; + int ret; flash = of_get_next_child(pdev->dev.of_node, NULL); if (!flash) { @@ -674,7 +675,14 @@ static int rpcif_probe(struct platform_device *pdev) return -ENOMEM; vdev->dev.parent = &pdev->dev; platform_set_drvdata(pdev, vdev); - return platform_device_add(vdev); + + ret = platform_device_add(vdev); + if (ret) { + platform_device_put(vdev); + return ret; + } + + return 0; } static int rpcif_remove(struct platform_device *pdev) From 6f296a9665ba5ac68937bf11f96214eb9de81baa Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 9 Mar 2022 11:01:43 +0000 Subject: [PATCH 187/708] memory: atmel-ebi: Fix missing of_node_put in atmel_ebi_probe The device_node pointer is returned by of_parse_phandle() with refcount incremented. We should use of_node_put() on it when done. Fixes: 87108dc78eb8 ("memory: atmel-ebi: Enable the SMC clock if specified") Signed-off-by: Miaoqian Lin Reviewed-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220309110144.22412-1-linmq006@gmail.com Signed-off-by: Krzysztof Kozlowski --- drivers/memory/atmel-ebi.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/memory/atmel-ebi.c b/drivers/memory/atmel-ebi.c index c267283b01fd..e749dcb3ddea 100644 --- a/drivers/memory/atmel-ebi.c +++ b/drivers/memory/atmel-ebi.c @@ -544,20 +544,27 @@ static int atmel_ebi_probe(struct platform_device *pdev) smc_np = of_parse_phandle(dev->of_node, "atmel,smc", 0); ebi->smc.regmap = syscon_node_to_regmap(smc_np); - if (IS_ERR(ebi->smc.regmap)) - return PTR_ERR(ebi->smc.regmap); + if (IS_ERR(ebi->smc.regmap)) { + ret = PTR_ERR(ebi->smc.regmap); + goto put_node; + } ebi->smc.layout = atmel_hsmc_get_reg_layout(smc_np); - if (IS_ERR(ebi->smc.layout)) - return PTR_ERR(ebi->smc.layout); + if (IS_ERR(ebi->smc.layout)) { + ret = PTR_ERR(ebi->smc.layout); + goto put_node; + } ebi->smc.clk = of_clk_get(smc_np, 0); if (IS_ERR(ebi->smc.clk)) { - if (PTR_ERR(ebi->smc.clk) != -ENOENT) - return PTR_ERR(ebi->smc.clk); + if (PTR_ERR(ebi->smc.clk) != -ENOENT) { + ret = PTR_ERR(ebi->smc.clk); + goto put_node; + } ebi->smc.clk = NULL; } + of_node_put(smc_np); ret = clk_prepare_enable(ebi->smc.clk); if (ret) return ret; @@ -608,6 +615,10 @@ static int atmel_ebi_probe(struct platform_device *pdev) } return of_platform_populate(np, NULL, NULL, dev); + +put_node: + of_node_put(smc_np); + return ret; } static __maybe_unused int atmel_ebi_resume(struct device *dev) From 4f9f45d0eb0e7d449bc9294459df79b9c66edfac Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Mon, 21 Mar 2022 15:51:30 +0800 Subject: [PATCH 188/708] dt-bindings: memory: snps,ddrc-3.80a compatible also need interrupts For the snps,ddrc-3.80a compatible, the interrupts property is also required, also order the compatibles by name (s goes before x). Signed-off-by: Sherry Sun Fixes: a9e6b3819b36 ("dt-bindings: memory: Add entry for version 3.80a") Link: https://lore.kernel.org/r/20220321075131.17811-2-sherry.sun@nxp.com Signed-off-by: Krzysztof Kozlowski --- .../bindings/memory-controllers/synopsys,ddrc-ecc.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/memory-controllers/synopsys,ddrc-ecc.yaml b/Documentation/devicetree/bindings/memory-controllers/synopsys,ddrc-ecc.yaml index fb7ae38a9c86..e3bc6ebce090 100644 --- a/Documentation/devicetree/bindings/memory-controllers/synopsys,ddrc-ecc.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/synopsys,ddrc-ecc.yaml @@ -24,9 +24,9 @@ description: | properties: compatible: enum: + - snps,ddrc-3.80a - xlnx,zynq-ddrc-a05 - xlnx,zynqmp-ddrc-2.40a - - snps,ddrc-3.80a interrupts: maxItems: 1 @@ -43,7 +43,9 @@ allOf: properties: compatible: contains: - const: xlnx,zynqmp-ddrc-2.40a + enum: + - snps,ddrc-3.80a + - xlnx,zynqmp-ddrc-2.40a then: required: - interrupts From 687127c81ad32c8900a3fedbc7ed8f686ca95855 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 31 Mar 2022 15:01:50 -0300 Subject: [PATCH 189/708] cifs: fix potential race with cifsd thread To avoid racing with demultiplex thread while it is handling data on socket, use cifs_signal_cifsd_for_reconnect() helper for marking current server to reconnect and let the demultiplex thread handle the rest. Fixes: dca65818c80c ("cifs: use a different reconnect helper for non-cifsd threads") Reviewed-by: Enzo Matsumiya Reviewed-by: Shyam Prasad N Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/connect.c | 2 +- fs/cifs/netmisc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ee3b7c15e884..3ca06bd88b6e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4465,7 +4465,7 @@ static int tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tco */ if (rc && server->current_fullpath != server->origin_fullpath) { server->current_fullpath = server->origin_fullpath; - cifs_reconnect(tcon->ses->server, true); + cifs_signal_cifsd_for_reconnect(server, true); } dfs_cache_free_tgts(tl); diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index ebe236b9d9f5..235aa1b395eb 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -896,7 +896,7 @@ map_and_check_smb_error(struct mid_q_entry *mid, bool logErr) if (class == ERRSRV && code == ERRbaduid) { cifs_dbg(FYI, "Server returned 0x%x, reconnecting session...\n", code); - cifs_reconnect(mid->server, false); + cifs_signal_cifsd_for_reconnect(mid->server, false); } } From 00c796eecba4898194ea549679797ee28f89a92f Mon Sep 17 00:00:00 2001 From: Jakob Koschel Date: Thu, 31 Mar 2022 23:55:41 +0200 Subject: [PATCH 190/708] cifs: remove check of list iterator against head past the loop body When list_for_each_entry() completes the iteration over the whole list without breaking the loop, the iterator value will be a bogus pointer computed based on the head element. While it is safe to use the pointer to determine if it was computed based on the head element, either with list_entry_is_head() or &pos->member == head, using the iterator variable after the loop should be avoided. In preparation to limit the scope of a list iterator to the list traversal loop, use a dedicated pointer to point to the found element [1]. Link: https://lore.kernel.org/all/CAHk-=wgRr_D8CB-D9Kg-c=EHreAsk5SqXPwr9Y7k9sA6cWXJ6w@mail.gmail.com/ [1] Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Jakob Koschel Signed-off-by: Steve French --- fs/cifs/smb2misc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index c653beb735b8..3fe47a88f47d 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -150,16 +150,18 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr) struct smb2_transform_hdr *thdr = (struct smb2_transform_hdr *)buf; struct cifs_ses *ses = NULL; + struct cifs_ses *iter; /* decrypt frame now that it is completely read in */ spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &srvr->smb_ses_list, smb_ses_list) { - if (ses->Suid == le64_to_cpu(thdr->SessionId)) + list_for_each_entry(iter, &srvr->smb_ses_list, smb_ses_list) { + if (iter->Suid == le64_to_cpu(thdr->SessionId)) { + ses = iter; break; + } } spin_unlock(&cifs_tcp_ses_lock); - if (list_entry_is_head(ses, &srvr->smb_ses_list, - smb_ses_list)) { + if (!ses) { cifs_dbg(VFS, "no decryption - session id not found\n"); return 1; } From 10cb21f4ff3f9cb36d1e1c39bf80426f02f4986a Mon Sep 17 00:00:00 2001 From: Anilkumar Kolli Date: Thu, 31 Mar 2022 10:07:57 +0530 Subject: [PATCH 191/708] Revert "ath11k: mesh: add support for 256 bitmap in blockack frames in 11ax" This reverts commit 743b9065fe6348a5f8f5ce04869ce2d701e5e1bc. The original commit breaks the 256 bitmap in blockack frames in AP mode. After reverting the commit the feature works again in both AP and mesh modes Tested-on: IPQ8074 hw2.0 PCI WLAN.HK.2.6.0.1-00786-QCAHKSWPL_SILICONZ-1 Fixes: 743b9065fe63 ("ath11k: mesh: add support for 256 bitmap in blockack frames in 11ax") Signed-off-by: Anilkumar Kolli Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1648701477-16367-1-git-send-email-quic_akolli@quicinc.com --- drivers/net/wireless/ath/ath11k/mac.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index d5b83f90d27a..e6b34b0d61bd 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -3136,6 +3136,20 @@ static void ath11k_mac_op_bss_info_changed(struct ieee80211_hw *hw, arvif->do_not_send_tmpl = true; else arvif->do_not_send_tmpl = false; + + if (vif->bss_conf.he_support) { + ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, + WMI_VDEV_PARAM_BA_MODE, + WMI_BA_MODE_BUFFER_SIZE_256); + if (ret) + ath11k_warn(ar->ab, + "failed to set BA BUFFER SIZE 256 for vdev: %d\n", + arvif->vdev_id); + else + ath11k_dbg(ar->ab, ATH11K_DBG_MAC, + "Set BA BUFFER SIZE 256 for VDEV: %d\n", + arvif->vdev_id); + } } if (changed & (BSS_CHANGED_BEACON_INFO | BSS_CHANGED_BEACON)) { @@ -3171,14 +3185,6 @@ static void ath11k_mac_op_bss_info_changed(struct ieee80211_hw *hw, if (arvif->is_up && vif->bss_conf.he_support && vif->bss_conf.he_oper.params) { - ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, - WMI_VDEV_PARAM_BA_MODE, - WMI_BA_MODE_BUFFER_SIZE_256); - if (ret) - ath11k_warn(ar->ab, - "failed to set BA BUFFER SIZE 256 for vdev: %d\n", - arvif->vdev_id); - param_id = WMI_VDEV_PARAM_HEOPS_0_31; param_value = vif->bss_conf.he_oper.params; ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, From 527a9867af29ff89f278d037db704e0ed50fb666 Mon Sep 17 00:00:00 2001 From: Jan Varho Date: Mon, 4 Apr 2022 19:42:30 +0300 Subject: [PATCH 192/708] random: do not split fast init input in add_hwgenerator_randomness() add_hwgenerator_randomness() tries to only use the required amount of input for fast init, but credits all the entropy, rather than a fraction of it. Since it's hard to determine how much entropy is left over out of a non-unformly random sample, either give it all to fast init or credit it, but don't attempt to do both. In the process, we can clean up the injection code to no longer need to return a value. Signed-off-by: Jan Varho [Jason: expanded commit message] Fixes: 73c7733f122e ("random: do not throw away excess input to crng_fast_load") Cc: stable@vger.kernel.org # 5.17+, requires af704c856e88 Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 1d8242969751..ee3ad2ba0942 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -437,11 +437,8 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], * This shouldn't be set by functions like add_device_randomness(), * where we can't trust the buffer passed to it is guaranteed to be * unpredictable (so it might not have any entropy at all). - * - * Returns the number of bytes processed from input, which is bounded - * by CRNG_INIT_CNT_THRESH if account is true. */ -static size_t crng_pre_init_inject(const void *input, size_t len, bool account) +static void crng_pre_init_inject(const void *input, size_t len, bool account) { static int crng_init_cnt = 0; struct blake2s_state hash; @@ -452,18 +449,15 @@ static size_t crng_pre_init_inject(const void *input, size_t len, bool account) spin_lock_irqsave(&base_crng.lock, flags); if (crng_init != 0) { spin_unlock_irqrestore(&base_crng.lock, flags); - return 0; + return; } - if (account) - len = min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt); - blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); blake2s_update(&hash, input, len); blake2s_final(&hash, base_crng.key); if (account) { - crng_init_cnt += len; + crng_init_cnt += min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt); if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { ++base_crng.generation; crng_init = 1; @@ -474,8 +468,6 @@ static size_t crng_pre_init_inject(const void *input, size_t len, bool account) if (crng_init == 1) pr_notice("fast init done\n"); - - return len; } static void _get_random_bytes(void *buf, size_t nbytes) @@ -1141,12 +1133,9 @@ void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy) { if (unlikely(crng_init == 0 && entropy < POOL_MIN_BITS)) { - size_t ret = crng_pre_init_inject(buffer, count, true); - mix_pool_bytes(buffer, ret); - count -= ret; - buffer += ret; - if (!count || crng_init == 0) - return; + crng_pre_init_inject(buffer, count, true); + mix_pool_bytes(buffer, count); + return; } /* From d39268ad24c0fd0665d0c5cf55a7c1a0ebf94766 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 18 Mar 2022 06:52:59 -0700 Subject: [PATCH 193/708] x86/mm/tlb: Revert retpoline avoidance approach 0day reported a regression on a microbenchmark which is intended to stress the TLB flushing path: https://lore.kernel.org/all/20220317090415.GE735@xsang-OptiPlex-9020/ It pointed at a commit from Nadav which intended to remove retpoline overhead in the TLB flushing path by taking the 'cond'-ition in on_each_cpu_cond_mask(), pre-calculating it, and incorporating it into 'cpumask'. That allowed the code to use a bunch of earlier direct calls instead of later indirect calls that need a retpoline. But, in practice, threads can go idle (and into lazy TLB mode where they don't need to flush their TLB) between the early and late calls. It works in this direction and not in the other because TLB-flushing threads tend to hold mmap_lock for write. Contention on that lock causes threads to _go_ idle right in this early/late window. There was not any performance data in the original commit specific to the retpoline overhead. I did a few tests on a system with retpolines: https://lore.kernel.org/all/dd8be93c-ded6-b962-50d4-96b1c3afb2b7@intel.com/ which showed a possible small win. But, that small win pales in comparison with the bigger loss induced on non-retpoline systems. Revert the patch that removed the retpolines. This was not a clean revert, but it was self-contained enough not to be too painful. Fixes: 6035152d8eeb ("x86/mm/tlb: Open-code on_each_cpu_cond_mask() for tlb_is_not_lazy()") Reported-by: kernel test robot Signed-off-by: Dave Hansen Signed-off-by: Borislav Petkov Acked-by: Nadav Amit Cc: Link: https://lkml.kernel.org/r/164874672286.389.7021457716635788197.tip-bot2@tip-bot2 --- arch/x86/mm/tlb.c | 37 +++++-------------------------------- 1 file changed, 5 insertions(+), 32 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 6eb4d91d5365..d400b6d9d246 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -855,13 +855,11 @@ done: nr_invalidate); } -static bool tlb_is_not_lazy(int cpu) +static bool tlb_is_not_lazy(int cpu, void *data) { return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu); } -static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); - DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared); EXPORT_PER_CPU_SYMBOL(cpu_tlbstate_shared); @@ -890,36 +888,11 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, * up on the new contents of what used to be page tables, while * doing a speculative memory access. */ - if (info->freed_tables) { + if (info->freed_tables) on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); - } else { - /* - * Although we could have used on_each_cpu_cond_mask(), - * open-coding it has performance advantages, as it eliminates - * the need for indirect calls or retpolines. In addition, it - * allows to use a designated cpumask for evaluating the - * condition, instead of allocating one. - * - * This code works under the assumption that there are no nested - * TLB flushes, an assumption that is already made in - * flush_tlb_mm_range(). - * - * cond_cpumask is logically a stack-local variable, but it is - * more efficient to have it off the stack and not to allocate - * it on demand. Preemption is disabled and this code is - * non-reentrant. - */ - struct cpumask *cond_cpumask = this_cpu_ptr(&flush_tlb_mask); - int cpu; - - cpumask_clear(cond_cpumask); - - for_each_cpu(cpu, cpumask) { - if (tlb_is_not_lazy(cpu)) - __cpumask_set_cpu(cpu, cond_cpumask); - } - on_each_cpu_mask(cond_cpumask, flush_tlb_func, (void *)info, true); - } + else + on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func, + (void *)info, 1, cpumask); } void flush_tlb_multi(const struct cpumask *cpumask, From 4d809f69695d4e7d1378b3a072fa9aef23123018 Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Mon, 28 Feb 2022 17:53:30 +0100 Subject: [PATCH 194/708] IB/rdmavt: add lock to call to rvt_error_qp to prevent a race condition The documentation of the function rvt_error_qp says both r_lock and s_lock need to be held when calling that function. It also asserts using lockdep that both of those locks are held. However, the commit I referenced in Fixes accidentally makes the call to rvt_error_qp in rvt_ruc_loopback no longer covered by r_lock. This results in the lockdep assertion failing and also possibly in a race condition. Fixes: d757c60eca9b ("IB/rdmavt: Fix concurrency panics in QP post_send and modify to error") Link: https://lore.kernel.org/r/20220228165330.41546-1-dossche.niels@gmail.com Signed-off-by: Niels Dossche Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/qp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index ae50b56e8913..8ef112f883a7 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -3190,7 +3190,11 @@ serr_no_r_lock: spin_lock_irqsave(&sqp->s_lock, flags); rvt_send_complete(sqp, wqe, send_status); if (sqp->ibqp.qp_type == IB_QPT_RC) { - int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + int lastwqe; + + spin_lock(&sqp->r_lock); + lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + spin_unlock(&sqp->r_lock); sqp->s_flags &= ~RVT_S_BUSY; spin_unlock_irqrestore(&sqp->s_lock, flags); From 02c7efa43627163e489a8db87882445a0ff381f7 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Mon, 31 Jan 2022 13:23:41 -0800 Subject: [PATCH 195/708] Documentation: kunit: fix path to .kunitconfig in start.rst Commit ddbd60c779b4 ("kunit: use --build_dir=.kunit as default") changed the default --build_dir, which had the side effect of making `.kunitconfig` move to `.kunit/.kunitconfig`. However, the first few lines of kunit/start.rst never got updated, oops. Fix this by telling people to run kunit.py first, which will automatically generate the .kunit directory and .kunitconfig file, and then edit the file manually as desired. Reported-by: Yifan Yuan Signed-off-by: Daniel Latypov Reviewed-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- Documentation/dev-tools/kunit/start.rst | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/dev-tools/kunit/start.rst b/Documentation/dev-tools/kunit/start.rst index ad168d16968f..867a4bba6bf6 100644 --- a/Documentation/dev-tools/kunit/start.rst +++ b/Documentation/dev-tools/kunit/start.rst @@ -41,13 +41,18 @@ or ``VFAT_FS``. To run ``FAT_KUNIT_TEST``, the ``.kunitconfig`` has: CONFIG_MSDOS_FS=y CONFIG_FAT_KUNIT_TEST=y -1. A good starting point for the ``.kunitconfig``, is the KUnit default - config. Run the command: +1. A good starting point for the ``.kunitconfig`` is the KUnit default config. + You can generate it by running: .. code-block:: bash cd $PATH_TO_LINUX_REPO - cp tools/testing/kunit/configs/default.config .kunitconfig + tools/testing/kunit/kunit.py config + cat .kunit/.kunitconfig + +.. note :: + ``.kunitconfig`` lives in the ``--build_dir`` used by kunit.py, which is + ``.kunit`` by default. .. note :: You may want to remove CONFIG_KUNIT_ALL_TESTS from the ``.kunitconfig`` as From 3bbbb3e5b59f4ca0f7493307a03f99930737bb76 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 10 Mar 2022 08:32:58 +0100 Subject: [PATCH 196/708] dt-bindings: extcon: maxim,max77843: fix ports type The "ports" property can contain multiple ports as name suggests, so it should be using "ports" type from device graphs. Reported-by: Rob Herring Fixes: 9729cad0278b ("dt-bindings: extcon: maxim,max77843: Add MAX77843 bindings") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220310073258.24060-1-krzysztof.kozlowski@canonical.com --- Documentation/devicetree/bindings/extcon/maxim,max77843.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/extcon/maxim,max77843.yaml b/Documentation/devicetree/bindings/extcon/maxim,max77843.yaml index f9ffe3d6f957..0216ec868c3e 100644 --- a/Documentation/devicetree/bindings/extcon/maxim,max77843.yaml +++ b/Documentation/devicetree/bindings/extcon/maxim,max77843.yaml @@ -25,7 +25,7 @@ properties: $ref: /schemas/connector/usb-connector.yaml# ports: - $ref: /schemas/graph.yaml#/properties/port + $ref: /schemas/graph.yaml#/properties/ports description: Any connector to the data bus of this controller should be modelled using the OF graph bindings specified From e7ccd8a49a050428bd842822970b70c66fd0b988 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 30 Mar 2022 15:04:16 +0200 Subject: [PATCH 197/708] dt-bindings: power: renesas,apmu: Fix cpus property limits "make dtbs_check": arch/arm/boot/dts/r8a7791-koelsch.dtb: apmu@e6152000: cpus:0: [6, 7] is too long From schema: Documentation/devicetree/bindings/power/renesas,apmu.yaml Correct the minimum and maximum number of CPUs controlled by a single APMU instance. Fixes: 39bd2b6a3783b899 ("dt-bindings: Improve phandle-array schemas") Signed-off-by: Geert Uytterhoeven Reviewed-by: Krzysztof Kozlowski Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/9ece1a07bbcb95abc9d80e6a6ecc95806a294a11.1648645279.git.geert+renesas@glider.be --- Documentation/devicetree/bindings/power/renesas,apmu.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/power/renesas,apmu.yaml b/Documentation/devicetree/bindings/power/renesas,apmu.yaml index 4d293b2b2f84..d77fc88050c8 100644 --- a/Documentation/devicetree/bindings/power/renesas,apmu.yaml +++ b/Documentation/devicetree/bindings/power/renesas,apmu.yaml @@ -36,7 +36,8 @@ properties: cpus: $ref: /schemas/types.yaml#/definitions/phandle-array items: - maxItems: 1 + minItems: 1 + maxItems: 4 description: | Array of phandles pointing to CPU cores, which should match the order of CPU cores used by the WUPCR and PSTR registers in the Advanced Power From dfbba2518aac4204203b0697a894d3b2f80134d3 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Sun, 3 Apr 2022 15:23:22 +0900 Subject: [PATCH 198/708] Revert "ACPI: processor: idle: Only flush cache on entering C3" Revert commit 87ebbb8c612b ("ACPI: processor: idle: Only flush cache on entering C3") that broke the assumptions of the acpi_idle_play_dead() callers. Namely, the CPU cache must always be flushed in acpi_idle_play_dead(), regardless of the target C-state that is going to be requested, because this is likely to be part of a CPU offline procedure or preparation for entering a system-wide sleep state and the lack of synchronization between the CPU cache and RAM may lead to problems going forward, for example when the CPU is brought back online. In particular, it breaks resume from suspend-to-RAM on Lenovo ThinkPad C13 which fails occasionally until the problematic commit is reverted. Signed-off-by: Akihiko Odaki [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 32b20efff5f8..4556c86c3465 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -570,8 +570,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) { struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); - if (cx->type == ACPI_STATE_C3) - ACPI_FLUSH_CPU_CACHE(); + ACPI_FLUSH_CPU_CACHE(); while (1) { From 27e4a85cf79b74650b0c60541fc989af7954ba62 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 30 Mar 2022 09:57:41 -0500 Subject: [PATCH 199/708] dt-bindings: Fix incomplete if/then/else schemas A recent review highlighted that the json-schema meta-schema allows any combination of if/then/else schema keywords even though if, then or else by themselves makes little sense. With an added meta-schema to only allow valid combinations, there's a handful of schemas found which need fixing in a variety of ways. Incorrect indentation is the most common issue. Cc: Lars-Peter Clausen Cc: Michael Hennerich Cc: Jonathan Cameron Cc: Krzysztof Kozlowski Cc: Olivier Moysan Cc: Arnaud Pouliquen Cc: Bjorn Andersson Cc: Georgi Djakov Cc: Ulf Hansson Cc: Thierry Reding Cc: Jonathan Hunter Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Kishon Vijay Abraham I Cc: Vinod Koul Cc: Mark Brown Cc: Fabrice Gasnier Cc: Grygorii Strashko Cc: Dmitry Osipenko Cc: linux-iio@vger.kernel.org Cc: alsa-devel@alsa-project.org Cc: linux-mmc@vger.kernel.org Cc: linux-tegra@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-phy@lists.infradead.org Signed-off-by: Rob Herring Acked-by: Jakub Kicinski Reviewed-by: Krzysztof Kozlowski Acked-by: Mark Brown Link: https://lore.kernel.org/r/20220330145741.3044896-1-robh@kernel.org --- .../bindings/iio/adc/adi,ad7476.yaml | 1 + .../bindings/iio/adc/st,stm32-dfsdm-adc.yaml | 8 +-- .../bindings/iio/dac/adi,ad5360.yaml | 6 +- .../bindings/interconnect/qcom,rpm.yaml | 70 +++++++++---------- .../bindings/mmc/nvidia,tegra20-sdhci.yaml | 2 + .../bindings/net/ti,davinci-mdio.yaml | 1 + .../bindings/phy/nvidia,tegra20-usb-phy.yaml | 20 +++--- .../bindings/phy/qcom,usb-hs-phy.yaml | 32 +++++---- .../bindings/regulator/fixed-regulator.yaml | 34 ++++----- .../bindings/sound/st,stm32-sai.yaml | 6 +- .../devicetree/bindings/sram/sram.yaml | 16 ++--- 11 files changed, 100 insertions(+), 96 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7476.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7476.yaml index cf711082ad7d..666414a9c0de 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7476.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7476.yaml @@ -98,6 +98,7 @@ allOf: - ti,adc121s - ti,ads7866 - ti,ads7868 + then: required: - vcc-supply # Devices with a vref diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml index 7c260f209687..912372706280 100644 --- a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml @@ -174,7 +174,7 @@ patternProperties: contains: const: st,stm32-dfsdm-adc - - then: + then: properties: st,adc-channels: minItems: 1 @@ -206,7 +206,7 @@ patternProperties: contains: const: st,stm32-dfsdm-dmic - - then: + then: properties: st,adc-channels: maxItems: 1 @@ -254,7 +254,7 @@ allOf: contains: const: st,stm32h7-dfsdm - - then: + then: patternProperties: "^filter@[0-9]+$": properties: @@ -269,7 +269,7 @@ allOf: contains: const: st,stm32mp1-dfsdm - - then: + then: patternProperties: "^filter@[0-9]+$": properties: diff --git a/Documentation/devicetree/bindings/iio/dac/adi,ad5360.yaml b/Documentation/devicetree/bindings/iio/dac/adi,ad5360.yaml index 0d8fb56f4b09..65f86f26947c 100644 --- a/Documentation/devicetree/bindings/iio/dac/adi,ad5360.yaml +++ b/Documentation/devicetree/bindings/iio/dac/adi,ad5360.yaml @@ -59,9 +59,9 @@ allOf: contains: enum: - adi,ad5371 - then: - required: - - vref2-supply + then: + required: + - vref2-supply examples: - | diff --git a/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml b/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml index 89853b482513..8a676fef8c1d 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml @@ -93,48 +93,48 @@ allOf: - qcom,sdm660-gnoc - qcom,sdm660-snoc - then: - properties: - clock-names: - items: - - const: bus - - const: bus_a + then: + properties: + clock-names: + items: + - const: bus + - const: bus_a - clocks: - items: - - description: Bus Clock - - description: Bus A Clock + clocks: + items: + - description: Bus Clock + - description: Bus A Clock - # Child node's properties - patternProperties: - '^interconnect-[a-z0-9]+$': - type: object - description: - snoc-mm is a child of snoc, sharing snoc's register address space. + # Child node's properties + patternProperties: + '^interconnect-[a-z0-9]+$': + type: object + description: + snoc-mm is a child of snoc, sharing snoc's register address space. - properties: - compatible: - enum: - - qcom,msm8939-snoc-mm + properties: + compatible: + enum: + - qcom,msm8939-snoc-mm - '#interconnect-cells': - const: 1 + '#interconnect-cells': + const: 1 - clock-names: - items: - - const: bus - - const: bus_a + clock-names: + items: + - const: bus + - const: bus_a - clocks: - items: - - description: Bus Clock - - description: Bus A Clock + clocks: + items: + - description: Bus Clock + - description: Bus A Clock - required: - - compatible - - '#interconnect-cells' - - clock-names - - clocks + required: + - compatible + - '#interconnect-cells' + - clock-names + - clocks - if: properties: diff --git a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml index ce64b3498378..f3f4d5b02744 100644 --- a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml +++ b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml @@ -197,6 +197,8 @@ allOf: - nvidia,tegra30-sdhci - nvidia,tegra114-sdhci - nvidia,tegra124-sdhci + then: + properties: clocks: items: - description: module clock diff --git a/Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml b/Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml index dbfca5ee9139..6f44f9516c36 100644 --- a/Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml +++ b/Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml @@ -56,6 +56,7 @@ if: compatible: contains: const: ti,davinci_mdio +then: required: - bus_freq diff --git a/Documentation/devicetree/bindings/phy/nvidia,tegra20-usb-phy.yaml b/Documentation/devicetree/bindings/phy/nvidia,tegra20-usb-phy.yaml index dfde0eaf66e1..d61585c96e31 100644 --- a/Documentation/devicetree/bindings/phy/nvidia,tegra20-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/nvidia,tegra20-usb-phy.yaml @@ -275,17 +275,17 @@ allOf: - nvidia,hssquelch-level - nvidia,hsdiscon-level - else: - properties: - clocks: - maxItems: 4 + else: + properties: + clocks: + maxItems: 4 - clock-names: - items: - - const: reg - - const: pll_u - - const: timer - - const: utmi-pads + clock-names: + items: + - const: reg + - const: pll_u + - const: timer + - const: utmi-pads - if: properties: diff --git a/Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml index e23e5590eaa3..0655e485b260 100644 --- a/Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml @@ -14,24 +14,24 @@ if: compatible: contains: const: qcom,usb-hs-phy-apq8064 - then: - properties: - resets: - maxItems: 1 +then: + properties: + resets: + maxItems: 1 - reset-names: - const: por + reset-names: + const: por - else: - properties: - resets: - minItems: 2 - maxItems: 2 +else: + properties: + resets: + minItems: 2 + maxItems: 2 - reset-names: - items: - - const: phy - - const: por + reset-names: + items: + - const: phy + - const: por properties: compatible: @@ -92,6 +92,8 @@ additionalProperties: false examples: - | otg: usb-controller { + #reset-cells = <1>; + ulpi { phy { compatible = "qcom,usb-hs-phy-msm8974", "qcom,usb-hs-phy"; diff --git a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml index 9b131c6facbc..84eeaef179a5 100644 --- a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml @@ -18,23 +18,23 @@ description: allOf: - $ref: "regulator.yaml#" - -if: - properties: - compatible: - contains: - const: regulator-fixed-clock - required: - - clocks -else: - if: - properties: - compatible: - contains: - const: regulator-fixed-domain - required: - - power-domains - - required-opps + - if: + properties: + compatible: + contains: + const: regulator-fixed-clock + then: + required: + - clocks + - if: + properties: + compatible: + contains: + const: regulator-fixed-domain + then: + required: + - power-domains + - required-opps properties: compatible: diff --git a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml index b3dbcba33e41..fe2e15504ebc 100644 --- a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml +++ b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml @@ -136,8 +136,7 @@ allOf: compatible: contains: const: st,stm32f4-sai - - - then: + then: properties: clocks: items: @@ -148,8 +147,7 @@ allOf: items: - const: x8k - const: x11k - - - else: + else: properties: clocks: items: diff --git a/Documentation/devicetree/bindings/sram/sram.yaml b/Documentation/devicetree/bindings/sram/sram.yaml index 668a9a41a775..993430be355b 100644 --- a/Documentation/devicetree/bindings/sram/sram.yaml +++ b/Documentation/devicetree/bindings/sram/sram.yaml @@ -136,14 +136,14 @@ required: - reg if: - properties: - compatible: - contains: - enum: - - qcom,rpm-msg-ram - - rockchip,rk3288-pmu-sram - -else: + not: + properties: + compatible: + contains: + enum: + - qcom,rpm-msg-ram + - rockchip,rk3288-pmu-sram +then: required: - "#address-cells" - "#size-cells" From 866f404f1b7431ce956bf2f16264ef3ca51cb0dc Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 17 Mar 2022 15:29:52 +0100 Subject: [PATCH 200/708] dt-bindings: irqchip: mrvl,intc: refresh maintainers Jason's email bounces and his address was dropped from maintainers in commit 509920aee72a ("MAINTAINERS: Move Jason Cooper to CREDITS"), so drop him here too. Switch other maintainers from IRQCHIP subsystem maintainers to Marvell Orion platform maintainers because its a bigger chance they know the hardware. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Andrew Lunn Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220317142952.479413-1-krzysztof.kozlowski@canonical.com --- .../devicetree/bindings/interrupt-controller/mrvl,intc.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.yaml b/Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.yaml index 372ccbfae771..5a583bf3dbc1 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.yaml @@ -7,10 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Marvell MMP/Orion Interrupt controller bindings maintainers: - - Thomas Gleixner - - Jason Cooper - - Marc Zyngier - - Rob Herring + - Andrew Lunn + - Gregory Clement allOf: - if: From c3b0068194269193286209d7a70ad1e93a13247f Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 1 Apr 2022 09:12:47 -0500 Subject: [PATCH 201/708] dt-bindings: Fix 'enum' lists with duplicate entries There's no reason to list the same value twice in an 'enum'. Fix all the occurrences in the tree. A meta-schema change will catch future ones. Cc: Krzysztof Kozlowski Cc: Thierry Reding Cc: Jonathan Hunter Cc: Mauro Carvalho Chehab Cc: Charles Keepax Cc: Linus Walleij Cc: Sebastian Reichel Cc: Tony Lindgren Cc: Yunfei Dong Cc: - Cc: linux-media@vger.kernel.org Cc: alsa-devel@alsa-project.org Cc: linux-gpio@vger.kernel.org Cc: linux-pm@vger.kernel.org Signed-off-by: Rob Herring Reviewed-by: Krzysztof Kozlowski Acked-by: Sebastian Reichel Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20220401141247.2993925-1-robh@kernel.org --- .../bindings/arm/tegra/nvidia,tegra20-pmc.yaml | 1 - Documentation/devicetree/bindings/bus/ti-sysc.yaml | 1 - .../bindings/media/mediatek,vcodec-encoder.yaml | 1 - .../devicetree/bindings/pinctrl/cirrus,madera.yaml | 11 +++++------ .../devicetree/bindings/power/supply/bq2415x.yaml | 1 - 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml index 0afec83cc723..564ae6aaccf7 100644 --- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml @@ -13,7 +13,6 @@ maintainers: properties: compatible: enum: - - nvidia,tegra20-pmc - nvidia,tegra20-pmc - nvidia,tegra30-pmc - nvidia,tegra114-pmc diff --git a/Documentation/devicetree/bindings/bus/ti-sysc.yaml b/Documentation/devicetree/bindings/bus/ti-sysc.yaml index bd40213302da..fced4082b047 100644 --- a/Documentation/devicetree/bindings/bus/ti-sysc.yaml +++ b/Documentation/devicetree/bindings/bus/ti-sysc.yaml @@ -34,7 +34,6 @@ properties: oneOf: - items: - enum: - - ti,sysc-omap2 - ti,sysc-omap2 - ti,sysc-omap4 - ti,sysc-omap4-simple diff --git a/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml b/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml index e7b65a91c92c..df7df06c378f 100644 --- a/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml +++ b/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml @@ -106,7 +106,6 @@ allOf: enum: - mediatek,mt8173-vcodec-enc - mediatek,mt8192-vcodec-enc - - mediatek,mt8173-vcodec-enc then: properties: diff --git a/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml b/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml index 8a90d8273767..6bd42e43cdab 100644 --- a/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml +++ b/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml @@ -48,13 +48,12 @@ properties: Name of one pin group to configure. enum: [ aif1, aif2, aif3, aif4, mif1, mif2, mif3, pdmspk1, pdmspk2, dmic4, dmic5, dmic6, gpio1, gpio2, gpio3, - gpio4, gpio5, gpio6, gpio7, gpio7, gpio8, gpio9, + gpio4, gpio5, gpio6, gpio7, gpio8, gpio9, gpio10, gpio11, gpio12, gpio13, gpio14, gpio15, - gpio16, gpio17, gpio17, gpio18, gpio19, gpio20, - gpio21, gpio22, gpio23, gpio24, gpio25, gpio26, - gpio27, gpio27, gpio28, gpio29, gpio30, gpio31, - gpio32, gpio33, gpio34, gpio35, gpio36, gpio37, - gpio37, gpio38, gpio39 ] + gpio16, gpio17, gpio18, gpio19, gpio20, gpio21, + gpio22, gpio23, gpio24, gpio25, gpio26, gpio27, + gpio28, gpio29, gpio30, gpio31, gpio32, gpio33, + gpio34, gpio35, gpio36, gpio37, gpio38, gpio39 ] function: description: diff --git a/Documentation/devicetree/bindings/power/supply/bq2415x.yaml b/Documentation/devicetree/bindings/power/supply/bq2415x.yaml index f8461f06e6f4..118cf484cc69 100644 --- a/Documentation/devicetree/bindings/power/supply/bq2415x.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq2415x.yaml @@ -16,7 +16,6 @@ allOf: properties: compatible: enum: - - ti,bq24150 - ti,bq24150 - ti,bq24150a - ti,bq24151 From 8ff88bec6f6186c406aa71312b2919e56f7b8084 Mon Sep 17 00:00:00 2001 From: Guo Zhengkui Date: Mon, 21 Mar 2022 13:27:42 +0800 Subject: [PATCH 202/708] selftests/vDSO: fix array_size.cocci warning Fix the following coccicheck warning: tools/testing/selftests/vDSO/vdso_test_correctness.c:309:46-47: WARNING: Use ARRAY_SIZE tools/testing/selftests/vDSO/vdso_test_correctness.c:373:46-47: WARNING: Use ARRAY_SIZE It has been tested with gcc (Debian 8.3.0-6) 8.3.0 on x86_64. Signed-off-by: Guo Zhengkui Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/vdso_test_correctness.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c index c4aea794725a..e691a3cf1491 100644 --- a/tools/testing/selftests/vDSO/vdso_test_correctness.c +++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c @@ -20,6 +20,7 @@ #include #include "vdso_config.h" +#include "../kselftest.h" static const char **name; @@ -306,10 +307,8 @@ static void test_clock_gettime(void) return; } - for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]); - clock++) { + for (int clock = 0; clock < ARRAY_SIZE(clocknames); clock++) test_one_clock_gettime(clock, clocknames[clock]); - } /* Also test some invalid clock ids */ test_one_clock_gettime(-1, "invalid"); @@ -370,10 +369,8 @@ static void test_clock_gettime64(void) return; } - for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]); - clock++) { + for (int clock = 0; clock < ARRAY_SIZE(clocknames); clock++) test_one_clock_gettime64(clock, clocknames[clock]); - } /* Also test some invalid clock ids */ test_one_clock_gettime64(-1, "invalid"); From 1585b1b55a2b9086823a6b30031eb63f965f8d44 Mon Sep 17 00:00:00 2001 From: Guo Zhengkui Date: Mon, 21 Mar 2022 18:25:17 +0800 Subject: [PATCH 203/708] selftests/proc: fix array_size.cocci warning Fix the following coccicheck warning: tools/testing/selftests/proc/proc-pid-vm.c:371:26-27: WARNING: Use ARRAY_SIZE tools/testing/selftests/proc/proc-pid-vm.c:420:26-27: WARNING: Use ARRAY_SIZE It has been tested with gcc (Debian 8.3.0-6) 8.3.0 on x86_64. Signed-off-by: Guo Zhengkui Signed-off-by: Shuah Khan --- tools/testing/selftests/proc/proc-pid-vm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index 18a3bde8bc96..28604c9f805c 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -46,6 +46,8 @@ #include #include +#include "../kselftest.h" + static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags) { return syscall(SYS_execveat, dirfd, pathname, argv, envp, flags); @@ -368,7 +370,7 @@ int main(void) }; int i; - for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) { + for (i = 0; i < ARRAY_SIZE(S); i++) { assert(memmem(buf, rv, S[i], strlen(S[i]))); } @@ -417,7 +419,7 @@ int main(void) }; int i; - for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) { + for (i = 0; i < ARRAY_SIZE(S); i++) { assert(memmem(buf, rv, S[i], strlen(S[i]))); } } From aa8ce29931d6a37aa80f10ae7aa30045108f276d Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 24 Mar 2022 17:55:54 +0800 Subject: [PATCH 204/708] selftests: x86: add 32bit build warnings for SUSE In order to successfully build all these 32bit tests, these 32bit gcc and glibc packages, named gcc-32bit and glibc-devel-static-32bit on SUSE, need to be installed. This patch added this information in warn_32bit_failure. Signed-off-by: Geliang Tang Signed-off-by: Shuah Khan --- tools/testing/selftests/x86/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 53df7d3893d3..0388c4d60af0 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -92,6 +92,10 @@ warn_32bit_failure: echo "If you are using a Fedora-like distribution, try:"; \ echo ""; \ echo " yum install glibc-devel.*i686"; \ + echo ""; \ + echo "If you are using a SUSE-like distribution, try:"; \ + echo ""; \ + echo " zypper install gcc-32bit glibc-devel-static-32bit"; \ exit 0; endif From 52035628fae646269b1379417926fa3d60ef87d0 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Thu, 24 Mar 2022 15:39:28 -0700 Subject: [PATCH 205/708] selftests: fix header dependency for pid_namespace selftests The way the test target was defined before, when building with clang we get a command line like this: clang -Wall -Werror -g -I../../../../usr/include/ \ regression_enomem.c ../pidfd/pidfd.h -o regression_enomem This yields an error, because clang thinks we want to produce both a *.o file, as well as a precompiled header: clang: error: cannot specify -o when generating multiple output files gcc, for whatever reason, doesn't exhibit the same behavior which I suspect is why the problem wasn't noticed before. This can be fixed simply by using the LOCAL_HDRS infrastructure the selftests lib.mk provides. It does the right think and marks the target as depending on the header (so if the header changes, we rebuild), but it filters the header out of the compiler command line, so we don't get the error described above. Signed-off-by: Axel Rasmussen Reviewed-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/pid_namespace/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/pid_namespace/Makefile b/tools/testing/selftests/pid_namespace/Makefile index dcaefa224ca0..edafaca1aeb3 100644 --- a/tools/testing/selftests/pid_namespace/Makefile +++ b/tools/testing/selftests/pid_namespace/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -g -I../../../../usr/include/ -TEST_GEN_PROGS := regression_enomem +TEST_GEN_PROGS = regression_enomem + +LOCAL_HDRS += $(selfdir)/pidfd/pidfd.h include ../lib.mk - -$(OUTPUT)/regression_enomem: regression_enomem.c ../pidfd/pidfd.h From 187816d07729ff88e75d84efbc668642106cebf3 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Thu, 24 Mar 2022 15:39:29 -0700 Subject: [PATCH 206/708] selftests: fix an unused variable warning in pidfd selftest I fixed a few warnings like this in commit e2aa5e650b07 ("selftests: fixup build warnings in pidfd / clone3 tests"), but I missed this one by mistake. Since this variable is unused, remove it. Signed-off-by: Axel Rasmussen Reviewed-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/pidfd_wait.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 17999e082aa7..070c1c876df1 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -95,7 +95,6 @@ TEST(wait_states) .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, .exit_signal = SIGCHLD, }; - int ret; pid_t pid; siginfo_t info = { .si_signo = 0, From 63e6b2a42342c3297cce286fb124c99be9e0f3fd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 24 Mar 2022 16:19:06 -0700 Subject: [PATCH 207/708] selftests/harness: Run TEARDOWN for ASSERT failures The kselftest test harness has traditionally not run the registered TEARDOWN handler when a test encountered an ASSERT. This creates unexpected situations and tests need to be very careful about using ASSERT, which seems a needless hurdle for test writers. Because of the harness's design for optional failure handlers, the original implementation of ASSERT used an abort() to immediately stop execution, but that meant the context for running teardown was lost. Instead, use setjmp/longjmp so that teardown can be done. Failed SETUP routines continue to not be followed by TEARDOWN, though. Cc: Andy Lutomirski Cc: Will Drewry Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/kselftest_harness.h | 49 ++++++++++++++------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 11779405dc80..696eab05f995 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -64,6 +64,7 @@ #include #include #include +#include #include "kselftest.h" @@ -183,7 +184,10 @@ struct __test_metadata *_metadata, \ struct __fixture_variant_metadata *variant) \ { \ - test_name(_metadata); \ + _metadata->setup_completed = true; \ + if (setjmp(_metadata->env) == 0) \ + test_name(_metadata); \ + __test_check_assert(_metadata); \ } \ static struct __test_metadata _##test_name##_object = \ { .name = #test_name, \ @@ -356,10 +360,7 @@ * Defines a test that depends on a fixture (e.g., is part of a test case). * Very similar to TEST() except that *self* is the setup instance of fixture's * datatype exposed for use by the implementation. - * - * Warning: use of ASSERT_* here will skip TEARDOWN. */ -/* TODO(wad) register fixtures on dedicated test lists. */ #define TEST_F(fixture_name, test_name) \ __TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT) @@ -381,12 +382,17 @@ /* fixture data is alloced, setup, and torn down per call. */ \ FIXTURE_DATA(fixture_name) self; \ memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \ - fixture_name##_setup(_metadata, &self, variant->data); \ - /* Let setup failure terminate early. */ \ - if (!_metadata->passed) \ - return; \ - fixture_name##_##test_name(_metadata, &self, variant->data); \ - fixture_name##_teardown(_metadata, &self); \ + if (setjmp(_metadata->env) == 0) { \ + fixture_name##_setup(_metadata, &self, variant->data); \ + /* Let setup failure terminate early. */ \ + if (!_metadata->passed) \ + return; \ + _metadata->setup_completed = true; \ + fixture_name##_##test_name(_metadata, &self, variant->data); \ + } \ + if (_metadata->setup_completed) \ + fixture_name##_teardown(_metadata, &self); \ + __test_check_assert(_metadata); \ } \ static struct __test_metadata \ _##fixture_name##_##test_name##_object = { \ @@ -683,7 +689,7 @@ */ #define OPTIONAL_HANDLER(_assert) \ for (; _metadata->trigger; _metadata->trigger = \ - __bail(_assert, _metadata->no_print, _metadata->step)) + __bail(_assert, _metadata)) #define __INC_STEP(_metadata) \ /* Keep "step" below 255 (which is used for "SKIP" reporting). */ \ @@ -830,6 +836,9 @@ struct __test_metadata { bool timed_out; /* did this test timeout instead of exiting? */ __u8 step; bool no_print; /* manual trigger when TH_LOG_STREAM is not available */ + bool aborted; /* stopped test due to failed ASSERT */ + bool setup_completed; /* did setup finish? */ + jmp_buf env; /* for exiting out of test early */ struct __test_results *results; struct __test_metadata *prev, *next; }; @@ -848,14 +857,24 @@ static inline void __register_test(struct __test_metadata *t) __LIST_APPEND(t->fixture->tests, t); } -static inline int __bail(int for_realz, bool no_print, __u8 step) +static inline int __bail(int for_realz, struct __test_metadata *t) { + /* if this is ASSERT, return immediately. */ if (for_realz) { - if (no_print) - _exit(step); + t->aborted = true; + longjmp(t->env, 1); + } + /* otherwise, end the for loop and continue. */ + return 0; +} + +static inline void __test_check_assert(struct __test_metadata *t) +{ + if (t->aborted) { + if (t->no_print) + _exit(t->step); abort(); } - return 0; } struct __test_metadata *__active_test; From 79ee8aa31d518c1fd5f3b1b1ac39dd1fb4dc7039 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 24 Mar 2022 16:19:07 -0700 Subject: [PATCH 208/708] selftests/harness: Pass variant to teardown FIXTURE_VARIANT data is passed to FIXTURE_SETUP and TEST_F as "variant". In some cases, the variant will change the setup, such that expectations also change on teardown. Also pass variant to FIXTURE_TEARDOWN. The new FIXTURE_TEARDOWN logic is identical to that in FIXTURE_SETUP, right above. Signed-off-by: Willem de Bruijn Reviewed-by: Jakub Kicinski Acked-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201210231010.420298-1-willemdebruijn.kernel@gmail.com Signed-off-by: Shuah Khan --- tools/testing/selftests/kselftest_harness.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 696eab05f995..25f4d54067c0 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -291,7 +291,9 @@ #define FIXTURE_TEARDOWN(fixture_name) \ void fixture_name##_teardown( \ struct __test_metadata __attribute__((unused)) *_metadata, \ - FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) + FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ + const FIXTURE_VARIANT(fixture_name) \ + __attribute__((unused)) *variant) /** * FIXTURE_VARIANT() - Optionally called once per fixture @@ -306,9 +308,9 @@ * ... * }; * - * Defines type of constant parameters provided to FIXTURE_SETUP() and TEST_F() - * as *variant*. Variants allow the same tests to be run with different - * arguments. + * Defines type of constant parameters provided to FIXTURE_SETUP(), TEST_F() and + * FIXTURE_TEARDOWN as *variant*. Variants allow the same tests to be run with + * different arguments. */ #define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name @@ -391,7 +393,7 @@ fixture_name##_##test_name(_metadata, &self, variant->data); \ } \ if (_metadata->setup_completed) \ - fixture_name##_teardown(_metadata, &self); \ + fixture_name##_teardown(_metadata, &self, variant->data); \ __test_check_assert(_metadata); \ } \ static struct __test_metadata \ From 23274739a5b6166f74d8d9cb5243d7bf6b46aab9 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 18 Mar 2022 09:28:13 +0000 Subject: [PATCH 209/708] firmware: arm_scmi: Fix sorting of retrieved clock rates During SCMI Clock protocol initialization, after having retrieved from the SCMI platform all the available discrete rates for a specific clock, the clock rates array is sorted, unfortunately using a pointer to its end as a base instead of its start, so that sorting does not work. Fix invocation of sort() passing as base a pointer to the start of the retrieved clock rates array. Link: https://lore.kernel.org/r/20220318092813.49283-1-cristian.marussi@arm.com Fixes: dccec73de91d ("firmware: arm_scmi: Keep the discrete clock rates sorted") Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/clock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index cf6fed6dec77..ef6431c6eb1c 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -210,7 +210,8 @@ scmi_clock_describe_rates_get(const struct scmi_protocol_handle *ph, u32 clk_id, if (rate_discrete && rate) { clk->list.num_rates = tot_rate_cnt; - sort(rate, tot_rate_cnt, sizeof(*rate), rate_cmp_func, NULL); + sort(clk->list.rates, tot_rate_cnt, sizeof(*rate), + rate_cmp_func, NULL); } clk->rate_discrete = rate_discrete; From f1ad601d1f4a8f5dac69706d641f3a88beccc488 Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Fri, 1 Apr 2022 07:55:37 +0000 Subject: [PATCH 210/708] firmware: arm_scmi: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.16/process/deprecated.html#zero-length-and-one-element-arrays Link: https://lore.kernel.org/r/20220401075537.2407376-1-lv.ruyi@zte.com.cn Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index ef6431c6eb1c..45600acc0f45 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -49,7 +49,7 @@ struct scmi_msg_resp_clock_describe_rates { struct { __le32 value_low; __le32 value_high; - } rate[0]; + } rate[]; #define RATE_TO_U64(X) \ ({ \ typeof(X) x = (X); \ From bf36619a5463fbe6d3ecde37bb13680b532a253b Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Apr 2022 11:24:19 +0100 Subject: [PATCH 211/708] firmware: arm_scmi: Fix sparse warnings in OPTEE transport driver The sparse checker complains about converting pointers between address spaces. We correctly stored an __iomem pointer in struct scmi_optee_channel, but discarded the __iomem when returning it from get_channel_shm, causing one warning. Then we passed the non-__iomem pointer return from get_channel_shm at two other places, where an __iomem pointer is expected, causing couple of other warnings Add the appropriate __iomem annotations at all places where it is missing. optee.c:414:20: warning: incorrect type in return expression (different address spaces) optee.c:414:20: expected struct scmi_shared_mem * optee.c:414:20: got struct scmi_shared_mem [noderef] __iomem *shmem optee.c:426:26: warning: incorrect type in argument 1 (different address spaces) optee.c:426:26: expected struct scmi_shared_mem [noderef] __iomem *shmem optee.c:426:26: got struct scmi_shared_mem *shmem optee.c:441:30: warning: incorrect type in argument 1 (different address spaces) optee.c:441:30: expected struct scmi_shared_mem [noderef] __iomem *shmem optee.c:441:30: got struct scmi_shared_mem *shmem Link: https://lore.kernel.org/r/20220404102419.1159705-1-sudeep.holla@arm.com Cc: Etienne Carriere Cc: Cristian Marussi Reported-by: kernel test robot Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/optee.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/arm_scmi/optee.c b/drivers/firmware/arm_scmi/optee.c index 734f1eeee161..8302a2b4aeeb 100644 --- a/drivers/firmware/arm_scmi/optee.c +++ b/drivers/firmware/arm_scmi/optee.c @@ -405,8 +405,8 @@ static int scmi_optee_chan_free(int id, void *p, void *data) return 0; } -static struct scmi_shared_mem *get_channel_shm(struct scmi_optee_channel *chan, - struct scmi_xfer *xfer) +static struct scmi_shared_mem __iomem * +get_channel_shm(struct scmi_optee_channel *chan, struct scmi_xfer *xfer) { if (!chan) return NULL; @@ -419,7 +419,7 @@ static int scmi_optee_send_message(struct scmi_chan_info *cinfo, struct scmi_xfer *xfer) { struct scmi_optee_channel *channel = cinfo->transport_info; - struct scmi_shared_mem *shmem = get_channel_shm(channel, xfer); + struct scmi_shared_mem __iomem *shmem = get_channel_shm(channel, xfer); int ret; mutex_lock(&channel->mu); @@ -436,7 +436,7 @@ static void scmi_optee_fetch_response(struct scmi_chan_info *cinfo, struct scmi_xfer *xfer) { struct scmi_optee_channel *channel = cinfo->transport_info; - struct scmi_shared_mem *shmem = get_channel_shm(channel, xfer); + struct scmi_shared_mem __iomem *shmem = get_channel_shm(channel, xfer); shmem_fetch_response(shmem, xfer); } From 38d4e5cf5b08798f093374e53c2f4609d5382dd5 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 22 Mar 2022 13:48:00 +0100 Subject: [PATCH 212/708] drm/nouveau/pmu: Add missing callbacks for Tegra devices Fixes a crash booting on those platforms with nouveau. Fixes: 4cdd2450bf73 ("drm/nouveau/pmu/gm200-: use alternate falcon reset sequence") Cc: Ben Skeggs Cc: Karol Herbst Cc: dri-devel@lists.freedesktop.org Cc: nouveau@lists.freedesktop.org Cc: # v5.17+ Signed-off-by: Karol Herbst Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20220322124800.2605463-1-kherbst@redhat.com --- drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c index e1772211b0a4..612310d5d481 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c @@ -216,6 +216,7 @@ gm20b_pmu = { .intr = gt215_pmu_intr, .recv = gm20b_pmu_recv, .initmsg = gm20b_pmu_initmsg, + .reset = gf100_pmu_reset, }; #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c index 6bf7fc1bd1e3..1a6f9c3af5ec 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c @@ -23,7 +23,7 @@ */ #include "priv.h" -static void +void gp102_pmu_reset(struct nvkm_pmu *pmu) { struct nvkm_device *device = pmu->subdev.device; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c index ba1583bb618b..94cfb1791af6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c @@ -83,6 +83,7 @@ gp10b_pmu = { .intr = gt215_pmu_intr, .recv = gm20b_pmu_recv, .initmsg = gm20b_pmu_initmsg, + .reset = gp102_pmu_reset, }; #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h index bcaade758ff7..21abf31f4442 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h @@ -41,6 +41,7 @@ int gt215_pmu_send(struct nvkm_pmu *, u32[2], u32, u32, u32, u32); bool gf100_pmu_enabled(struct nvkm_pmu *); void gf100_pmu_reset(struct nvkm_pmu *); +void gp102_pmu_reset(struct nvkm_pmu *pmu); void gk110_pmu_pgob(struct nvkm_pmu *, bool); From a3e4bc23d5470b2beb7cc42a86b6a3e75b704c15 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 29 Mar 2022 10:59:20 -0600 Subject: [PATCH 213/708] io_uring: defer splice/tee file validity check until command issue In preparation for not using the file at prep time, defer checking if this file refers to a valid io_uring instance until issue time. This also means we can get rid of the cleanup flag for splice and tee. Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 49 +++++++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 9108c56bff5b..0152ef49cf46 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -654,10 +654,10 @@ struct io_epoll { struct io_splice { struct file *file_out; - struct file *file_in; loff_t off_out; loff_t off_in; u64 len; + int splice_fd_in; unsigned int flags; }; @@ -1687,14 +1687,6 @@ static void io_prep_async_work(struct io_kiocb *req) if (def->unbound_nonreg_file) req->work.flags |= IO_WQ_WORK_UNBOUND; } - - switch (req->opcode) { - case IORING_OP_SPLICE: - case IORING_OP_TEE: - if (!S_ISREG(file_inode(req->splice.file_in)->i_mode)) - req->work.flags |= IO_WQ_WORK_UNBOUND; - break; - } } static void io_prep_async_link(struct io_kiocb *req) @@ -4369,18 +4361,11 @@ static int __io_splice_prep(struct io_kiocb *req, if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - sp->file_in = NULL; sp->len = READ_ONCE(sqe->len); sp->flags = READ_ONCE(sqe->splice_flags); - if (unlikely(sp->flags & ~valid_flags)) return -EINVAL; - - sp->file_in = io_file_get(req->ctx, req, READ_ONCE(sqe->splice_fd_in), - (sp->flags & SPLICE_F_FD_IN_FIXED)); - if (!sp->file_in) - return -EBADF; - req->flags |= REQ_F_NEED_CLEANUP; + sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in); return 0; } @@ -4395,20 +4380,27 @@ static int io_tee_prep(struct io_kiocb *req, static int io_tee(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = &req->splice; - struct file *in = sp->file_in; struct file *out = sp->file_out; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; + struct file *in; long ret = 0; if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; + + in = io_file_get(req->ctx, req, sp->splice_fd_in, + (sp->flags & SPLICE_F_FD_IN_FIXED)); + if (!in) { + ret = -EBADF; + goto done; + } + if (sp->len) ret = do_tee(in, out, sp->len, flags); if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) io_put_file(in); - req->flags &= ~REQ_F_NEED_CLEANUP; - +done: if (ret != sp->len) req_set_fail(req); io_req_complete(req, ret); @@ -4427,15 +4419,22 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) static int io_splice(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = &req->splice; - struct file *in = sp->file_in; struct file *out = sp->file_out; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; loff_t *poff_in, *poff_out; + struct file *in; long ret = 0; if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; + in = io_file_get(req->ctx, req, sp->splice_fd_in, + (sp->flags & SPLICE_F_FD_IN_FIXED)); + if (!in) { + ret = -EBADF; + goto done; + } + poff_in = (sp->off_in == -1) ? NULL : &sp->off_in; poff_out = (sp->off_out == -1) ? NULL : &sp->off_out; @@ -4444,8 +4443,7 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags) if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) io_put_file(in); - req->flags &= ~REQ_F_NEED_CLEANUP; - +done: if (ret != sp->len) req_set_fail(req); io_req_complete(req, ret); @@ -7176,11 +7174,6 @@ static void io_clean_op(struct io_kiocb *req) kfree(io->free_iov); break; } - case IORING_OP_SPLICE: - case IORING_OP_TEE: - if (!(req->splice.flags & SPLICE_F_FD_IN_FIXED)) - io_put_file(req->splice.file_in); - break; case IORING_OP_OPENAT: case IORING_OP_OPENAT2: if (req->open.filename) From 584b0180f0f4d67d7145950fe68c625f06c88b10 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 29 Mar 2022 10:48:05 -0600 Subject: [PATCH 214/708] io_uring: move read/write file prep state into actual opcode handler In preparation for not necessarily having a file assigned at prep time, defer any initialization associated with the file to when the opcode handler is run. Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 101 ++++++++++++++++++++++++++------------------------ 1 file changed, 53 insertions(+), 48 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0152ef49cf46..969f65de9972 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -592,7 +592,8 @@ struct io_rw { /* NOTE: kiocb has the file as the first member, so don't do it here */ struct kiocb kiocb; u64 addr; - u64 len; + u32 len; + u32 flags; }; struct io_connect { @@ -3178,42 +3179,11 @@ static inline bool io_file_supports_nowait(struct io_kiocb *req) static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - struct io_ring_ctx *ctx = req->ctx; struct kiocb *kiocb = &req->rw.kiocb; - struct file *file = req->file; unsigned ioprio; int ret; - if (!io_req_ffs_set(req)) - req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT; - kiocb->ki_pos = READ_ONCE(sqe->off); - kiocb->ki_flags = iocb_flags(file); - ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); - if (unlikely(ret)) - return ret; - - /* - * If the file is marked O_NONBLOCK, still allow retry for it if it - * supports async. Otherwise it's impossible to use O_NONBLOCK files - * reliably. If not, or it IOCB_NOWAIT is set, don't retry. - */ - if ((kiocb->ki_flags & IOCB_NOWAIT) || - ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req))) - req->flags |= REQ_F_NOWAIT; - - if (ctx->flags & IORING_SETUP_IOPOLL) { - if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll) - return -EOPNOTSUPP; - - kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE; - kiocb->ki_complete = io_complete_rw_iopoll; - req->iopoll_completed = 0; - } else { - if (kiocb->ki_flags & IOCB_HIPRI) - return -EINVAL; - kiocb->ki_complete = io_complete_rw; - } ioprio = READ_ONCE(sqe->ioprio); if (ioprio) { @@ -3229,6 +3199,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->imu = NULL; req->rw.addr = READ_ONCE(sqe->addr); req->rw.len = READ_ONCE(sqe->len); + req->rw.flags = READ_ONCE(sqe->rw_flags); req->buf_index = READ_ONCE(sqe->buf_index); return 0; } @@ -3732,13 +3703,6 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw) return 0; } -static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - if (unlikely(!(req->file->f_mode & FMODE_READ))) - return -EBADF; - return io_prep_rw(req, sqe); -} - /* * This is our waitqueue callback handler, registered through __folio_lock_async() * when we initially tried to do the IO with the iocb armed our waitqueue. @@ -3826,6 +3790,49 @@ static bool need_read_all(struct io_kiocb *req) S_ISBLK(file_inode(req->file)->i_mode); } +static int io_rw_init_file(struct io_kiocb *req, fmode_t mode) +{ + struct kiocb *kiocb = &req->rw.kiocb; + struct io_ring_ctx *ctx = req->ctx; + struct file *file = req->file; + int ret; + + if (unlikely(!file || !(file->f_mode & mode))) + return -EBADF; + + if (!io_req_ffs_set(req)) + req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT; + + kiocb->ki_flags = iocb_flags(file); + ret = kiocb_set_rw_flags(kiocb, req->rw.flags); + if (unlikely(ret)) + return ret; + + /* + * If the file is marked O_NONBLOCK, still allow retry for it if it + * supports async. Otherwise it's impossible to use O_NONBLOCK files + * reliably. If not, or it IOCB_NOWAIT is set, don't retry. + */ + if ((kiocb->ki_flags & IOCB_NOWAIT) || + ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req))) + req->flags |= REQ_F_NOWAIT; + + if (ctx->flags & IORING_SETUP_IOPOLL) { + if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll) + return -EOPNOTSUPP; + + kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE; + kiocb->ki_complete = io_complete_rw_iopoll; + req->iopoll_completed = 0; + } else { + if (kiocb->ki_flags & IOCB_HIPRI) + return -EINVAL; + kiocb->ki_complete = io_complete_rw; + } + + return 0; +} + static int io_read(struct io_kiocb *req, unsigned int issue_flags) { struct io_rw_state __s, *s = &__s; @@ -3861,6 +3868,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) iov_iter_restore(&s->iter, &s->iter_state); iovec = NULL; } + ret = io_rw_init_file(req, FMODE_READ); + if (unlikely(ret)) + return ret; req->result = iov_iter_count(&s->iter); if (force_nonblock) { @@ -3964,13 +3974,6 @@ out_free: return 0; } -static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - if (unlikely(!(req->file->f_mode & FMODE_WRITE))) - return -EBADF; - return io_prep_rw(req, sqe); -} - static int io_write(struct io_kiocb *req, unsigned int issue_flags) { struct io_rw_state __s, *s = &__s; @@ -3991,6 +3994,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) iov_iter_restore(&s->iter, &s->iter_state); iovec = NULL; } + ret = io_rw_init_file(req, FMODE_WRITE); + if (unlikely(ret)) + return ret; req->result = iov_iter_count(&s->iter); if (force_nonblock) { @@ -6987,11 +6993,10 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) case IORING_OP_READV: case IORING_OP_READ_FIXED: case IORING_OP_READ: - return io_read_prep(req, sqe); case IORING_OP_WRITEV: case IORING_OP_WRITE_FIXED: case IORING_OP_WRITE: - return io_write_prep(req, sqe); + return io_prep_rw(req, sqe); case IORING_OP_POLL_ADD: return io_poll_add_prep(req, sqe); case IORING_OP_POLL_REMOVE: From fb39d30e227233498c8debe6a9fe3e7cf575c85f Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 1 Apr 2022 13:51:34 -0300 Subject: [PATCH 215/708] cifs: force new session setup and tcon for dfs Do not reuse existing sessions and tcons in DFS failover as it might connect to different servers and shares. Signed-off-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Reviewed-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/cifs/connect.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 3ca06bd88b6e..54155eb4faac 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -453,9 +453,7 @@ static int reconnect_target_unlocked(struct TCP_Server_Info *server, struct dfs_ return rc; } -static int -reconnect_dfs_server(struct TCP_Server_Info *server, - bool mark_smb_session) +static int reconnect_dfs_server(struct TCP_Server_Info *server) { int rc = 0; const char *refpath = server->current_fullpath + 1; @@ -479,7 +477,12 @@ reconnect_dfs_server(struct TCP_Server_Info *server, if (!cifs_tcp_ses_needs_reconnect(server, num_targets)) return 0; - cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session); + /* + * Unconditionally mark all sessions & tcons for reconnect as we might be connecting to a + * different server or share during failover. It could be improved by adding some logic to + * only do that in case it connects to a different server or share, though. + */ + cifs_mark_tcp_ses_conns_for_reconnect(server, true); cifs_abort_connection(server); @@ -537,7 +540,7 @@ int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) } spin_unlock(&cifs_tcp_ses_lock); - return reconnect_dfs_server(server, mark_smb_session); + return reconnect_dfs_server(server); } #else int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) From 7cd1cc415dd8d0dca7244c9eafb9a0adc8036805 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 1 Apr 2022 17:50:59 -0500 Subject: [PATCH 216/708] cifs: update internal module number To 2.36 Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 15a5c5db038b..c0542bdcd06b 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -153,5 +153,5 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ #define SMB3_PRODUCT_BUILD 35 -#define CIFS_VERSION "2.35" +#define CIFS_VERSION "2.36" #endif /* _CIFSFS_H */ From e3d37210df5c41c51147a2d5d465de1a4d77be7a Mon Sep 17 00:00:00 2001 From: Jamie Bainbridge Date: Mon, 4 Apr 2022 09:47:48 +1000 Subject: [PATCH 217/708] sctp: count singleton chunks in assoc user stats Singleton chunks (INIT, HEARTBEAT PMTU probes, and SHUTDOWN- COMPLETE) are not counted in SCTP_GET_ASOC_STATS "sas_octrlchunks" counter available to the assoc owner. These are all control chunks so they should be counted as such. Add counting of singleton chunks so they are properly accounted for. Fixes: 196d67593439 ("sctp: Add support to per-association statistics via a new SCTP_GET_ASSOC_STATS call") Signed-off-by: Jamie Bainbridge Acked-by: Marcelo Ricardo Leitner Link: https://lore.kernel.org/r/c9ba8785789880cf07923b8a5051e174442ea9ee.1649029663.git.jamie.bainbridge@gmail.com Signed-off-by: Paolo Abeni --- net/sctp/outqueue.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index a18609f608fb..e213aaf45d67 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -914,6 +914,7 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx) ctx->asoc->base.sk->sk_err = -error; return; } + ctx->asoc->stats.octrlchunks++; break; case SCTP_CID_ABORT: @@ -938,7 +939,10 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx) case SCTP_CID_HEARTBEAT: if (chunk->pmtu_probe) { - sctp_packet_singleton(ctx->transport, chunk, ctx->gfp); + error = sctp_packet_singleton(ctx->transport, + chunk, ctx->gfp); + if (!error) + ctx->asoc->stats.octrlchunks++; break; } fallthrough; From 8047f98c8958d0f0c29882298ec293ff09ffea92 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 23 Mar 2022 16:48:23 +0100 Subject: [PATCH 218/708] dt-bindings: display: bridge: Drop requirement on input port for DSI devices MIPI-DSI devices, if they are controlled through the bus itself, have to be described as a child node of the controller they are attached to. Thus, there's no requirement on the controller having an OF-Graph output port to model the data stream: it's assumed that it would go from the parent to the child. However, some bridges controlled through the DSI bus still require an input OF-Graph port, thus requiring a controller with an OF-Graph output port. This prevents those bridges from being used with the controllers that do not have one without any particular reason to. Let's drop that requirement. Signed-off-by: Maxime Ripard Reviewed-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20220323154823.839469-1-maxime@cerno.tech --- .../devicetree/bindings/display/bridge/chipone,icn6211.yaml | 1 - .../devicetree/bindings/display/bridge/toshiba,tc358762.yaml | 1 - 2 files changed, 2 deletions(-) diff --git a/Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml b/Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml index 62c3bd4cb28d..7257fd0ae4da 100644 --- a/Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml +++ b/Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml @@ -51,7 +51,6 @@ properties: Video port for MIPI DPI output (panel or connector). required: - - port@0 - port@1 required: diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml index 5216c27fc0ad..a412a1da950f 100644 --- a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml +++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml @@ -39,7 +39,6 @@ properties: Video port for MIPI DPI output (panel or connector). required: - - port@0 - port@1 required: From 5b6547ed97f4f5dfc23f8e3970af6d11d7b7ed7e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Mar 2022 22:03:41 +0100 Subject: [PATCH 219/708] sched/core: Fix forceidle balancing Steve reported that ChromeOS encounters the forceidle balancer being ran from rt_mutex_setprio()'s balance_callback() invocation and explodes. Now, the forceidle balancer gets queued every time the idle task gets selected, set_next_task(), which is strictly too often. rt_mutex_setprio() also uses set_next_task() in the 'change' pattern: queued = task_on_rq_queued(p); /* p->on_rq == TASK_ON_RQ_QUEUED */ running = task_current(rq, p); /* rq->curr == p */ if (queued) dequeue_task(...); if (running) put_prev_task(...); /* change task properties */ if (queued) enqueue_task(...); if (running) set_next_task(...); However, rt_mutex_setprio() will explicitly not run this pattern on the idle task (since priority boosting the idle task is quite insane). Most other 'change' pattern users are pidhash based and would also not apply to idle. Also, the change pattern doesn't contain a __balance_callback() invocation and hence we could have an out-of-band balance-callback, which *should* trigger the WARN in rq_pin_lock() (which guards against this exact anti-pattern). So while none of that explains how this happens, it does indicate that having it in set_next_task() might not be the most robust option. Instead, explicitly queue the forceidle balancer from pick_next_task() when it does indeed result in forceidle selection. Having it here, ensures it can only be triggered under the __schedule() rq->lock instance, and hence must be ran from that context. This also happens to clean up the code a little, so win-win. Fixes: d2dfa17bc7de ("sched: Trivial forced-newidle balancer") Reported-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Tested-by: T.J. Alumbaugh Link: https://lkml.kernel.org/r/20220330160535.GN8939@worktop.programming.kicks-ass.net --- kernel/sched/core.c | 14 ++++++++++---- kernel/sched/idle.c | 1 - kernel/sched/sched.h | 6 ------ 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d575b4914925..017ee7807930 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5752,6 +5752,8 @@ static inline struct task_struct *pick_task(struct rq *rq) extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi); +static void queue_core_balance(struct rq *rq); + static struct task_struct * pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) { @@ -5801,7 +5803,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) } rq->core_pick = NULL; - return next; + goto out; } put_prev_task_balance(rq, prev, rf); @@ -5851,7 +5853,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) */ WARN_ON_ONCE(fi_before); task_vruntime_update(rq, next, false); - goto done; + goto out_set_next; } } @@ -5970,8 +5972,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) resched_curr(rq_i); } -done: +out_set_next: set_next_task(rq, next); +out: + if (rq->core->core_forceidle_count && next == rq->idle) + queue_core_balance(rq); + return next; } @@ -6066,7 +6072,7 @@ static void sched_core_balance(struct rq *rq) static DEFINE_PER_CPU(struct callback_head, core_balance_head); -void queue_core_balance(struct rq *rq) +static void queue_core_balance(struct rq *rq) { if (!sched_core_enabled(rq)) return; diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 8f8b5020e76a..ecb0d7052877 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -434,7 +434,6 @@ static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool fir { update_idle_core(rq); schedstat_inc(rq->sched_goidle); - queue_core_balance(rq); } #ifdef CONFIG_SMP diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 58263f90c559..8dccb34eb190 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1232,8 +1232,6 @@ static inline bool sched_group_cookie_match(struct rq *rq, return false; } -extern void queue_core_balance(struct rq *rq); - static inline bool sched_core_enqueued(struct task_struct *p) { return !RB_EMPTY_NODE(&p->core_node); @@ -1267,10 +1265,6 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq) return &rq->__lock; } -static inline void queue_core_balance(struct rq *rq) -{ -} - static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p) { return true; From 386ef214c3c6ab111d05e1790e79475363abaa05 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 17 Mar 2022 15:51:32 +0100 Subject: [PATCH 220/708] sched: Teach the forced-newidle balancer about CPU affinity limitation. try_steal_cookie() looks at task_struct::cpus_mask to decide if the task could be moved to `this' CPU. It ignores that the task might be in a migration disabled section while not on the CPU. In this case the task must not be moved otherwise per-CPU assumption are broken. Use is_cpu_allowed(), as suggested by Peter Zijlstra, to decide if the a task can be moved. Fixes: d2dfa17bc7de6 ("sched: Trivial forced-newidle balancer") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/YjNK9El+3fzGmswf@linutronix.de --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 017ee7807930..51efaabac3e4 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6006,7 +6006,7 @@ static bool try_steal_cookie(int this, int that) if (p == src->core_pick || p == src->curr) goto next; - if (!cpumask_test_cpu(this, &p->cpus_mask)) + if (!is_cpu_allowed(p, this)) goto next; if (p->core_occupation > dst->idle->core_occupation) From 0a70045ed8516dfcff4b5728557e1ef3fd017c53 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 30 Mar 2022 10:43:28 +0200 Subject: [PATCH 221/708] entry: Fix compile error in dynamic_irqentry_exit_cond_resched() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kernel/entry/common.c: In function ‘dynamic_irqentry_exit_cond_resched’: kernel/entry/common.c:409:14: error: implicit declaration of function ‘static_key_unlikely’; did you mean ‘static_key_enable’? [-Werror=implicit-function-declaration] 409 | if (!static_key_unlikely(&sk_dynamic_irqentry_exit_cond_resched)) | ^~~~~~~~~~~~~~~~~~~ | static_key_enable static_key_unlikely() should be static_branch_unlikely(). Fixes: 99cf983cc8bca ("sched/preempt: Add PREEMPT_DYNAMIC using static keys") Signed-off-by: Sven Schnelle Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220330084328.1805665-1-svens@linux.ibm.com --- kernel/entry/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/entry/common.c b/kernel/entry/common.c index e57a224d6b79..93c3b86e781c 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -392,7 +392,7 @@ DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); void dynamic_irqentry_exit_cond_resched(void) { - if (!static_key_unlikely(&sk_dynamic_irqentry_exit_cond_resched)) + if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched)) return; raw_irqentry_exit_cond_resched(); } From 1cd5f059d956e6f614ba6666ecdbcf95db05d5f5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 18 Mar 2022 21:24:38 +0100 Subject: [PATCH 222/708] x86,static_call: Fix __static_call_return0 for i386 Paolo reported that the instruction sequence that is used to replace: call __static_call_return0 namely: 66 66 48 31 c0 data16 data16 xor %rax,%rax decodes to something else on i386, namely: 66 66 48 data16 dec %ax 31 c0 xor %eax,%eax Which is a nonsensical sequence that happens to have the same outcome. *However* an important distinction is that it consists of 2 instructions which is a problem when the thing needs to be overwriten with a regular call instruction again. As such, replace the instruction with something that decodes the same on both i386 and x86_64. Fixes: 3f2a8fc4b15d ("static_call/x86: Add __static_call_return0()") Reported-by: Paolo Bonzini Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220318204419.GT8939@worktop.programming.kicks-ass.net --- arch/x86/kernel/static_call.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 531fb4cbb63f..aa72cefdd5be 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -12,10 +12,9 @@ enum insn_type { }; /* - * data16 data16 xorq %rax, %rax - a single 5 byte instruction that clears %rax - * The REX.W cancels the effect of any data16. + * cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax */ -static const u8 xor5rax[] = { 0x66, 0x66, 0x48, 0x31, 0xc0 }; +static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 }; static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; From 8fd4ddda2f49a66bf5dd3d0c01966c4b1971308b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 14 Mar 2022 12:49:36 +0100 Subject: [PATCH 223/708] static_call: Don't make __static_call_return0 static System.map shows that vmlinux contains several instances of __static_call_return0(): c0004fc0 t __static_call_return0 c0011518 t __static_call_return0 c00d8160 t __static_call_return0 arch_static_call_transform() uses the middle one to check whether we are setting a call to __static_call_return0 or not: c0011520 : c0011520: 3d 20 c0 01 lis r9,-16383 <== r9 = 0xc001 << 16 c0011524: 39 29 15 18 addi r9,r9,5400 <== r9 += 0x1518 c0011528: 7c 05 48 00 cmpw r5,r9 <== r9 has value 0xc0011518 here So if static_call_update() is called with one of the other instances of __static_call_return0(), arch_static_call_transform() won't recognise it. In order to work properly, global single instance of __static_call_return0() is required. Fixes: 3f2a8fc4b15d ("static_call/x86: Add __static_call_return0()") Signed-off-by: Christophe Leroy Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/30821468a0e7d28251954b578e5051dc09300d04.1647258493.git.christophe.leroy@csgroup.eu --- include/linux/static_call.h | 5 +- kernel/Makefile | 3 +- kernel/static_call.c | 541 ----------------------------------- kernel/static_call_inline.c | 543 ++++++++++++++++++++++++++++++++++++ 4 files changed, 546 insertions(+), 546 deletions(-) create mode 100644 kernel/static_call_inline.c diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 3e56a9751c06..fcc5b48989b3 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -248,10 +248,7 @@ static inline int static_call_text_reserved(void *start, void *end) return 0; } -static inline long __static_call_return0(void) -{ - return 0; -} +extern long __static_call_return0(void); #define EXPORT_STATIC_CALL(name) \ EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ diff --git a/kernel/Makefile b/kernel/Makefile index 471d71935e90..847a82bfe0e3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -114,7 +114,8 @@ obj-$(CONFIG_CPU_PM) += cpu_pm.o obj-$(CONFIG_BPF) += bpf/ obj-$(CONFIG_KCSAN) += kcsan/ obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o -obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o +obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o +obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call_inline.o obj-$(CONFIG_CFI_CLANG) += cfi.o obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/static_call.c b/kernel/static_call.c index f2b8baea35d2..e9c3e69f3837 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -1,549 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -#include #include -#include -#include -#include -#include -#include -#include -#include -#include - -extern struct static_call_site __start_static_call_sites[], - __stop_static_call_sites[]; -extern struct static_call_tramp_key __start_static_call_tramp_key[], - __stop_static_call_tramp_key[]; - -static bool static_call_initialized; - -/* mutex to protect key modules/sites */ -static DEFINE_MUTEX(static_call_mutex); - -static void static_call_lock(void) -{ - mutex_lock(&static_call_mutex); -} - -static void static_call_unlock(void) -{ - mutex_unlock(&static_call_mutex); -} - -static inline void *static_call_addr(struct static_call_site *site) -{ - return (void *)((long)site->addr + (long)&site->addr); -} - -static inline unsigned long __static_call_key(const struct static_call_site *site) -{ - return (long)site->key + (long)&site->key; -} - -static inline struct static_call_key *static_call_key(const struct static_call_site *site) -{ - return (void *)(__static_call_key(site) & ~STATIC_CALL_SITE_FLAGS); -} - -/* These assume the key is word-aligned. */ -static inline bool static_call_is_init(struct static_call_site *site) -{ - return __static_call_key(site) & STATIC_CALL_SITE_INIT; -} - -static inline bool static_call_is_tail(struct static_call_site *site) -{ - return __static_call_key(site) & STATIC_CALL_SITE_TAIL; -} - -static inline void static_call_set_init(struct static_call_site *site) -{ - site->key = (__static_call_key(site) | STATIC_CALL_SITE_INIT) - - (long)&site->key; -} - -static int static_call_site_cmp(const void *_a, const void *_b) -{ - const struct static_call_site *a = _a; - const struct static_call_site *b = _b; - const struct static_call_key *key_a = static_call_key(a); - const struct static_call_key *key_b = static_call_key(b); - - if (key_a < key_b) - return -1; - - if (key_a > key_b) - return 1; - - return 0; -} - -static void static_call_site_swap(void *_a, void *_b, int size) -{ - long delta = (unsigned long)_a - (unsigned long)_b; - struct static_call_site *a = _a; - struct static_call_site *b = _b; - struct static_call_site tmp = *a; - - a->addr = b->addr - delta; - a->key = b->key - delta; - - b->addr = tmp.addr + delta; - b->key = tmp.key + delta; -} - -static inline void static_call_sort_entries(struct static_call_site *start, - struct static_call_site *stop) -{ - sort(start, stop - start, sizeof(struct static_call_site), - static_call_site_cmp, static_call_site_swap); -} - -static inline bool static_call_key_has_mods(struct static_call_key *key) -{ - return !(key->type & 1); -} - -static inline struct static_call_mod *static_call_key_next(struct static_call_key *key) -{ - if (!static_call_key_has_mods(key)) - return NULL; - - return key->mods; -} - -static inline struct static_call_site *static_call_key_sites(struct static_call_key *key) -{ - if (static_call_key_has_mods(key)) - return NULL; - - return (struct static_call_site *)(key->type & ~1); -} - -void __static_call_update(struct static_call_key *key, void *tramp, void *func) -{ - struct static_call_site *site, *stop; - struct static_call_mod *site_mod, first; - - cpus_read_lock(); - static_call_lock(); - - if (key->func == func) - goto done; - - key->func = func; - - arch_static_call_transform(NULL, tramp, func, false); - - /* - * If uninitialized, we'll not update the callsites, but they still - * point to the trampoline and we just patched that. - */ - if (WARN_ON_ONCE(!static_call_initialized)) - goto done; - - first = (struct static_call_mod){ - .next = static_call_key_next(key), - .mod = NULL, - .sites = static_call_key_sites(key), - }; - - for (site_mod = &first; site_mod; site_mod = site_mod->next) { - bool init = system_state < SYSTEM_RUNNING; - struct module *mod = site_mod->mod; - - if (!site_mod->sites) { - /* - * This can happen if the static call key is defined in - * a module which doesn't use it. - * - * It also happens in the has_mods case, where the - * 'first' entry has no sites associated with it. - */ - continue; - } - - stop = __stop_static_call_sites; - - if (mod) { -#ifdef CONFIG_MODULES - stop = mod->static_call_sites + - mod->num_static_call_sites; - init = mod->state == MODULE_STATE_COMING; -#endif - } - - for (site = site_mod->sites; - site < stop && static_call_key(site) == key; site++) { - void *site_addr = static_call_addr(site); - - if (!init && static_call_is_init(site)) - continue; - - if (!kernel_text_address((unsigned long)site_addr)) { - /* - * This skips patching built-in __exit, which - * is part of init_section_contains() but is - * not part of kernel_text_address(). - * - * Skipping built-in __exit is fine since it - * will never be executed. - */ - WARN_ONCE(!static_call_is_init(site), - "can't patch static call site at %pS", - site_addr); - continue; - } - - arch_static_call_transform(site_addr, NULL, func, - static_call_is_tail(site)); - } - } - -done: - static_call_unlock(); - cpus_read_unlock(); -} -EXPORT_SYMBOL_GPL(__static_call_update); - -static int __static_call_init(struct module *mod, - struct static_call_site *start, - struct static_call_site *stop) -{ - struct static_call_site *site; - struct static_call_key *key, *prev_key = NULL; - struct static_call_mod *site_mod; - - if (start == stop) - return 0; - - static_call_sort_entries(start, stop); - - for (site = start; site < stop; site++) { - void *site_addr = static_call_addr(site); - - if ((mod && within_module_init((unsigned long)site_addr, mod)) || - (!mod && init_section_contains(site_addr, 1))) - static_call_set_init(site); - - key = static_call_key(site); - if (key != prev_key) { - prev_key = key; - - /* - * For vmlinux (!mod) avoid the allocation by storing - * the sites pointer in the key itself. Also see - * __static_call_update()'s @first. - * - * This allows architectures (eg. x86) to call - * static_call_init() before memory allocation works. - */ - if (!mod) { - key->sites = site; - key->type |= 1; - goto do_transform; - } - - site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); - if (!site_mod) - return -ENOMEM; - - /* - * When the key has a direct sites pointer, extract - * that into an explicit struct static_call_mod, so we - * can have a list of modules. - */ - if (static_call_key_sites(key)) { - site_mod->mod = NULL; - site_mod->next = NULL; - site_mod->sites = static_call_key_sites(key); - - key->mods = site_mod; - - site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); - if (!site_mod) - return -ENOMEM; - } - - site_mod->mod = mod; - site_mod->sites = site; - site_mod->next = static_call_key_next(key); - key->mods = site_mod; - } - -do_transform: - arch_static_call_transform(site_addr, NULL, key->func, - static_call_is_tail(site)); - } - - return 0; -} - -static int addr_conflict(struct static_call_site *site, void *start, void *end) -{ - unsigned long addr = (unsigned long)static_call_addr(site); - - if (addr <= (unsigned long)end && - addr + CALL_INSN_SIZE > (unsigned long)start) - return 1; - - return 0; -} - -static int __static_call_text_reserved(struct static_call_site *iter_start, - struct static_call_site *iter_stop, - void *start, void *end, bool init) -{ - struct static_call_site *iter = iter_start; - - while (iter < iter_stop) { - if (init || !static_call_is_init(iter)) { - if (addr_conflict(iter, start, end)) - return 1; - } - iter++; - } - - return 0; -} - -#ifdef CONFIG_MODULES - -static int __static_call_mod_text_reserved(void *start, void *end) -{ - struct module *mod; - int ret; - - preempt_disable(); - mod = __module_text_address((unsigned long)start); - WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); - if (!try_module_get(mod)) - mod = NULL; - preempt_enable(); - - if (!mod) - return 0; - - ret = __static_call_text_reserved(mod->static_call_sites, - mod->static_call_sites + mod->num_static_call_sites, - start, end, mod->state == MODULE_STATE_COMING); - - module_put(mod); - - return ret; -} - -static unsigned long tramp_key_lookup(unsigned long addr) -{ - struct static_call_tramp_key *start = __start_static_call_tramp_key; - struct static_call_tramp_key *stop = __stop_static_call_tramp_key; - struct static_call_tramp_key *tramp_key; - - for (tramp_key = start; tramp_key != stop; tramp_key++) { - unsigned long tramp; - - tramp = (long)tramp_key->tramp + (long)&tramp_key->tramp; - if (tramp == addr) - return (long)tramp_key->key + (long)&tramp_key->key; - } - - return 0; -} - -static int static_call_add_module(struct module *mod) -{ - struct static_call_site *start = mod->static_call_sites; - struct static_call_site *stop = start + mod->num_static_call_sites; - struct static_call_site *site; - - for (site = start; site != stop; site++) { - unsigned long s_key = __static_call_key(site); - unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS; - unsigned long key; - - /* - * Is the key is exported, 'addr' points to the key, which - * means modules are allowed to call static_call_update() on - * it. - * - * Otherwise, the key isn't exported, and 'addr' points to the - * trampoline so we need to lookup the key. - * - * We go through this dance to prevent crazy modules from - * abusing sensitive static calls. - */ - if (!kernel_text_address(addr)) - continue; - - key = tramp_key_lookup(addr); - if (!key) { - pr_warn("Failed to fixup __raw_static_call() usage at: %ps\n", - static_call_addr(site)); - return -EINVAL; - } - - key |= s_key & STATIC_CALL_SITE_FLAGS; - site->key = key - (long)&site->key; - } - - return __static_call_init(mod, start, stop); -} - -static void static_call_del_module(struct module *mod) -{ - struct static_call_site *start = mod->static_call_sites; - struct static_call_site *stop = mod->static_call_sites + - mod->num_static_call_sites; - struct static_call_key *key, *prev_key = NULL; - struct static_call_mod *site_mod, **prev; - struct static_call_site *site; - - for (site = start; site < stop; site++) { - key = static_call_key(site); - if (key == prev_key) - continue; - - prev_key = key; - - for (prev = &key->mods, site_mod = key->mods; - site_mod && site_mod->mod != mod; - prev = &site_mod->next, site_mod = site_mod->next) - ; - - if (!site_mod) - continue; - - *prev = site_mod->next; - kfree(site_mod); - } -} - -static int static_call_module_notify(struct notifier_block *nb, - unsigned long val, void *data) -{ - struct module *mod = data; - int ret = 0; - - cpus_read_lock(); - static_call_lock(); - - switch (val) { - case MODULE_STATE_COMING: - ret = static_call_add_module(mod); - if (ret) { - WARN(1, "Failed to allocate memory for static calls"); - static_call_del_module(mod); - } - break; - case MODULE_STATE_GOING: - static_call_del_module(mod); - break; - } - - static_call_unlock(); - cpus_read_unlock(); - - return notifier_from_errno(ret); -} - -static struct notifier_block static_call_module_nb = { - .notifier_call = static_call_module_notify, -}; - -#else - -static inline int __static_call_mod_text_reserved(void *start, void *end) -{ - return 0; -} - -#endif /* CONFIG_MODULES */ - -int static_call_text_reserved(void *start, void *end) -{ - bool init = system_state < SYSTEM_RUNNING; - int ret = __static_call_text_reserved(__start_static_call_sites, - __stop_static_call_sites, start, end, init); - - if (ret) - return ret; - - return __static_call_mod_text_reserved(start, end); -} - -int __init static_call_init(void) -{ - int ret; - - if (static_call_initialized) - return 0; - - cpus_read_lock(); - static_call_lock(); - ret = __static_call_init(NULL, __start_static_call_sites, - __stop_static_call_sites); - static_call_unlock(); - cpus_read_unlock(); - - if (ret) { - pr_err("Failed to allocate memory for static_call!\n"); - BUG(); - } - - static_call_initialized = true; - -#ifdef CONFIG_MODULES - register_module_notifier(&static_call_module_nb); -#endif - return 0; -} -early_initcall(static_call_init); long __static_call_return0(void) { return 0; } EXPORT_SYMBOL_GPL(__static_call_return0); - -#ifdef CONFIG_STATIC_CALL_SELFTEST - -static int func_a(int x) -{ - return x+1; -} - -static int func_b(int x) -{ - return x+2; -} - -DEFINE_STATIC_CALL(sc_selftest, func_a); - -static struct static_call_data { - int (*func)(int); - int val; - int expect; -} static_call_data [] __initdata = { - { NULL, 2, 3 }, - { func_b, 2, 4 }, - { func_a, 2, 3 } -}; - -static int __init test_static_call_init(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(static_call_data); i++ ) { - struct static_call_data *scd = &static_call_data[i]; - - if (scd->func) - static_call_update(sc_selftest, scd->func); - - WARN_ON(static_call(sc_selftest)(scd->val) != scd->expect); - } - - return 0; -} -early_initcall(test_static_call_init); - -#endif /* CONFIG_STATIC_CALL_SELFTEST */ diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c new file mode 100644 index 000000000000..dc5665b62814 --- /dev/null +++ b/kernel/static_call_inline.c @@ -0,0 +1,543 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern struct static_call_site __start_static_call_sites[], + __stop_static_call_sites[]; +extern struct static_call_tramp_key __start_static_call_tramp_key[], + __stop_static_call_tramp_key[]; + +static bool static_call_initialized; + +/* mutex to protect key modules/sites */ +static DEFINE_MUTEX(static_call_mutex); + +static void static_call_lock(void) +{ + mutex_lock(&static_call_mutex); +} + +static void static_call_unlock(void) +{ + mutex_unlock(&static_call_mutex); +} + +static inline void *static_call_addr(struct static_call_site *site) +{ + return (void *)((long)site->addr + (long)&site->addr); +} + +static inline unsigned long __static_call_key(const struct static_call_site *site) +{ + return (long)site->key + (long)&site->key; +} + +static inline struct static_call_key *static_call_key(const struct static_call_site *site) +{ + return (void *)(__static_call_key(site) & ~STATIC_CALL_SITE_FLAGS); +} + +/* These assume the key is word-aligned. */ +static inline bool static_call_is_init(struct static_call_site *site) +{ + return __static_call_key(site) & STATIC_CALL_SITE_INIT; +} + +static inline bool static_call_is_tail(struct static_call_site *site) +{ + return __static_call_key(site) & STATIC_CALL_SITE_TAIL; +} + +static inline void static_call_set_init(struct static_call_site *site) +{ + site->key = (__static_call_key(site) | STATIC_CALL_SITE_INIT) - + (long)&site->key; +} + +static int static_call_site_cmp(const void *_a, const void *_b) +{ + const struct static_call_site *a = _a; + const struct static_call_site *b = _b; + const struct static_call_key *key_a = static_call_key(a); + const struct static_call_key *key_b = static_call_key(b); + + if (key_a < key_b) + return -1; + + if (key_a > key_b) + return 1; + + return 0; +} + +static void static_call_site_swap(void *_a, void *_b, int size) +{ + long delta = (unsigned long)_a - (unsigned long)_b; + struct static_call_site *a = _a; + struct static_call_site *b = _b; + struct static_call_site tmp = *a; + + a->addr = b->addr - delta; + a->key = b->key - delta; + + b->addr = tmp.addr + delta; + b->key = tmp.key + delta; +} + +static inline void static_call_sort_entries(struct static_call_site *start, + struct static_call_site *stop) +{ + sort(start, stop - start, sizeof(struct static_call_site), + static_call_site_cmp, static_call_site_swap); +} + +static inline bool static_call_key_has_mods(struct static_call_key *key) +{ + return !(key->type & 1); +} + +static inline struct static_call_mod *static_call_key_next(struct static_call_key *key) +{ + if (!static_call_key_has_mods(key)) + return NULL; + + return key->mods; +} + +static inline struct static_call_site *static_call_key_sites(struct static_call_key *key) +{ + if (static_call_key_has_mods(key)) + return NULL; + + return (struct static_call_site *)(key->type & ~1); +} + +void __static_call_update(struct static_call_key *key, void *tramp, void *func) +{ + struct static_call_site *site, *stop; + struct static_call_mod *site_mod, first; + + cpus_read_lock(); + static_call_lock(); + + if (key->func == func) + goto done; + + key->func = func; + + arch_static_call_transform(NULL, tramp, func, false); + + /* + * If uninitialized, we'll not update the callsites, but they still + * point to the trampoline and we just patched that. + */ + if (WARN_ON_ONCE(!static_call_initialized)) + goto done; + + first = (struct static_call_mod){ + .next = static_call_key_next(key), + .mod = NULL, + .sites = static_call_key_sites(key), + }; + + for (site_mod = &first; site_mod; site_mod = site_mod->next) { + bool init = system_state < SYSTEM_RUNNING; + struct module *mod = site_mod->mod; + + if (!site_mod->sites) { + /* + * This can happen if the static call key is defined in + * a module which doesn't use it. + * + * It also happens in the has_mods case, where the + * 'first' entry has no sites associated with it. + */ + continue; + } + + stop = __stop_static_call_sites; + + if (mod) { +#ifdef CONFIG_MODULES + stop = mod->static_call_sites + + mod->num_static_call_sites; + init = mod->state == MODULE_STATE_COMING; +#endif + } + + for (site = site_mod->sites; + site < stop && static_call_key(site) == key; site++) { + void *site_addr = static_call_addr(site); + + if (!init && static_call_is_init(site)) + continue; + + if (!kernel_text_address((unsigned long)site_addr)) { + /* + * This skips patching built-in __exit, which + * is part of init_section_contains() but is + * not part of kernel_text_address(). + * + * Skipping built-in __exit is fine since it + * will never be executed. + */ + WARN_ONCE(!static_call_is_init(site), + "can't patch static call site at %pS", + site_addr); + continue; + } + + arch_static_call_transform(site_addr, NULL, func, + static_call_is_tail(site)); + } + } + +done: + static_call_unlock(); + cpus_read_unlock(); +} +EXPORT_SYMBOL_GPL(__static_call_update); + +static int __static_call_init(struct module *mod, + struct static_call_site *start, + struct static_call_site *stop) +{ + struct static_call_site *site; + struct static_call_key *key, *prev_key = NULL; + struct static_call_mod *site_mod; + + if (start == stop) + return 0; + + static_call_sort_entries(start, stop); + + for (site = start; site < stop; site++) { + void *site_addr = static_call_addr(site); + + if ((mod && within_module_init((unsigned long)site_addr, mod)) || + (!mod && init_section_contains(site_addr, 1))) + static_call_set_init(site); + + key = static_call_key(site); + if (key != prev_key) { + prev_key = key; + + /* + * For vmlinux (!mod) avoid the allocation by storing + * the sites pointer in the key itself. Also see + * __static_call_update()'s @first. + * + * This allows architectures (eg. x86) to call + * static_call_init() before memory allocation works. + */ + if (!mod) { + key->sites = site; + key->type |= 1; + goto do_transform; + } + + site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); + if (!site_mod) + return -ENOMEM; + + /* + * When the key has a direct sites pointer, extract + * that into an explicit struct static_call_mod, so we + * can have a list of modules. + */ + if (static_call_key_sites(key)) { + site_mod->mod = NULL; + site_mod->next = NULL; + site_mod->sites = static_call_key_sites(key); + + key->mods = site_mod; + + site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); + if (!site_mod) + return -ENOMEM; + } + + site_mod->mod = mod; + site_mod->sites = site; + site_mod->next = static_call_key_next(key); + key->mods = site_mod; + } + +do_transform: + arch_static_call_transform(site_addr, NULL, key->func, + static_call_is_tail(site)); + } + + return 0; +} + +static int addr_conflict(struct static_call_site *site, void *start, void *end) +{ + unsigned long addr = (unsigned long)static_call_addr(site); + + if (addr <= (unsigned long)end && + addr + CALL_INSN_SIZE > (unsigned long)start) + return 1; + + return 0; +} + +static int __static_call_text_reserved(struct static_call_site *iter_start, + struct static_call_site *iter_stop, + void *start, void *end, bool init) +{ + struct static_call_site *iter = iter_start; + + while (iter < iter_stop) { + if (init || !static_call_is_init(iter)) { + if (addr_conflict(iter, start, end)) + return 1; + } + iter++; + } + + return 0; +} + +#ifdef CONFIG_MODULES + +static int __static_call_mod_text_reserved(void *start, void *end) +{ + struct module *mod; + int ret; + + preempt_disable(); + mod = __module_text_address((unsigned long)start); + WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); + if (!try_module_get(mod)) + mod = NULL; + preempt_enable(); + + if (!mod) + return 0; + + ret = __static_call_text_reserved(mod->static_call_sites, + mod->static_call_sites + mod->num_static_call_sites, + start, end, mod->state == MODULE_STATE_COMING); + + module_put(mod); + + return ret; +} + +static unsigned long tramp_key_lookup(unsigned long addr) +{ + struct static_call_tramp_key *start = __start_static_call_tramp_key; + struct static_call_tramp_key *stop = __stop_static_call_tramp_key; + struct static_call_tramp_key *tramp_key; + + for (tramp_key = start; tramp_key != stop; tramp_key++) { + unsigned long tramp; + + tramp = (long)tramp_key->tramp + (long)&tramp_key->tramp; + if (tramp == addr) + return (long)tramp_key->key + (long)&tramp_key->key; + } + + return 0; +} + +static int static_call_add_module(struct module *mod) +{ + struct static_call_site *start = mod->static_call_sites; + struct static_call_site *stop = start + mod->num_static_call_sites; + struct static_call_site *site; + + for (site = start; site != stop; site++) { + unsigned long s_key = __static_call_key(site); + unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS; + unsigned long key; + + /* + * Is the key is exported, 'addr' points to the key, which + * means modules are allowed to call static_call_update() on + * it. + * + * Otherwise, the key isn't exported, and 'addr' points to the + * trampoline so we need to lookup the key. + * + * We go through this dance to prevent crazy modules from + * abusing sensitive static calls. + */ + if (!kernel_text_address(addr)) + continue; + + key = tramp_key_lookup(addr); + if (!key) { + pr_warn("Failed to fixup __raw_static_call() usage at: %ps\n", + static_call_addr(site)); + return -EINVAL; + } + + key |= s_key & STATIC_CALL_SITE_FLAGS; + site->key = key - (long)&site->key; + } + + return __static_call_init(mod, start, stop); +} + +static void static_call_del_module(struct module *mod) +{ + struct static_call_site *start = mod->static_call_sites; + struct static_call_site *stop = mod->static_call_sites + + mod->num_static_call_sites; + struct static_call_key *key, *prev_key = NULL; + struct static_call_mod *site_mod, **prev; + struct static_call_site *site; + + for (site = start; site < stop; site++) { + key = static_call_key(site); + if (key == prev_key) + continue; + + prev_key = key; + + for (prev = &key->mods, site_mod = key->mods; + site_mod && site_mod->mod != mod; + prev = &site_mod->next, site_mod = site_mod->next) + ; + + if (!site_mod) + continue; + + *prev = site_mod->next; + kfree(site_mod); + } +} + +static int static_call_module_notify(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct module *mod = data; + int ret = 0; + + cpus_read_lock(); + static_call_lock(); + + switch (val) { + case MODULE_STATE_COMING: + ret = static_call_add_module(mod); + if (ret) { + WARN(1, "Failed to allocate memory for static calls"); + static_call_del_module(mod); + } + break; + case MODULE_STATE_GOING: + static_call_del_module(mod); + break; + } + + static_call_unlock(); + cpus_read_unlock(); + + return notifier_from_errno(ret); +} + +static struct notifier_block static_call_module_nb = { + .notifier_call = static_call_module_notify, +}; + +#else + +static inline int __static_call_mod_text_reserved(void *start, void *end) +{ + return 0; +} + +#endif /* CONFIG_MODULES */ + +int static_call_text_reserved(void *start, void *end) +{ + bool init = system_state < SYSTEM_RUNNING; + int ret = __static_call_text_reserved(__start_static_call_sites, + __stop_static_call_sites, start, end, init); + + if (ret) + return ret; + + return __static_call_mod_text_reserved(start, end); +} + +int __init static_call_init(void) +{ + int ret; + + if (static_call_initialized) + return 0; + + cpus_read_lock(); + static_call_lock(); + ret = __static_call_init(NULL, __start_static_call_sites, + __stop_static_call_sites); + static_call_unlock(); + cpus_read_unlock(); + + if (ret) { + pr_err("Failed to allocate memory for static_call!\n"); + BUG(); + } + + static_call_initialized = true; + +#ifdef CONFIG_MODULES + register_module_notifier(&static_call_module_nb); +#endif + return 0; +} +early_initcall(static_call_init); + +#ifdef CONFIG_STATIC_CALL_SELFTEST + +static int func_a(int x) +{ + return x+1; +} + +static int func_b(int x) +{ + return x+2; +} + +DEFINE_STATIC_CALL(sc_selftest, func_a); + +static struct static_call_data { + int (*func)(int); + int val; + int expect; +} static_call_data [] __initdata = { + { NULL, 2, 3 }, + { func_b, 2, 4 }, + { func_a, 2, 3 } +}; + +static int __init test_static_call_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(static_call_data); i++ ) { + struct static_call_data *scd = &static_call_data[i]; + + if (scd->func) + static_call_update(sc_selftest, scd->func); + + WARN_ON(static_call(sc_selftest)(scd->val) != scd->expect); + } + + return 0; +} +early_initcall(test_static_call_init); + +#endif /* CONFIG_STATIC_CALL_SELFTEST */ From 5517d500829c683a358a8de04ecb2e28af629ae5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 14 Mar 2022 11:27:35 +0100 Subject: [PATCH 224/708] static_call: Properly initialise DEFINE_STATIC_CALL_RET0() When a static call is updated with __static_call_return0() as target, arch_static_call_transform() set it to use an optimised set of instructions which are meant to lay in the same cacheline. But when initialising a static call with DEFINE_STATIC_CALL_RET0(), we get a branch to the real __static_call_return0() function instead of getting the optimised setup: c00d8120 <__SCT__perf_snapshot_branch_stack>: c00d8120: 4b ff ff f4 b c00d8114 <__static_call_return0> c00d8124: 3d 80 c0 0e lis r12,-16370 c00d8128: 81 8c 81 3c lwz r12,-32452(r12) c00d812c: 7d 89 03 a6 mtctr r12 c00d8130: 4e 80 04 20 bctr c00d8134: 38 60 00 00 li r3,0 c00d8138: 4e 80 00 20 blr c00d813c: 00 00 00 00 .long 0x0 Add ARCH_DEFINE_STATIC_CALL_RET0_TRAMP() defined by each architecture to setup the optimised configuration, and rework DEFINE_STATIC_CALL_RET0() to call it: c00d8120 <__SCT__perf_snapshot_branch_stack>: c00d8120: 48 00 00 14 b c00d8134 <__SCT__perf_snapshot_branch_stack+0x14> c00d8124: 3d 80 c0 0e lis r12,-16370 c00d8128: 81 8c 81 3c lwz r12,-32452(r12) c00d812c: 7d 89 03 a6 mtctr r12 c00d8130: 4e 80 04 20 bctr c00d8134: 38 60 00 00 li r3,0 c00d8138: 4e 80 00 20 blr c00d813c: 00 00 00 00 .long 0x0 Signed-off-by: Christophe Leroy Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/1e0a61a88f52a460f62a58ffc2a5f847d1f7d9d8.1647253456.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/static_call.h | 1 + arch/x86/include/asm/static_call.h | 2 ++ include/linux/static_call.h | 20 +++++++++++++++++--- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/static_call.h b/arch/powerpc/include/asm/static_call.h index 0a0bc79bd1fa..de1018cc522b 100644 --- a/arch/powerpc/include/asm/static_call.h +++ b/arch/powerpc/include/asm/static_call.h @@ -24,5 +24,6 @@ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) __PPC_SCT(name, "b " #func) #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) __PPC_SCT(name, "blr") +#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) __PPC_SCT(name, "b .+20") #endif /* _ASM_POWERPC_STATIC_CALL_H */ diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index ed4f8bb6c2d9..2455d721503e 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -38,6 +38,8 @@ #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") +#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \ + ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0) #define ARCH_ADD_TRAMP_KEY(name) \ asm(".pushsection .static_call_tramp_key, \"a\" \n" \ diff --git a/include/linux/static_call.h b/include/linux/static_call.h index fcc5b48989b3..3c50b0fdda16 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -196,6 +196,14 @@ extern long __static_call_return0(void); }; \ ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) +#define DEFINE_STATIC_CALL_RET0(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = __static_call_return0, \ + .type = 1, \ + }; \ + ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) + #define static_call_cond(name) (void)__static_call(name) #define EXPORT_STATIC_CALL(name) \ @@ -231,6 +239,12 @@ static inline int static_call_init(void) { return 0; } }; \ ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) +#define DEFINE_STATIC_CALL_RET0(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = __static_call_return0, \ + }; \ + ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) #define static_call_cond(name) (void)__static_call(name) @@ -284,6 +298,9 @@ static inline long __static_call_return0(void) .func = NULL, \ } +#define DEFINE_STATIC_CALL_RET0(name, _func) \ + __DEFINE_STATIC_CALL(name, _func, __static_call_return0) + static inline void __static_call_nop(void) { } /* @@ -327,7 +344,4 @@ static inline int static_call_text_reserved(void *start, void *end) #define DEFINE_STATIC_CALL(name, _func) \ __DEFINE_STATIC_CALL(name, _func, _func) -#define DEFINE_STATIC_CALL_RET0(name, _func) \ - __DEFINE_STATIC_CALL(name, _func, __static_call_return0) - #endif /* _LINUX_STATIC_CALL_H */ From df21c0d7a94db64a4e1a0d070e26fb02e60fefab Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 14 Mar 2022 11:27:36 +0100 Subject: [PATCH 225/708] static_call: Remove __DEFINE_STATIC_CALL macro Only DEFINE_STATIC_CALL use __DEFINE_STATIC_CALL macro now when CONFIG_HAVE_STATIC_CALL is selected. Only keep __DEFINE_STATIC_CALL() for the generic fallback, and also use it to implement DEFINE_STATIC_CALL_NULL() in that case. Signed-off-by: Christophe Leroy Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/329074f92d96e3220ebe15da7bbe2779beee31eb.1647253456.git.christophe.leroy@csgroup.eu --- include/linux/static_call.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 3c50b0fdda16..df53bed9d71f 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -180,13 +180,13 @@ extern int static_call_text_reserved(void *start, void *end); extern long __static_call_return0(void); -#define __DEFINE_STATIC_CALL(name, _func, _func_init) \ +#define DEFINE_STATIC_CALL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ struct static_call_key STATIC_CALL_KEY(name) = { \ - .func = _func_init, \ + .func = _func, \ .type = 1, \ }; \ - ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func_init) + ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) #define DEFINE_STATIC_CALL_NULL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ @@ -225,12 +225,12 @@ extern long __static_call_return0(void); static inline int static_call_init(void) { return 0; } -#define __DEFINE_STATIC_CALL(name, _func, _func_init) \ +#define DEFINE_STATIC_CALL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ struct static_call_key STATIC_CALL_KEY(name) = { \ - .func = _func_init, \ + .func = _func, \ }; \ - ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func_init) + ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) #define DEFINE_STATIC_CALL_NULL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ @@ -292,11 +292,11 @@ static inline long __static_call_return0(void) .func = _func_init, \ } +#define DEFINE_STATIC_CALL(name, _func) \ + __DEFINE_STATIC_CALL(name, _func, _func) + #define DEFINE_STATIC_CALL_NULL(name, _func) \ - DECLARE_STATIC_CALL(name, _func); \ - struct static_call_key STATIC_CALL_KEY(name) = { \ - .func = NULL, \ - } + __DEFINE_STATIC_CALL(name, _func, NULL) #define DEFINE_STATIC_CALL_RET0(name, _func) \ __DEFINE_STATIC_CALL(name, _func, __static_call_return0) @@ -341,7 +341,4 @@ static inline int static_call_text_reserved(void *start, void *end) #endif /* CONFIG_HAVE_STATIC_CALL */ -#define DEFINE_STATIC_CALL(name, _func) \ - __DEFINE_STATIC_CALL(name, _func, _func) - #endif /* _LINUX_STATIC_CALL_H */ From 1c1e7e3c23dd25f938302428eeb22c3dda2c3427 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 28 Mar 2022 16:58:08 +0200 Subject: [PATCH 226/708] x86/percpu: Remove volatile from arch_raw_cpu_ptr(). The volatile attribute in the inline assembly of arch_raw_cpu_ptr() forces the compiler to always generate the code, even if the compiler can decide upfront that its result is not needed. For instance invoking __intel_pmu_disable_all(false) (like intel_pmu_snapshot_arch_branch_stack() does) leads to loading the address of &cpu_hw_events into the register while compiler knows that it has no need for it. This ends up with code like: | movq $cpu_hw_events, %rax #, tcp_ptr__ | add %gs:this_cpu_off(%rip), %rax # this_cpu_off, tcp_ptr__ | xorl %eax, %eax # tmp93 It also creates additional code within local_lock() with !RT && !LOCKDEP which is not desired. By removing the volatile attribute the compiler can place the function freely and avoid it if it is not needed in the end. By using the function twice the compiler properly caches only the variable offset and always loads the CPU-offset. this_cpu_ptr() also remains properly placed within a preempt_disable() sections because - arch_raw_cpu_ptr() assembly has a memory input ("m" (this_cpu_off)) - prempt_{dis,en}able() fundamentally has a 'barrier()' in it Therefore this_cpu_ptr() is already properly serialized and does not rely on the 'volatile' attribute. Remove volatile from arch_raw_cpu_ptr(). [ bigeasy: Added Linus' explanation why this_cpu_ptr() is not moved out of a preempt_disable() section without the 'volatile' attribute. ] Suggested-by: Linus Torvalds Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220328145810.86783-2-bigeasy@linutronix.de --- arch/x86/include/asm/percpu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index a3c33b79fb86..13c0d63ed55e 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -38,9 +38,9 @@ #define arch_raw_cpu_ptr(ptr) \ ({ \ unsigned long tcp_ptr__; \ - asm volatile("add " __percpu_arg(1) ", %0" \ - : "=r" (tcp_ptr__) \ - : "m" (this_cpu_off), "0" (ptr)); \ + asm ("add " __percpu_arg(1) ", %0" \ + : "=r" (tcp_ptr__) \ + : "m" (this_cpu_off), "0" (ptr)); \ (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \ }) #else From 2d2f8f083ef29e9b7adfe5cb421368331543473f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 28 Mar 2022 16:58:09 +0200 Subject: [PATCH 227/708] Revert "locking/local_lock: Make the empty local_lock_*() function a macro." With volatile removed from arch_raw_cpu_ptr() the compiler no longer creates the per-CPU reference. The usage of the macro can be reverted now. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220328145810.86783-3-bigeasy@linutronix.de --- include/linux/local_lock_internal.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 6d635e8306d6..975e33b793a7 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -44,9 +44,9 @@ static inline void local_lock_debug_init(local_lock_t *l) } #else /* CONFIG_DEBUG_LOCK_ALLOC */ # define LOCAL_LOCK_DEBUG_INIT(lockname) -# define local_lock_acquire(__ll) do { typecheck(local_lock_t *, __ll); } while (0) -# define local_lock_release(__ll) do { typecheck(local_lock_t *, __ll); } while (0) -# define local_lock_debug_init(__ll) do { typecheck(local_lock_t *, __ll); } while (0) +static inline void local_lock_acquire(local_lock_t *l) { } +static inline void local_lock_release(local_lock_t *l) { } +static inline void local_lock_debug_init(local_lock_t *l) { } #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } From 273ba85b5e8b971ed28eb5c17e1638543be9237d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 28 Mar 2022 16:58:10 +0200 Subject: [PATCH 228/708] Revert "mm/page_alloc: mark pagesets as __maybe_unused" The local_lock() is now using a proper static inline function which is enough for llvm to accept that the variable is used. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220328145810.86783-4-bigeasy@linutronix.de --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2db95780e003..6e5b4488a0c5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -128,7 +128,7 @@ static DEFINE_MUTEX(pcp_batch_high_lock); struct pagesets { local_lock_t lock; }; -static DEFINE_PER_CPU(struct pagesets, pagesets) __maybe_unused = { +static DEFINE_PER_CPU(struct pagesets, pagesets) = { .lock = INIT_LOCAL_LOCK(lock), }; From c61759e581576d3330bd1d9490b4d7552e24da6b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 15 Mar 2022 10:45:57 -0700 Subject: [PATCH 229/708] perf/x86: Add Intel Raptor Lake support From PMU's perspective, Raptor Lake is the same as the Alder Lake. The only difference is the event list, which will be supported in the perf tool later. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/1647366360-82824-1-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index e88791b420ee..28f075e00c7a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6212,6 +6212,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ALDERLAKE: case INTEL_FAM6_ALDERLAKE_L: + case INTEL_FAM6_RAPTORLAKE: /* * Alder Lake has 2 types of CPU, core and atom. * From 2da202aa1c38bfe8841611a3d339892eb5579e2b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 15 Mar 2022 10:45:58 -0700 Subject: [PATCH 230/708] perf/x86/cstate: Add Raptor Lake support Raptor Lake is Intel's successor to Alder lake. From the perspective of Intel cstate residency counters, there is nothing changed compared with Alder lake. Share adl_cstates with Alder lake. Update the comments for Raptor Lake. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/1647366360-82824-2-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/cstate.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index c6262b154c3a..5d7762288a24 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -40,7 +40,7 @@ * Model specific counters: * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 - * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL + * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 @@ -51,49 +51,50 @@ * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL + * TGL,TNT,RKL,ADL,RPL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML, - * ICL,TGL,RKL,ADL + * ICL,TGL,RKL,ADL,RPL * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, - * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL + * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, + * RPL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, * GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL, - * ADL + * ADL,RPL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL + * TGL,TNT,RKL,ADL,RPL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL, - * KBL,CML,ICL,TGL,RKL,ADL + * KBL,CML,ICL,TGL,RKL,ADL,RPL * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL + * ADL,RPL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL + * ADL,RPL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, - * TNT,RKL,ADL + * TNT,RKL,ADL,RPL * Scope: Package (physical package) * */ @@ -680,6 +681,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &icl_cstates), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); From 82cd83047a9a80e52c0849e56885279166215310 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 15 Mar 2022 10:45:59 -0700 Subject: [PATCH 231/708] perf/x86/msr: Add Raptor Lake CPU support Raptor Lake is Intel's successor to Alder lake. PPERF and SMI_COUNT MSRs are also supported. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/1647366360-82824-3-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/msr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 96c775abe31f..6d759f88315c 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -103,6 +103,7 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_ROCKETLAKE: case INTEL_FAM6_ALDERLAKE: case INTEL_FAM6_ALDERLAKE_L: + case INTEL_FAM6_RAPTORLAKE: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; From ad4878d4d71d9ada913be2ad5b6d7f526a695b6f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 15 Mar 2022 10:46:00 -0700 Subject: [PATCH 232/708] perf/x86/uncore: Add Raptor Lake uncore support The uncore PMU of the Raptor Lake is the same as Alder Lake. Add new PCIIDs of IMC for Raptor Lake. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/1647366360-82824-4-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore.c | 1 + arch/x86/events/intel/uncore_snb.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index e497da9bf427..7695dcae280e 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1828,6 +1828,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), {}, diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index f698a55bde81..4262351f52b6 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -79,6 +79,10 @@ #define PCI_DEVICE_ID_INTEL_ADL_14_IMC 0x4650 #define PCI_DEVICE_ID_INTEL_ADL_15_IMC 0x4668 #define PCI_DEVICE_ID_INTEL_ADL_16_IMC 0x4670 +#define PCI_DEVICE_ID_INTEL_RPL_1_IMC 0xA700 +#define PCI_DEVICE_ID_INTEL_RPL_2_IMC 0xA702 +#define PCI_DEVICE_ID_INTEL_RPL_3_IMC 0xA706 +#define PCI_DEVICE_ID_INTEL_RPL_4_IMC 0xA709 /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -1406,6 +1410,22 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_16_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_1_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_2_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_3_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_4_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ } }; From e3265a4386428d3d157d9565bb520aabff8b4bf0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 28 Mar 2022 13:01:12 -0700 Subject: [PATCH 233/708] perf/core: Inherit event_caps It was reported that some perf event setup can make fork failed on ARM64. It was the case of a group of mixed hw and sw events and it failed in perf_event_init_task() due to armpmu_event_init(). The ARM PMU code checks if all the events in a group belong to the same PMU except for software events. But it didn't set the event_caps of inherited events and no longer identify them as software events. Therefore the test failed in a child process. A simple reproducer is: $ perf stat -e '{cycles,cs,instructions}' perf bench sched messaging # Running 'sched/messaging' benchmark: perf: fork(): Invalid argument The perf stat was fine but the perf bench failed in fork(). Let's inherit the event caps from the parent. Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Cc: Link: https://lkml.kernel.org/r/20220328200112.457740-1-namhyung@kernel.org --- kernel/events/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index cfde994ce61c..3980efcf931d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11635,6 +11635,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, event->state = PERF_EVENT_STATE_INACTIVE; + if (parent_event) + event->event_caps = parent_event->event_caps; + if (event->attr.sigtrap) atomic_set(&event->event_limit, 1); From 4a263bf331c512849062805ef1b4ac40301a9829 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 28 Mar 2022 08:49:02 -0700 Subject: [PATCH 234/708] perf/x86/intel: Don't extend the pseudo-encoding to GP counters The INST_RETIRED.PREC_DIST event (0x0100) doesn't count on SPR. perf stat -e cpu/event=0xc0,umask=0x0/,cpu/event=0x0,umask=0x1/ -C0 Performance counter stats for 'CPU(s) 0': 607,246 cpu/event=0xc0,umask=0x0/ 0 cpu/event=0x0,umask=0x1/ The encoding for INST_RETIRED.PREC_DIST is pseudo-encoding, which doesn't work on the generic counters. However, current perf extends its mask to the generic counters. The pseudo event-code for a fixed counter must be 0x00. Check and avoid extending the mask for the fixed counter event which using the pseudo-encoding, e.g., ref-cycles and PREC_DIST event. With the patch, perf stat -e cpu/event=0xc0,umask=0x0/,cpu/event=0x0,umask=0x1/ -C0 Performance counter stats for 'CPU(s) 0': 583,184 cpu/event=0xc0,umask=0x0/ 583,048 cpu/event=0x0,umask=0x1/ Fixes: 2de71ee153ef ("perf/x86/intel: Fix ICL/SPR INST_RETIRED.PREC_DIST encodings") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1648482543-14923-1-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 6 +++++- arch/x86/include/asm/perf_event.h | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 28f075e00c7a..eb17b968ca52 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -5536,7 +5536,11 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con /* Disabled fixed counters which are not in CPUID */ c->idxmsk64 &= intel_ctrl; - if (c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) + /* + * Don't extend the pseudo-encoding to the + * generic counters + */ + if (!use_fixed_pseudo_encoding(c->code)) c->idxmsk64 |= (1ULL << num_counters) - 1; } c->idxmsk64 &= diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 58d9e4b1fa0a..b06e4c573add 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -241,6 +241,11 @@ struct x86_pmu_capability { #define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3) #define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS) +static inline bool use_fixed_pseudo_encoding(u64 code) +{ + return !(code & 0xff); +} + /* * We model BTS tracing as another fixed-mode PMC. * From e590928de7547454469693da9bc7ffd562e54b7e Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 28 Mar 2022 08:49:03 -0700 Subject: [PATCH 235/708] perf/x86/intel: Update the FRONTEND MSR mask on Sapphire Rapids On Sapphire Rapids, the FRONTEND_RETIRED.MS_FLOWS event requires the FRONTEND MSR value 0x8. However, the current FRONTEND MSR mask doesn't support it. Update intel_spr_extra_regs[] to support it. Fixes: 61b985e3e775 ("perf/x86/intel: Add perf core PMU support for Sapphire Rapids") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1648482543-14923-2-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index eb17b968ca52..fc7f458eb3de 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -302,7 +302,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), - INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE), INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), EVENT_EXTRA_END From a0827713e298d021d3c79ae7423aea408f3f7c3a Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Tue, 29 Mar 2022 23:45:20 +0800 Subject: [PATCH 236/708] perf/core: Don't pass task around when ctx sched in The current code pass task around for ctx_sched_in(), only to get perf_cgroup of the task, then update the timestamp of it and its ancestors and set them to active. But we can use cpuctx->cgrp to get active perf_cgroup and its ancestors since cpuctx->cgrp has been set before ctx_sched_in(). This patch remove the task argument in ctx_sched_in() and cleanup related code. Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220329154523.86438-2-zhouchengming@bytedance.com --- kernel/events/core.c | 58 ++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 3980efcf931d..65450200691c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -574,8 +574,7 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, enum event_type_t event_type); static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, - enum event_type_t event_type, - struct task_struct *task); + enum event_type_t event_type); static void update_context_time(struct perf_event_context *ctx); static u64 perf_event_time(struct perf_event *event); @@ -801,10 +800,10 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) } static inline void -perf_cgroup_set_timestamp(struct task_struct *task, - struct perf_event_context *ctx) +perf_cgroup_set_timestamp(struct perf_cpu_context *cpuctx) { - struct perf_cgroup *cgrp; + struct perf_event_context *ctx = &cpuctx->ctx; + struct perf_cgroup *cgrp = cpuctx->cgrp; struct perf_cgroup_info *info; struct cgroup_subsys_state *css; @@ -813,10 +812,10 @@ perf_cgroup_set_timestamp(struct task_struct *task, * ensure we do not access cgroup data * unless we have the cgroup pinned (css_get) */ - if (!task || !ctx->nr_cgroups) + if (!cgrp) return; - cgrp = perf_cgroup_from_task(task, ctx); + WARN_ON_ONCE(!ctx->nr_cgroups); for (css = &cgrp->css; css; css = css->parent) { cgrp = container_of(css, struct perf_cgroup, css); @@ -869,14 +868,14 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) WARN_ON_ONCE(cpuctx->cgrp); /* * set cgrp before ctxsw in to allow - * event_filter_match() to not have to pass - * task around + * perf_cgroup_set_timestamp() in ctx_sched_in() + * to not have to pass task around * we pass the cpuctx->ctx to perf_cgroup_from_task() * because cgorup events are only per-cpu */ cpuctx->cgrp = perf_cgroup_from_task(task, &cpuctx->ctx); - cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); + cpu_ctx_sched_in(cpuctx, EVENT_ALL); } perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); @@ -1118,8 +1117,7 @@ static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event, } static inline void -perf_cgroup_set_timestamp(struct task_struct *task, - struct perf_event_context *ctx) +perf_cgroup_set_timestamp(struct perf_cpu_context *cpuctx) { } @@ -2713,8 +2711,7 @@ static void ctx_sched_out(struct perf_event_context *ctx, static void ctx_sched_in(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx, - enum event_type_t event_type, - struct task_struct *task); + enum event_type_t event_type); static void task_ctx_sched_out(struct perf_cpu_context *cpuctx, struct perf_event_context *ctx, @@ -2730,15 +2727,14 @@ static void task_ctx_sched_out(struct perf_cpu_context *cpuctx, } static void perf_event_sched_in(struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx, - struct task_struct *task) + struct perf_event_context *ctx) { - cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task); + cpu_ctx_sched_in(cpuctx, EVENT_PINNED); if (ctx) - ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); - cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); + ctx_sched_in(ctx, cpuctx, EVENT_PINNED); + cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); if (ctx) - ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); + ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); } /* @@ -2788,7 +2784,7 @@ static void ctx_resched(struct perf_cpu_context *cpuctx, else if (ctx_event_type & EVENT_PINNED) cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); - perf_event_sched_in(cpuctx, task_ctx, current); + perf_event_sched_in(cpuctx, task_ctx); perf_pmu_enable(cpuctx->ctx.pmu); } @@ -3011,7 +3007,7 @@ static void __perf_event_enable(struct perf_event *event, return; if (!event_filter_match(event)) { - ctx_sched_in(ctx, cpuctx, EVENT_TIME, current); + ctx_sched_in(ctx, cpuctx, EVENT_TIME); return; } @@ -3020,7 +3016,7 @@ static void __perf_event_enable(struct perf_event *event, * then don't put it on unless the group is on. */ if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) { - ctx_sched_in(ctx, cpuctx, EVENT_TIME, current); + ctx_sched_in(ctx, cpuctx, EVENT_TIME); return; } @@ -3865,8 +3861,7 @@ ctx_flexible_sched_in(struct perf_event_context *ctx, static void ctx_sched_in(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx, - enum event_type_t event_type, - struct task_struct *task) + enum event_type_t event_type) { int is_active = ctx->is_active; @@ -3878,7 +3873,7 @@ ctx_sched_in(struct perf_event_context *ctx, if (is_active ^ EVENT_TIME) { /* start ctx time */ __update_context_time(ctx, false); - perf_cgroup_set_timestamp(task, ctx); + perf_cgroup_set_timestamp(cpuctx); /* * CPU-release for the below ->is_active store, * see __load_acquire() in perf_event_time_now() @@ -3909,12 +3904,11 @@ ctx_sched_in(struct perf_event_context *ctx, } static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, - enum event_type_t event_type, - struct task_struct *task) + enum event_type_t event_type) { struct perf_event_context *ctx = &cpuctx->ctx; - ctx_sched_in(ctx, cpuctx, event_type, task); + ctx_sched_in(ctx, cpuctx, event_type); } static void perf_event_context_sched_in(struct perf_event_context *ctx, @@ -3956,7 +3950,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, */ if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); - perf_event_sched_in(cpuctx, ctx, task); + perf_event_sched_in(cpuctx, ctx); if (cpuctx->sched_cb_usage && pmu->sched_task) pmu->sched_task(cpuctx->task_ctx, true); @@ -4267,7 +4261,7 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx) if (cpu_event) rotate_ctx(&cpuctx->ctx, cpu_event); - perf_event_sched_in(cpuctx, task_ctx, current); + perf_event_sched_in(cpuctx, task_ctx); perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); @@ -4339,7 +4333,7 @@ static void perf_event_enable_on_exec(int ctxn) clone_ctx = unclone_ctx(ctx); ctx_resched(cpuctx, ctx, event_type); } else { - ctx_sched_in(ctx, cpuctx, EVENT_TIME, current); + ctx_sched_in(ctx, cpuctx, EVENT_TIME); } perf_ctx_unlock(cpuctx, ctx); From 6875186aea5ce09a644758d9193265da1cc187c7 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Tue, 29 Mar 2022 23:45:21 +0800 Subject: [PATCH 237/708] perf/core: Use perf_cgroup_info->active to check if cgroup is active Since we use perf_cgroup_set_timestamp() to start cgroup time and set active to 1, then use update_cgrp_time_from_cpuctx() to stop cgroup time and set active to 0. We can use info->active directly to check if cgroup is active. Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220329154523.86438-3-zhouchengming@bytedance.com --- kernel/events/core.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 65450200691c..a08fb92b3934 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -780,7 +780,6 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, static inline void update_cgrp_time_from_event(struct perf_event *event) { struct perf_cgroup_info *info; - struct perf_cgroup *cgrp; /* * ensure we access cgroup data only when needed and @@ -789,14 +788,12 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) if (!is_cgroup_event(event)) return; - cgrp = perf_cgroup_from_task(current, event->ctx); + info = this_cpu_ptr(event->cgrp->info); /* * Do not update time when cgroup is not active */ - if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) { - info = this_cpu_ptr(event->cgrp->info); + if (info->active) __update_cgrp_time(info, perf_clock(), true); - } } static inline void From 96492a6c558acb56124844d1409d9ef8624a0322 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Tue, 29 Mar 2022 23:45:22 +0800 Subject: [PATCH 238/708] perf/core: Fix perf_cgroup_switch() There is a race problem that can trigger WARN_ON_ONCE(cpuctx->cgrp) in perf_cgroup_switch(). CPU1 CPU2 perf_cgroup_sched_out(prev, next) cgrp1 = perf_cgroup_from_task(prev) cgrp2 = perf_cgroup_from_task(next) if (cgrp1 != cgrp2) perf_cgroup_switch(prev, PERF_CGROUP_SWOUT) cgroup_migrate_execute() task->cgroups = ? perf_cgroup_attach() task_function_call(task, __perf_cgroup_move) perf_cgroup_sched_in(prev, next) cgrp1 = perf_cgroup_from_task(prev) cgrp2 = perf_cgroup_from_task(next) if (cgrp1 != cgrp2) perf_cgroup_switch(next, PERF_CGROUP_SWIN) __perf_cgroup_move() perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN) The commit a8d757ef076f ("perf events: Fix slow and broken cgroup context switch code") want to skip perf_cgroup_switch() when the perf_cgroup of "prev" and "next" are the same. But task->cgroups can change in concurrent with context_switch() in cgroup_migrate_execute(). If cgrp1 == cgrp2 in sched_out(), cpuctx won't do sched_out. Then task->cgroups changed cause cgrp1 != cgrp2 in sched_in(), cpuctx will do sched_in. So trigger WARN_ON_ONCE(cpuctx->cgrp). Even though __perf_cgroup_move() will be synchronized as the context switch disables the interrupt, context_switch() still can see the task->cgroups is changing in the middle, since task->cgroups changed before sending IPI. So we have to combine perf_cgroup_sched_in() into perf_cgroup_sched_out(), unified into perf_cgroup_switch(), to fix the incosistency between perf_cgroup_sched_out() and perf_cgroup_sched_in(). But we can't just compare prev->cgroups with next->cgroups to decide whether to skip cpuctx sched_out/in since the prev->cgroups is changing too. For example: CPU1 CPU2 cgroup_migrate_execute() prev->cgroups = ? perf_cgroup_attach() task_function_call(task, __perf_cgroup_move) perf_cgroup_switch(task) cgrp1 = perf_cgroup_from_task(prev) cgrp2 = perf_cgroup_from_task(next) if (cgrp1 != cgrp2) cpuctx sched_out/in ... task_function_call() will return -ESRCH In the above example, prev->cgroups changing cause (cgrp1 == cgrp2) to be true, so skip cpuctx sched_out/in. And later task_function_call() would return -ESRCH since the prev task isn't running on cpu anymore. So we would leave perf_events of the old prev->cgroups still sched on the CPU, which is wrong. The solution is that we should use cpuctx->cgrp to compare with the next task's perf_cgroup. Since cpuctx->cgrp can only be changed on local CPU, and we have irq disabled, we can read cpuctx->cgrp to compare without holding ctx lock. Fixes: a8d757ef076f ("perf events: Fix slow and broken cgroup context switch code") Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220329154523.86438-4-zhouchengming@bytedance.com --- kernel/events/core.c | 132 ++++++++----------------------------------- 1 file changed, 25 insertions(+), 107 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index a08fb92b3934..bdeb41fe7f15 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -824,17 +824,12 @@ perf_cgroup_set_timestamp(struct perf_cpu_context *cpuctx) static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list); -#define PERF_CGROUP_SWOUT 0x1 /* cgroup switch out every event */ -#define PERF_CGROUP_SWIN 0x2 /* cgroup switch in events based on task */ - /* * reschedule events based on the cgroup constraint of task. - * - * mode SWOUT : schedule out everything - * mode SWIN : schedule in based on cgroup for next */ -static void perf_cgroup_switch(struct task_struct *task, int mode) +static void perf_cgroup_switch(struct task_struct *task) { + struct perf_cgroup *cgrp; struct perf_cpu_context *cpuctx, *tmp; struct list_head *list; unsigned long flags; @@ -845,35 +840,31 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) */ local_irq_save(flags); + cgrp = perf_cgroup_from_task(task, NULL); + list = this_cpu_ptr(&cgrp_cpuctx_list); list_for_each_entry_safe(cpuctx, tmp, list, cgrp_cpuctx_entry) { WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); + if (READ_ONCE(cpuctx->cgrp) == cgrp) + continue; perf_ctx_lock(cpuctx, cpuctx->task_ctx); perf_pmu_disable(cpuctx->ctx.pmu); - if (mode & PERF_CGROUP_SWOUT) { - cpu_ctx_sched_out(cpuctx, EVENT_ALL); - /* - * must not be done before ctxswout due - * to event_filter_match() in event_sched_out() - */ - cpuctx->cgrp = NULL; - } + cpu_ctx_sched_out(cpuctx, EVENT_ALL); + /* + * must not be done before ctxswout due + * to update_cgrp_time_from_cpuctx() in + * ctx_sched_out() + */ + cpuctx->cgrp = cgrp; + /* + * set cgrp before ctxsw in to allow + * perf_cgroup_set_timestamp() in ctx_sched_in() + * to not have to pass task around + */ + cpu_ctx_sched_in(cpuctx, EVENT_ALL); - if (mode & PERF_CGROUP_SWIN) { - WARN_ON_ONCE(cpuctx->cgrp); - /* - * set cgrp before ctxsw in to allow - * perf_cgroup_set_timestamp() in ctx_sched_in() - * to not have to pass task around - * we pass the cpuctx->ctx to perf_cgroup_from_task() - * because cgorup events are only per-cpu - */ - cpuctx->cgrp = perf_cgroup_from_task(task, - &cpuctx->ctx); - cpu_ctx_sched_in(cpuctx, EVENT_ALL); - } perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } @@ -881,58 +872,6 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) local_irq_restore(flags); } -static inline void perf_cgroup_sched_out(struct task_struct *task, - struct task_struct *next) -{ - struct perf_cgroup *cgrp1; - struct perf_cgroup *cgrp2 = NULL; - - rcu_read_lock(); - /* - * we come here when we know perf_cgroup_events > 0 - * we do not need to pass the ctx here because we know - * we are holding the rcu lock - */ - cgrp1 = perf_cgroup_from_task(task, NULL); - cgrp2 = perf_cgroup_from_task(next, NULL); - - /* - * only schedule out current cgroup events if we know - * that we are switching to a different cgroup. Otherwise, - * do no touch the cgroup events. - */ - if (cgrp1 != cgrp2) - perf_cgroup_switch(task, PERF_CGROUP_SWOUT); - - rcu_read_unlock(); -} - -static inline void perf_cgroup_sched_in(struct task_struct *prev, - struct task_struct *task) -{ - struct perf_cgroup *cgrp1; - struct perf_cgroup *cgrp2 = NULL; - - rcu_read_lock(); - /* - * we come here when we know perf_cgroup_events > 0 - * we do not need to pass the ctx here because we know - * we are holding the rcu lock - */ - cgrp1 = perf_cgroup_from_task(task, NULL); - cgrp2 = perf_cgroup_from_task(prev, NULL); - - /* - * only need to schedule in cgroup events if we are changing - * cgroup during ctxsw. Cgroup events were not scheduled - * out of ctxsw out if that was not the case. - */ - if (cgrp1 != cgrp2) - perf_cgroup_switch(task, PERF_CGROUP_SWIN); - - rcu_read_unlock(); -} - static int perf_cgroup_ensure_storage(struct perf_event *event, struct cgroup_subsys_state *css) { @@ -1096,16 +1035,6 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, { } -static inline void perf_cgroup_sched_out(struct task_struct *task, - struct task_struct *next) -{ -} - -static inline void perf_cgroup_sched_in(struct task_struct *prev, - struct task_struct *task) -{ -} - static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event, struct perf_event_attr *attr, struct perf_event *group_leader) @@ -1118,11 +1047,6 @@ perf_cgroup_set_timestamp(struct perf_cpu_context *cpuctx) { } -static inline void -perf_cgroup_switch(struct task_struct *task, struct task_struct *next) -{ -} - static inline u64 perf_cgroup_event_time(struct perf_event *event) { return 0; @@ -1142,6 +1066,10 @@ static inline void perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx) { } + +static void perf_cgroup_switch(struct task_struct *task) +{ +} #endif /* @@ -3661,7 +3589,7 @@ void __perf_event_task_sched_out(struct task_struct *task, * cgroup event are system-wide mode only */ if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) - perf_cgroup_sched_out(task, next); + perf_cgroup_switch(next); } /* @@ -3975,16 +3903,6 @@ void __perf_event_task_sched_in(struct task_struct *prev, struct perf_event_context *ctx; int ctxn; - /* - * If cgroup events exist on this CPU, then we need to check if we have - * to switch in PMU state; cgroup event are system-wide mode only. - * - * Since cgroup events are CPU events, we must schedule these in before - * we schedule in the task events. - */ - if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) - perf_cgroup_sched_in(prev, task); - for_each_task_context_nr(ctxn) { ctx = task->perf_event_ctxp[ctxn]; if (likely(!ctx)) @@ -13556,7 +13474,7 @@ static int __perf_cgroup_move(void *info) { struct task_struct *task = info; rcu_read_lock(); - perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN); + perf_cgroup_switch(task); rcu_read_unlock(); return 0; } From e19cd0b6fa5938c51d7b928010d584f0de93913a Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Tue, 29 Mar 2022 23:45:23 +0800 Subject: [PATCH 239/708] perf/core: Always set cpuctx cgrp when enable cgroup event When enable a cgroup event, cpuctx->cgrp setting is conditional on the current task cgrp matching the event's cgroup, so have to do it for every new event. It brings complexity but no advantage. To keep it simple, this patch would always set cpuctx->cgrp when enable the first cgroup event, and reset to NULL when disable the last cgroup event. Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220329154523.86438-5-zhouchengming@bytedance.com --- kernel/events/core.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index bdeb41fe7f15..23bb19716ad3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -967,22 +967,10 @@ perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ct */ cpuctx = container_of(ctx, struct perf_cpu_context, ctx); - /* - * Since setting cpuctx->cgrp is conditional on the current @cgrp - * matching the event's cgroup, we must do this for every new event, - * because if the first would mismatch, the second would not try again - * and we would leave cpuctx->cgrp unset. - */ - if (ctx->is_active && !cpuctx->cgrp) { - struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx); - - if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) - cpuctx->cgrp = cgrp; - } - if (ctx->nr_cgroups++) return; + cpuctx->cgrp = perf_cgroup_from_task(current, ctx); list_add(&cpuctx->cgrp_cpuctx_entry, per_cpu_ptr(&cgrp_cpuctx_list, event->cpu)); } @@ -1004,9 +992,7 @@ perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *c if (--ctx->nr_cgroups) return; - if (ctx->is_active && cpuctx->cgrp) - cpuctx->cgrp = NULL; - + cpuctx->cgrp = NULL; list_del(&cpuctx->cgrp_cpuctx_entry); } From 9ce02f0fc68326dd1f87a0a3a4c6ae7fdd39e6f6 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Thu, 24 Mar 2022 11:37:42 +0900 Subject: [PATCH 240/708] x86/bug: Prevent shadowing in __WARN_FLAGS The macro __WARN_FLAGS() uses a local variable named "f". This being a common name, there is a risk of shadowing other variables. For example, GCC would yield: | In file included from ./include/linux/bug.h:5, | from ./include/linux/cpumask.h:14, | from ./arch/x86/include/asm/cpumask.h:5, | from ./arch/x86/include/asm/msr.h:11, | from ./arch/x86/include/asm/processor.h:22, | from ./arch/x86/include/asm/timex.h:5, | from ./include/linux/timex.h:65, | from ./include/linux/time32.h:13, | from ./include/linux/time.h:60, | from ./include/linux/stat.h:19, | from ./include/linux/module.h:13, | from virt/lib/irqbypass.mod.c:1: | ./include/linux/rcupdate.h: In function 'rcu_head_after_call_rcu': | ./arch/x86/include/asm/bug.h:80:21: warning: declaration of 'f' shadows a parameter [-Wshadow] | 80 | __auto_type f = BUGFLAG_WARNING|(flags); \ | | ^ | ./include/asm-generic/bug.h:106:17: note: in expansion of macro '__WARN_FLAGS' | 106 | __WARN_FLAGS(BUGFLAG_ONCE | \ | | ^~~~~~~~~~~~ | ./include/linux/rcupdate.h:1007:9: note: in expansion of macro 'WARN_ON_ONCE' | 1007 | WARN_ON_ONCE(func != (rcu_callback_t)~0L); | | ^~~~~~~~~~~~ | In file included from ./include/linux/rbtree.h:24, | from ./include/linux/mm_types.h:11, | from ./include/linux/buildid.h:5, | from ./include/linux/module.h:14, | from virt/lib/irqbypass.mod.c:1: | ./include/linux/rcupdate.h:1001:62: note: shadowed declaration is here | 1001 | rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) | | ~~~~~~~~~~~~~~~^ For reference, sparse also warns about it, c.f. [1]. This patch renames the variable from f to __flags (with two underscore prefixes as suggested in the Linux kernel coding style [2]) in order to prevent collisions. [1] https://lore.kernel.org/all/CAFGhKbyifH1a+nAMCvWM88TK6fpNPdzFtUXPmRGnnQeePV+1sw@mail.gmail.com/ [2] Linux kernel coding style, section 12) Macros, Enums and RTL, paragraph 5) namespace collisions when defining local variables in macros resembling functions https://www.kernel.org/doc/html/latest/process/coding-style.html#macros-enums-and-rtl Fixes: bfb1a7c91fb7 ("x86/bug: Merge annotate_reachable() into_BUG_FLAGS() asm") Signed-off-by: Vincent Mailhol Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Nick Desaulniers Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20220324023742.106546-1-mailhol.vincent@wanadoo.fr --- arch/x86/include/asm/bug.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 4d20a293c6fd..aaf0cb0db4ae 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -78,9 +78,9 @@ do { \ */ #define __WARN_FLAGS(flags) \ do { \ - __auto_type f = BUGFLAG_WARNING|(flags); \ + __auto_type __flags = BUGFLAG_WARNING|(flags); \ instrumentation_begin(); \ - _BUG_FLAGS(ASM_UD2, f, ASM_REACHABLE); \ + _BUG_FLAGS(ASM_UD2, __flags, ASM_REACHABLE); \ instrumentation_end(); \ } while (0) From d139bca4b824ffb9731763c31b271a24b595948a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 22 Mar 2022 12:33:31 +0100 Subject: [PATCH 241/708] objtool: Fix IBT tail-call detection Objtool reports: arch/x86/crypto/poly1305-x86_64.o: warning: objtool: poly1305_blocks_avx() falls through to next function poly1305_blocks_x86_64() arch/x86/crypto/poly1305-x86_64.o: warning: objtool: poly1305_emit_avx() falls through to next function poly1305_emit_x86_64() arch/x86/crypto/poly1305-x86_64.o: warning: objtool: poly1305_blocks_avx2() falls through to next function poly1305_blocks_x86_64() Which reads like: 0000000000000040 : 40: f3 0f 1e fa endbr64 ... 0000000000000400 : 400: f3 0f 1e fa endbr64 404: 44 8b 47 14 mov 0x14(%rdi),%r8d 408: 48 81 fa 80 00 00 00 cmp $0x80,%rdx 40f: 73 09 jae 41a 411: 45 85 c0 test %r8d,%r8d 414: 0f 84 2a fc ff ff je 44 ... These are simple conditional tail-calls and *should* be recognised as such by objtool, however due to a mistake in commit 08f87a93c8ec ("objtool: Validate IBT assumptions") this is failing. Specifically, the jump_dest is +4, this means the instruction pointed at will not be ENDBR and as such it will fail the second clause of is_first_func_insn() that was supposed to capture this exact case. Instead, have is_first_func_insn() look at the previous instruction. Fixes: 08f87a93c8ec ("objtool: Validate IBT assumptions") Reported-by: Stephen Rothwell Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220322115125.811582125@infradead.org --- tools/objtool/check.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 6de5085e3e5a..b848e1ddd5d8 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1239,11 +1239,20 @@ static bool same_function(struct instruction *insn1, struct instruction *insn2) return insn1->func->pfunc == insn2->func->pfunc; } -static bool is_first_func_insn(struct instruction *insn) +static bool is_first_func_insn(struct objtool_file *file, struct instruction *insn) { - return insn->offset == insn->func->offset || - (insn->type == INSN_ENDBR && - insn->offset == insn->func->offset + insn->len); + if (insn->offset == insn->func->offset) + return true; + + if (ibt) { + struct instruction *prev = prev_insn_same_sym(file, insn); + + if (prev && prev->type == INSN_ENDBR && + insn->offset == insn->func->offset + prev->len) + return true; + } + + return false; } /* @@ -1327,7 +1336,7 @@ static int add_jump_destinations(struct objtool_file *file) insn->jump_dest->func->pfunc = insn->func; } else if (!same_function(insn, insn->jump_dest) && - is_first_func_insn(insn->jump_dest)) { + is_first_func_insn(file, insn->jump_dest)) { /* internal sibling call (without reloc) */ add_call_dest(file, insn, insn->jump_dest->func, true); } From 7a53f408902d913cd541b4f8ad7dbcd4961f5b82 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 23 Mar 2022 23:35:01 +0100 Subject: [PATCH 242/708] objtool: Fix SLS validation for kcov tail-call replacement Since not all compilers have a function attribute to disable KCOV instrumentation, objtool can rewrite KCOV instrumentation in noinstr functions as per commit: f56dae88a81f ("objtool: Handle __sanitize_cov*() tail calls") However, this has subtle interaction with the SLS validation from commit: 1cc1e4c8aab4 ("objtool: Add straight-line-speculation validation") In that when a tail-call instrucion is replaced with a RET an additional INT3 instruction is also written, but is not represented in the decoded instruction stream. This then leads to false positive missing INT3 objtool warnings in noinstr code. Instead of adding additional struct instruction objects, mark the RET instruction with retpoline_safe to suppress the warning (since we know there really is an INT3). Fixes: 1cc1e4c8aab4 ("objtool: Add straight-line-speculation validation") Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220323230712.GA8939@worktop.programming.kicks-ass.net --- tools/objtool/check.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index b848e1ddd5d8..bd0c2c828940 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1155,6 +1155,17 @@ static void annotate_call_site(struct objtool_file *file, : arch_nop_insn(insn->len)); insn->type = sibling ? INSN_RETURN : INSN_NOP; + + if (sibling) { + /* + * We've replaced the tail-call JMP insn by two new + * insn: RET; INT3, except we only have a single struct + * insn here. Mark it retpoline_safe to avoid the SLS + * warning, instead of adding another insn. + */ + insn->retpoline_safe = true; + } + return; } From 42193ffd79bd3acd91bd947e53f3548a3661d0a1 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 2 Apr 2022 12:50:37 +0300 Subject: [PATCH 243/708] netfilter: nf_tables: memcg accounting for dynamically allocated objects nft_*.c files whose NFT_EXPR_STATEFUL flag is set on need to use __GFP_ACCOUNT flag for objects that are dynamically allocated from the packet path. Such objects are allocated inside nft_expr_ops->init() callbacks executed in task context while processing netlink messages. In addition, this patch adds accounting to nft_set_elem_expr_clone() used for the same purposes. Signed-off-by: Vasily Averin Acked-by: Roman Gushchin Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- net/netfilter/nft_connlimit.c | 2 +- net/netfilter/nft_counter.c | 2 +- net/netfilter/nft_last.c | 2 +- net/netfilter/nft_limit.c | 2 +- net/netfilter/nft_quota.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5ddfdb2adaf1..128ee3b300d6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5526,7 +5526,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, int err, i, k; for (i = 0; i < set->num_exprs; i++) { - expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL); + expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL_ACCOUNT); if (!expr) goto err_expr; diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 9de1462e4ac4..d657f999a11b 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -77,7 +77,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx, invert = true; } - priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL); + priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL_ACCOUNT); if (!priv->list) return -ENOMEM; diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index da9083605a61..f4d3573e8782 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -62,7 +62,7 @@ static int nft_counter_do_init(const struct nlattr * const tb[], struct nft_counter __percpu *cpu_stats; struct nft_counter *this_cpu; - cpu_stats = alloc_percpu(struct nft_counter); + cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_KERNEL_ACCOUNT); if (cpu_stats == NULL) return -ENOMEM; diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 43d0d4aadb1f..bb15a55dad5c 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -30,7 +30,7 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr, u64 last_jiffies; int err; - last = kzalloc(sizeof(*last), GFP_KERNEL); + last = kzalloc(sizeof(*last), GFP_KERNEL_ACCOUNT); if (!last) return -ENOMEM; diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index d4a6cf3cd697..04ea8b9bf202 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -90,7 +90,7 @@ static int nft_limit_init(struct nft_limit_priv *priv, priv->rate); } - priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL); + priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL_ACCOUNT); if (!priv->limit) return -ENOMEM; diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index d7db57ed3bc1..e6b0df68feea 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -90,7 +90,7 @@ static int nft_quota_do_init(const struct nlattr * const tb[], return -EOPNOTSUPP; } - priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL); + priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL_ACCOUNT); if (!priv->consumed) return -ENOMEM; From 2b04bd4f03bba021959ca339314f6739710f0954 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 4 Apr 2022 12:53:36 +0000 Subject: [PATCH 244/708] dpaa2-ptp: Fix refcount leak in dpaa2_ptp_probe This node pointer is returned by of_find_compatible_node() with refcount incremented. Calling of_node_put() to aovid the refcount leak. Fixes: d346c9e86d86 ("dpaa2-ptp: reuse ptp_qoriq driver") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220404125336.13427-1-linmq006@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c index 5f5f8c53c4a0..c8cb541572ff 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c @@ -167,7 +167,7 @@ static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev) base = of_iomap(node, 0); if (!base) { err = -ENOMEM; - goto err_close; + goto err_put; } err = fsl_mc_allocate_irqs(mc_dev); @@ -210,6 +210,8 @@ err_free_mc_irq: fsl_mc_free_irqs(mc_dev); err_unmap: iounmap(base); +err_put: + of_node_put(node); err_close: dprtc_close(mc_dev->mc_io, 0, mc_dev->mc_handle); err_free_mcp: From ccfee1822042b87e5135d33cad8ea353e64612d2 Mon Sep 17 00:00:00 2001 From: Anatolii Gerasymenko Date: Mon, 4 Apr 2022 11:35:47 -0700 Subject: [PATCH 245/708] ice: Set txq_teid to ICE_INVAL_TEID on ring creation When VF is freshly created, but not brought up, ring->txq_teid value is by default set to 0. But 0 is a valid TEID. On some platforms the Root Node of Tx scheduler has a TEID = 0. This can cause issues as shown below. The proper way is to set ring->txq_teid to ICE_INVAL_TEID (0xFFFFFFFF). Testing Hints: echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs ip link set dev ens785f0v0 up ip link set dev ens785f0v0 down If we have freshly created VF and quickly turn it on and off, so there would be no time to reach VIRTCHNL_OP_CONFIG_VSI_QUEUES stage, then VIRTCHNL_OP_DISABLE_QUEUES stage will fail with error: [ 639.531454] disable queue 89 failed 14 [ 639.532233] Failed to disable LAN Tx queues, error: ICE_ERR_AQ_ERROR [ 639.533107] ice 0000:02:00.0: Failed to stop Tx ring 0 on VSI 5 The reason for the fail is that we are trying to send AQ command to delete queue 89, which has never been created and receive an "invalid argument" error from firmware. As this queue has never been created, it's teid and ring->txq_teid have default value 0. ice_dis_vsi_txq has a check against non-existent queues: node = ice_sched_find_node_by_teid(pi->root, q_teids[i]); if (!node) continue; But on some platforms the Root Node of Tx scheduler has a teid = 0. Hence, ice_sched_find_node_by_teid finds a node with teid = 0 (it is pi->root), and we go further to submit an erroneous request to firmware. Fixes: 37bb83901286 ("ice: Move common functions out of ice_main.c part 7/7") Signed-off-by: Anatolii Gerasymenko Reviewed-by: Maciej Fijalkowski Tested-by: Konrad Jankowski Signed-off-by: Alice Michael Signed-off-by: Tony Nguyen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/ice_lib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 6d6233204388..2774cbd5b12a 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1480,6 +1480,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->tx_tstamps = &pf->ptp.port.tx; ring->dev = dev; ring->count = vsi->num_tx_desc; + ring->txq_teid = ICE_INVAL_TEID; if (dvm_ena) ring->flags |= ICE_TX_FLAGS_RING_VLAN_L2TAG2; else From 05ef6813b234db3196f083b91db3963f040b65bb Mon Sep 17 00:00:00 2001 From: Anatolii Gerasymenko Date: Mon, 4 Apr 2022 11:35:48 -0700 Subject: [PATCH 246/708] ice: Do not skip not enabled queues in ice_vc_dis_qs_msg Disable check for queue being enabled in ice_vc_dis_qs_msg, because there could be a case when queues were created, but were not enabled. We still need to delete those queues. Normal workflow for VF looks like: Enable path: VIRTCHNL_OP_ADD_ETH_ADDR (opcode 10) VIRTCHNL_OP_CONFIG_VSI_QUEUES (opcode 6) VIRTCHNL_OP_ENABLE_QUEUES (opcode 8) Disable path: VIRTCHNL_OP_DISABLE_QUEUES (opcode 9) VIRTCHNL_OP_DEL_ETH_ADDR (opcode 11) The issue appears only in stress conditions when VF is enabled and disabled very fast. Eventually there will be a case, when queues are created by VIRTCHNL_OP_CONFIG_VSI_QUEUES, but are not enabled by VIRTCHNL_OP_ENABLE_QUEUES. In turn, these queues are not deleted by VIRTCHNL_OP_DISABLE_QUEUES, because there is a check whether queues are enabled in ice_vc_dis_qs_msg. When we bring up the VF again, we will see the "Failed to set LAN Tx queue context" error during VIRTCHNL_OP_CONFIG_VSI_QUEUES step. This happens because old 16 queues were not deleted and VF requests to create 16 more, but ice_sched_get_free_qparent in ice_ena_vsi_txq would fail to find a parent node for first newly requested queue (because all nodes are allocated to 16 old queues). Testing Hints: Just enable and disable VF fast enough, so it would be disabled before reaching VIRTCHNL_OP_ENABLE_QUEUES. while true; do ip link set dev ens785f0v0 up sleep 0.065 # adjust delay value for you machine ip link set dev ens785f0v0 down done Fixes: 77ca27c41705 ("ice: add support for virtchnl_queue_select.[tx|rx]_queues bitmap") Signed-off-by: Anatolii Gerasymenko Tested-by: Konrad Jankowski Signed-off-by: Alice Michael Signed-off-by: Tony Nguyen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 3f1a63815bac..69ff4b929772 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -1358,9 +1358,9 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) goto error_param; } - /* Skip queue if not enabled */ if (!test_bit(vf_q_id, vf->txq_ena)) - continue; + dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n", + vf_q_id, vsi->vsi_num); ice_fill_txq_meta(vsi, ring, &txq_meta); From 1158f79f82d437093aeed87d57df0548bdd68146 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 4 Apr 2022 09:09:08 -0600 Subject: [PATCH 247/708] ipv6: Fix stats accounting in ip6_pkt_drop VRF devices are the loopbacks for VRFs, and a loopback can not be assigned to a VRF. Accordingly, the condition in ip6_pkt_drop should be '||' not '&&'. Fixes: 1d3fd8a10bed ("vrf: Use orig netdev to count Ip6InNoRoutes and a fresh route lookup when sending dest unreach") Reported-by: Pudak, Filip Reported-by: Xiao, Jiguang Signed-off-by: David Ahern Link: https://lore.kernel.org/r/20220404150908.2937-1-dsahern@kernel.org Signed-off-by: Paolo Abeni --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2fa10e60cccd..169e9df6d172 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4484,7 +4484,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) struct inet6_dev *idev; int type; - if (netif_is_l3_master(skb->dev) && + if (netif_is_l3_master(skb->dev) || dst->dev == net->loopback_dev) idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); else From 00c22013467069197dc006c943ca1f0395ca8aaa Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Wed, 30 Mar 2022 09:43:06 -0700 Subject: [PATCH 248/708] KVM: SEV: Add cond_resched() to loop in sev_clflush_pages() Add resched to avoid warning from sev_clflush_pages() with large number of pages. Signed-off-by: Peter Gonda Cc: Sean Christopherson Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Message-Id: <20220330164306.2376085-1-pgonda@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 75fa6dd268f0..c2fe89ecdb2d 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -465,6 +465,7 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages) page_virtual = kmap_atomic(pages[i]); clflush_cache_range(page_virtual, PAGE_SIZE); kunmap_atomic(page_virtual); + cond_resched(); } } From 1d0e84806047f38027d7572adb4702ef7c09b317 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 31 Mar 2022 22:13:59 +0000 Subject: [PATCH 249/708] KVM: x86/mmu: Resolve nx_huge_pages when kvm.ko is loaded Resolve nx_huge_pages to true/false when kvm.ko is loaded, leaving it as -1 is technically undefined behavior when its value is read out by param_get_bool(), as boolean values are supposed to be '0' or '1'. Alternatively, KVM could define a custom getter for the param, but the auto value doesn't depend on the vendor module in any way, and printing "auto" would be unnecessarily unfriendly to the user. In addition to fixing the undefined behavior, resolving the auto value also fixes the scenario where the auto value resolves to N and no vendor module is loaded. Previously, -1 would result in Y being printed even though KVM would ultimately disable the mitigation. Rename the existing MMU module init/exit helpers to clarify that they're invoked with respect to the vendor module, and add comments to document why KVM has two separate "module init" flows. ========================================================================= UBSAN: invalid-load in kernel/params.c:320:33 load of value 255 is not a valid value for type '_Bool' CPU: 6 PID: 892 Comm: tail Not tainted 5.17.0-rc3+ #799 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Call Trace: dump_stack_lvl+0x34/0x44 ubsan_epilogue+0x5/0x40 __ubsan_handle_load_invalid_value.cold+0x43/0x48 param_get_bool.cold+0xf/0x14 param_attr_show+0x55/0x80 module_attr_show+0x1c/0x30 sysfs_kf_seq_show+0x93/0xc0 seq_read_iter+0x11c/0x450 new_sync_read+0x11b/0x1a0 vfs_read+0xf0/0x190 ksys_read+0x5f/0xe0 do_syscall_64+0x3b/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xae ========================================================================= Fixes: b8e8c8303ff2 ("kvm: mmu: ITLB_MULTIHIT mitigation") Cc: stable@vger.kernel.org Reported-by: Bruno Goncalves Reported-by: Jan Stancek Signed-off-by: Sean Christopherson Message-Id: <20220331221359.3912754-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 5 +++-- arch/x86/kvm/mmu/mmu.c | 20 ++++++++++++++++---- arch/x86/kvm/x86.c | 20 ++++++++++++++++++-- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d23e80a56eb8..0d37ba442de3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1585,8 +1585,9 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) #define kvm_arch_pmi_in_guest(vcpu) \ ((vcpu) && (vcpu)->arch.handling_intr_from_guest) -int kvm_mmu_module_init(void); -void kvm_mmu_module_exit(void); +void kvm_mmu_x86_module_init(void); +int kvm_mmu_vendor_module_init(void); +void kvm_mmu_vendor_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 8f19ea752704..f9080ee50ffa 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6237,12 +6237,24 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) return 0; } -int kvm_mmu_module_init(void) +/* + * nx_huge_pages needs to be resolved to true/false when kvm.ko is loaded, as + * its default value of -1 is technically undefined behavior for a boolean. + */ +void kvm_mmu_x86_module_init(void) { - int ret = -ENOMEM; - if (nx_huge_pages == -1) __set_nx_huge_pages(get_nx_auto_mode()); +} + +/* + * The bulk of the MMU initialization is deferred until the vendor module is + * loaded as many of the masks/values may be modified by VMX or SVM, i.e. need + * to be reset when a potentially different vendor module is loaded. + */ +int kvm_mmu_vendor_module_init(void) +{ + int ret = -ENOMEM; /* * MMU roles use union aliasing which is, generally speaking, an @@ -6290,7 +6302,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu) mmu_free_memory_caches(vcpu); } -void kvm_mmu_module_exit(void) +void kvm_mmu_vendor_module_exit(void) { mmu_destroy_caches(); percpu_counter_destroy(&kvm_total_used_mmu_pages); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c0ca599a353..de49a88df1c2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8926,7 +8926,7 @@ int kvm_arch_init(void *opaque) } kvm_nr_uret_msrs = 0; - r = kvm_mmu_module_init(); + r = kvm_mmu_vendor_module_init(); if (r) goto out_free_percpu; @@ -8974,7 +8974,7 @@ void kvm_arch_exit(void) cancel_work_sync(&pvclock_gtod_work); #endif kvm_x86_ops.hardware_enable = NULL; - kvm_mmu_module_exit(); + kvm_mmu_vendor_module_exit(); free_percpu(user_return_msrs); kmem_cache_destroy(x86_emulator_cache); #ifdef CONFIG_KVM_XEN @@ -12986,3 +12986,19 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit); + +static int __init kvm_x86_init(void) +{ + kvm_mmu_x86_module_init(); + return 0; +} +module_init(kvm_x86_init); + +static void __exit kvm_x86_exit(void) +{ + /* + * If module_init() is implemented, module_exit() must also be + * implemented to allow module unload. + */ +} +module_exit(kvm_x86_exit); From 3203a56a0f0eaaf4ea7fc01467378c4bce3841ff Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Fri, 1 Apr 2022 08:35:30 +0000 Subject: [PATCH 250/708] KVM: x86/mmu: remove unnecessary flush_workqueue() All work currently pending will be done first by calling destroy_workqueue, so there is unnecessary to flush it explicitly. Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Reviewed-by: Sean Christopherson Message-Id: <20220401083530.2407703-1-lv.ruyi@zte.com.cn> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index d71d177ae6b8..c472769e0300 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -51,7 +51,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) if (!kvm->arch.tdp_mmu_enabled) return; - flush_workqueue(kvm->arch.tdp_mmu_zap_wq); + /* Also waits for any queued work items. */ destroy_workqueue(kvm->arch.tdp_mmu_zap_wq); WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages)); From c1be1ef1b4a7589878d63673b7b322856989064e Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Sun, 3 Apr 2022 13:57:36 +0700 Subject: [PATCH 251/708] Documentation: kvm: Add missing line break in api.rst Add missing line break separator between literal block and description of KVM_EXIT_RISCV_SBI. This fixes: /Documentation/virt/kvm/api.rst:6118: WARNING: Literal block ends without a blank line; unexpected unindent. Fixes: da40d85805937d (RISC-V: KVM: Document RISC-V specific parts of KVM API, 2021-09-27) Cc: Anup Patel Cc: Paolo Bonzini Cc: Jonathan Corbet Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-riscv@lists.infradead.org Signed-off-by: Bagas Sanjaya Message-Id: <20220403065735.23859-1-bagasdotme@gmail.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index d13fa6600467..85c7abc51af5 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6190,6 +6190,7 @@ Valid values for 'type' are: unsigned long args[6]; unsigned long ret[2]; } riscv_sbi; + If exit reason is KVM_EXIT_RISCV_SBI then it indicates that the VCPU has done a SBI call which is not handled by KVM RISC-V kernel module. The details of the SBI call are available in 'riscv_sbi' member of kvm_run structure. The From c31bc0461f861ec68e026b5d4d27394bcb722068 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 29 Mar 2022 16:25:52 +0200 Subject: [PATCH 252/708] powerpc/pseries/vas: use default_groups in kobj_type There are currently 2 ways to create a set of sysfs files for a kobj_type, through the default_attrs field, and the default_groups field. Move the pseries vas sysfs code to use default_groups field which has been the preferred way since aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") so that we can soon get rid of the obsolete default_attrs field. Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Haren Myneni Cc: Nicholas Piggin Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/20220329142552.558339-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/vas-sysfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c index 4a7fcde5afc0..909535ca513a 100644 --- a/arch/powerpc/platforms/pseries/vas-sysfs.c +++ b/arch/powerpc/platforms/pseries/vas-sysfs.c @@ -99,6 +99,7 @@ static struct attribute *vas_def_capab_attrs[] = { &nr_used_credits_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(vas_def_capab); static struct attribute *vas_qos_capab_attrs[] = { &nr_total_credits_attribute.attr, @@ -106,6 +107,7 @@ static struct attribute *vas_qos_capab_attrs[] = { &update_total_credits_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(vas_qos_capab); static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr, char *buf) @@ -154,13 +156,13 @@ static const struct sysfs_ops vas_sysfs_ops = { static struct kobj_type vas_def_attr_type = { .release = vas_type_release, .sysfs_ops = &vas_sysfs_ops, - .default_attrs = vas_def_capab_attrs, + .default_groups = vas_def_capab_groups, }; static struct kobj_type vas_qos_attr_type = { .release = vas_type_release, .sysfs_ops = &vas_sysfs_ops, - .default_attrs = vas_qos_capab_attrs, + .default_groups = vas_qos_capab_groups, }; static char *vas_caps_kobj_name(struct vas_caps_entry *centry, From cdb4f26a63c391317e335e6e683a614358e70aeb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 6 Jan 2022 14:31:51 +0100 Subject: [PATCH 253/708] kobject: kobj_type: remove default_attrs Now that all in-kernel users of default_attrs for the kobj_type are gone and converted to properly use the default_groups pointer instead, it can be safely removed. There is one standard way to create sysfs files in a kobj_type, and not two like before, causing confusion as to which should be used. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20220106133151.607703-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 13 ------------- include/linux/kobject.h | 1 - lib/kobject.c | 32 -------------------------------- 3 files changed, 46 deletions(-) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 42dcf96881b6..a12ac0356c69 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -703,19 +703,6 @@ int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid) ktype = get_ktype(kobj); if (ktype) { - struct attribute **kattr; - - /* - * Change owner of the default attributes associated with the - * ktype of @kobj. - */ - for (kattr = ktype->default_attrs; kattr && *kattr; kattr++) { - error = sysfs_file_change_owner(kobj, (*kattr)->name, - kuid, kgid); - if (error) - return error; - } - /* * Change owner of the default groups associated with the * ktype of @kobj. diff --git a/include/linux/kobject.h b/include/linux/kobject.h index c7b47399b36a..57fb972fea05 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -120,7 +120,6 @@ extern char *kobject_get_path(struct kobject *kobj, gfp_t flag); struct kobj_type { void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; - struct attribute **default_attrs; /* use default_groups instead */ const struct attribute_group **default_groups; const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); const void *(*namespace)(struct kobject *kobj); diff --git a/lib/kobject.c b/lib/kobject.c index 56fa037501b5..5f0e71ab292c 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -54,32 +54,6 @@ void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) kobj->ktype->get_ownership(kobj, uid, gid); } -/* - * populate_dir - populate directory with attributes. - * @kobj: object we're working on. - * - * Most subsystems have a set of default attributes that are associated - * with an object that registers with them. This is a helper called during - * object registration that loops through the default attributes of the - * subsystem and creates attributes files for them in sysfs. - */ -static int populate_dir(struct kobject *kobj) -{ - const struct kobj_type *t = get_ktype(kobj); - struct attribute *attr; - int error = 0; - int i; - - if (t && t->default_attrs) { - for (i = 0; (attr = t->default_attrs[i]) != NULL; i++) { - error = sysfs_create_file(kobj, attr); - if (error) - break; - } - } - return error; -} - static int create_dir(struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); @@ -90,12 +64,6 @@ static int create_dir(struct kobject *kobj) if (error) return error; - error = populate_dir(kobj); - if (error) { - sysfs_remove_dir(kobj); - return error; - } - if (ktype) { error = sysfs_create_groups(kobj, ktype->default_groups); if (error) { From 48bff1053c172e6c7f340e506027d118147c8b7f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 5 Apr 2022 15:57:05 +0200 Subject: [PATCH 254/708] random: opportunistically initialize on /dev/urandom reads In 6f98a4bfee72 ("random: block in /dev/urandom"), we tried to make a successful try_to_generate_entropy() call *required* if the RNG was not already initialized. Unfortunately, weird architectures and old userspaces combined in TCG test harnesses, making that change still not realistic, so it was reverted in 0313bc278dac ("Revert "random: block in /dev/urandom""). However, rather than making a successful try_to_generate_entropy() call *required*, we can instead make it *best-effort*. If try_to_generate_entropy() fails, it fails, and nothing changes from the current behavior. If it succeeds, then /dev/urandom becomes safe to use for free. This way, we don't risk the regression potential that led to us reverting the required-try_to_generate_entropy() call before. Practically speaking, this means that at least on x86, /dev/urandom becomes safe. Probably other architectures with working cycle counters will also become safe. And architectures with slow or broken cycle counters at least won't be affected at all by this change. So it may not be the glorious "all things are unified!" change we were hoping for initially, but practically speaking, it makes a positive impact. Cc: Theodore Ts'o Cc: Dominik Brodowski Cc: Linus Torvalds Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/char/random.c b/drivers/char/random.c index ee3ad2ba0942..388025d6d38d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1534,6 +1534,13 @@ static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, { static int maxwarn = 10; + /* + * Opportunistically attempt to initialize the RNG on platforms that + * have fast cycle counters, but don't (for now) require it to succeed. + */ + if (!crng_ready()) + try_to_generate_entropy(); + if (!crng_ready() && maxwarn > 0) { maxwarn--; if (__ratelimit(&urandom_warning)) From 6150f276073a1480030242a7e006a89e161d6cd6 Mon Sep 17 00:00:00 2001 From: Kyle Copperfield Date: Sat, 20 Nov 2021 13:23:02 +0100 Subject: [PATCH 255/708] media: rockchip/rga: do proper error checking in probe The latest fix for probe error handling contained a typo that causes probing to fail with the following message: rockchip-rga: probe of ff680000.rga failed with error -12 This patch fixes the typo. Fixes: e58430e1d4fd (media: rockchip/rga: fix error handling in probe) Reviewed-by: Dragan Simic Signed-off-by: Kyle Copperfield Reviewed-by: Kieran Bingham Reviewed-by: Dan Carpenter Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/rockchip/rga/rga.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c index 4de5e8d2b261..3d3d1062e212 100644 --- a/drivers/media/platform/rockchip/rga/rga.c +++ b/drivers/media/platform/rockchip/rga/rga.c @@ -892,7 +892,7 @@ static int rga_probe(struct platform_device *pdev) } rga->dst_mmu_pages = (unsigned int *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 3); - if (rga->dst_mmu_pages) { + if (!rga->dst_mmu_pages) { ret = -ENOMEM; goto free_src_pages; } From fa4dcc880390fbedf4118e9f88a6b13363e0a7a1 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 17 Mar 2022 21:19:56 +0800 Subject: [PATCH 256/708] irq/qcom-mpm: Fix build error without MAILBOX If MAILBOX is n, building fails: drivers/irqchip/irq-qcom-mpm.o: In function `mpm_pd_power_off': irq-qcom-mpm.c:(.text+0x174): undefined reference to `mbox_send_message' irq-qcom-mpm.c:(.text+0x174): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `mbox_send_message' Make QCOM_MPM depends on MAILBOX to fix this. Fixes: a6199bb514d8 ("irqchip: Add Qualcomm MPM controller driver") Signed-off-by: YueHaibing Acked-by: Shawn Guo Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220317131956.30004-1-yuehaibing@huawei.com --- drivers/irqchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 680d2fcf2686..15edb9a6fcae 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -433,6 +433,7 @@ config QCOM_PDC config QCOM_MPM tristate "QCOM MPM" depends on ARCH_QCOM + depends on MAILBOX select IRQ_DOMAIN_HIERARCHY help MSM Power Manager driver to manage and configure wakeup From 76ff614a79152cee07a2c48080c3dc91c56f0f1d Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 16 Mar 2022 10:51:00 +0800 Subject: [PATCH 257/708] irqchip/irq-qcom-mpm: fix return value check in qcom_mpm_init() If devm_platform_ioremap_resource() fails, it never returns NULL, replace NULL check with IS_ERR(). Fixes: a6199bb514d8 ("irqchip: Add Qualcomm MPM controller driver") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Acked-by: Shawn Guo Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220316025100.1758413-1-yangyingliang@huawei.com --- drivers/irqchip/irq-qcom-mpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c index eea5a753618c..d30614661eea 100644 --- a/drivers/irqchip/irq-qcom-mpm.c +++ b/drivers/irqchip/irq-qcom-mpm.c @@ -375,7 +375,7 @@ static int qcom_mpm_init(struct device_node *np, struct device_node *parent) raw_spin_lock_init(&priv->lock); priv->base = devm_platform_ioremap_resource(pdev, 0); - if (!priv->base) + if (IS_ERR(priv->base)) return PTR_ERR(priv->base); for (i = 0; i < priv->reg_stride; i++) { From af27e41612ec7e5b4783f589b753a7c31a37aac8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 17 Mar 2022 09:49:02 +0000 Subject: [PATCH 258/708] irqchip/gic-v4: Wait for GICR_VPENDBASER.Dirty to clear before descheduling The way KVM drives GICv4.{0,1} is as follows: - vcpu_load() makes the VPE resident, instructing the RD to start scanning for interrupts - just before entering the guest, we check that the RD has finished scanning and that we can start running the vcpu - on preemption, we deschedule the VPE by making it invalid on the RD However, we are preemptible between the first two steps. If it so happens *and* that the RD was still scanning, we nonetheless write to the GICR_VPENDBASER register while Dirty is set, and bad things happen (we're in UNPRED land). This affects both the 4.0 and 4.1 implementations. Make sure Dirty is cleared before performing the deschedule, meaning that its_clear_vpend_valid() becomes a sort of full VPE residency barrier. Reported-by: Jingyi Wang Tested-by: Nianyao Tang Signed-off-by: Marc Zyngier Fixes: 57e3cebd022f ("KVM: arm64: Delay the polling of the GICR_VPENDBASER.Dirty bit") Link: https://lore.kernel.org/r/4aae10ba-b39a-5f84-754b-69c2eb0a2c03@huawei.com --- drivers/irqchip/irq-gic-v3-its.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index cd772973114a..a0fc764ec9dc 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3011,18 +3011,12 @@ static int __init allocate_lpi_tables(void) return 0; } -static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set) +static u64 read_vpend_dirty_clear(void __iomem *vlpi_base) { u32 count = 1000000; /* 1s! */ bool clean; u64 val; - val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER); - val &= ~GICR_VPENDBASER_Valid; - val &= ~clr; - val |= set; - gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); - do { val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER); clean = !(val & GICR_VPENDBASER_Dirty); @@ -3033,10 +3027,26 @@ static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set) } } while (!clean && count); - if (unlikely(val & GICR_VPENDBASER_Dirty)) { + if (unlikely(!clean)) pr_err_ratelimited("ITS virtual pending table not cleaning\n"); + + return val; +} + +static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set) +{ + u64 val; + + /* Make sure we wait until the RD is done with the initial scan */ + val = read_vpend_dirty_clear(vlpi_base); + val &= ~GICR_VPENDBASER_Valid; + val &= ~clr; + val |= set; + gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + + val = read_vpend_dirty_clear(vlpi_base); + if (unlikely(val & GICR_VPENDBASER_Dirty)) val |= GICR_VPENDBASER_PendingLast; - } return val; } From 0df6664531a12cdd8fc873f0cac0dcb40243d3e9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 15 Mar 2022 16:50:32 +0000 Subject: [PATCH 259/708] irqchip/gic-v3: Fix GICR_CTLR.RWP polling It turns out that our polling of RWP is totally wrong when checking for it in the redistributors, as we test the *distributor* bit index, whereas it is a different bit number in the RDs... Oopsie boo. This is embarassing. Not only because it is wrong, but also because it took *8 years* to notice the blunder... Just fix the damn thing. Fixes: 021f653791ad ("irqchip: gic-v3: Initial support for GICv3") Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Reviewed-by: Andre Przywara Reviewed-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20220315165034.794482-2-maz@kernel.org --- drivers/irqchip/irq-gic-v3.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 0efe1a9a9f3b..9b6316582515 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -206,11 +206,11 @@ static inline void __iomem *gic_dist_base(struct irq_data *d) } } -static void gic_do_wait_for_rwp(void __iomem *base) +static void gic_do_wait_for_rwp(void __iomem *base, u32 bit) { u32 count = 1000000; /* 1s! */ - while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) { + while (readl_relaxed(base + GICD_CTLR) & bit) { count--; if (!count) { pr_err_ratelimited("RWP timeout, gone fishing\n"); @@ -224,13 +224,13 @@ static void gic_do_wait_for_rwp(void __iomem *base) /* Wait for completion of a distributor change */ static void gic_dist_wait_for_rwp(void) { - gic_do_wait_for_rwp(gic_data.dist_base); + gic_do_wait_for_rwp(gic_data.dist_base, GICD_CTLR_RWP); } /* Wait for completion of a redistributor change */ static void gic_redist_wait_for_rwp(void) { - gic_do_wait_for_rwp(gic_data_rdist_rd_base()); + gic_do_wait_for_rwp(gic_data_rdist_rd_base(), GICR_CTLR_RWP); } #ifdef CONFIG_ARM64 From 544808f7e21cb9ccdb8f3aa7de594c05b1419061 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 4 Apr 2022 12:08:42 +0100 Subject: [PATCH 260/708] irqchip/gic, gic-v3: Prevent GSI to SGI translations At the moment the GIC IRQ domain translation routine happily converts ACPI table GSI numbers below 16 to GIC SGIs (Software Generated Interrupts aka IPIs). On the Devicetree side we explicitly forbid this translation, actually the function will never return HWIRQs below 16 when using a DT based domain translation. We expect SGIs to be handled in the first part of the function, and any further occurrence should be treated as a firmware bug, so add a check and print to report this explicitly and avoid lengthy debug sessions. Fixes: 64b499d8df40 ("irqchip/gic-v3: Configure SGIs as standard interrupts") Signed-off-by: Andre Przywara Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220404110842.2882446-1-andre.przywara@arm.com --- drivers/irqchip/irq-gic-v3.c | 6 ++++++ drivers/irqchip/irq-gic.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 9b6316582515..b252d5534547 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1466,6 +1466,12 @@ static int gic_irq_domain_translate(struct irq_domain *d, if(fwspec->param_count != 2) return -EINVAL; + if (fwspec->param[0] < 16) { + pr_err(FW_BUG "Illegal GSI%d translation request\n", + fwspec->param[0]); + return -EINVAL; + } + *hwirq = fwspec->param[0]; *type = fwspec->param[1]; diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 58ba835bee1f..09c710ecc387 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1123,6 +1123,12 @@ static int gic_irq_domain_translate(struct irq_domain *d, if(fwspec->param_count != 2) return -EINVAL; + if (fwspec->param[0] < 16) { + pr_err(FW_BUG "Illegal GSI%d translation request\n", + fwspec->param[0]); + return -EINVAL; + } + *hwirq = fwspec->param[0]; *type = fwspec->param[1]; From f9124c68f05ffdb87a47e3ea6d5fae9dad7cb6eb Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 17 Mar 2022 19:36:27 +0100 Subject: [PATCH 261/708] ice: synchronize_rcu() when terminating rings Unfortunately, the ice driver doesn't respect the RCU critical section that XSK wakeup is surrounded with. To fix this, add synchronize_rcu() calls to paths that destroy resources that might be in use. This was addressed in other AF_XDP ZC enabled drivers, for reference see for example commit b3873a5be757 ("net/i40e: Fix concurrency issues between config flow and XSK") Fixes: efc2214b6047 ("ice: Add support for XDP") Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Signed-off-by: Maciej Fijalkowski Tested-by: Shwetha Nagaraju Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 2 +- drivers/net/ethernet/intel/ice/ice_main.c | 4 +++- drivers/net/ethernet/intel/ice/ice_xsk.c | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 26eaee0b6503..8ed3c9ab7ff7 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -671,7 +671,7 @@ static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev) static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi) { - return !!vsi->xdp_prog; + return !!READ_ONCE(vsi->xdp_prog); } static inline void ice_set_ring_xdp(struct ice_tx_ring *ring) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 1d2ca39add95..d2039a9306b8 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2758,8 +2758,10 @@ free_qmap: ice_for_each_xdp_txq(vsi, i) if (vsi->xdp_rings[i]) { - if (vsi->xdp_rings[i]->desc) + if (vsi->xdp_rings[i]->desc) { + synchronize_rcu(); ice_free_tx_ring(vsi->xdp_rings[i]); + } kfree_rcu(vsi->xdp_rings[i], rcu); vsi->xdp_rings[i] = NULL; } diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index dfbcaf08520e..33b28a72ffcb 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -41,8 +41,10 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx) { ice_clean_tx_ring(vsi->tx_rings[q_idx]); - if (ice_is_xdp_ena_vsi(vsi)) + if (ice_is_xdp_ena_vsi(vsi)) { + synchronize_rcu(); ice_clean_tx_ring(vsi->xdp_rings[q_idx]); + } ice_clean_rx_ring(vsi->rx_rings[q_idx]); } From 72b915a2b444e9247c9d424a840e94263db07c27 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 17 Mar 2022 19:36:28 +0100 Subject: [PATCH 262/708] ice: xsk: fix VSI state check in ice_xsk_wakeup() ICE_DOWN is dedicated for pf->state. Check for ICE_VSI_DOWN being set on vsi->state in ice_xsk_wakeup(). Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Signed-off-by: Maciej Fijalkowski Tested-by: Shwetha Nagaraju Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 33b28a72ffcb..866ee4df9671 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -920,7 +920,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, struct ice_vsi *vsi = np->vsi; struct ice_tx_ring *ring; - if (test_bit(ICE_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state)) return -ENETDOWN; if (!ice_is_xdp_ena_vsi(vsi)) From e19778e6c911691856447c3bf9617f00b3e1347f Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 17 Mar 2022 19:36:29 +0100 Subject: [PATCH 263/708] ice: clear cmd_type_offset_bsz for TX rings Currently when XDP rings are created, each descriptor gets its DD bit set, which turns out to be the wrong approach as it can lead to a situation where more descriptors get cleaned than it was supposed to, e.g. when AF_XDP busy poll is run with a large batch size. In this situation, the driver would request for more buffers than it is able to handle. Fix this by not setting the DD bits in ice_xdp_alloc_setup_rings(). They should be initialized to zero instead. Fixes: 9610bd988df9 ("ice: optimize XDP_TX workloads") Signed-off-by: Maciej Fijalkowski Tested-by: Shwetha Nagaraju Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d2039a9306b8..d768925785ca 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2562,7 +2562,7 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) spin_lock_init(&xdp_ring->tx_lock); for (j = 0; j < xdp_ring->count; j++) { tx_desc = ICE_TX_DESC(xdp_ring, j); - tx_desc->cmd_type_offset_bsz = cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE); + tx_desc->cmd_type_offset_bsz = 0; } } From c9db8a30d9f091aa571b5fb7c3f434cde107b02c Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Tue, 5 Apr 2022 15:36:22 +0300 Subject: [PATCH 264/708] ALSA: hda/i915 - skip acomp init if no matching display In systems with only a discrete i915 GPU, the acomp init will always timeout for the PCH HDA controller instance. Avoid the timeout by checking the PCI device hierarchy whether any display class PCI device can be found on the system, and at the same level as the HDA PCI device. If found, proceed with the acomp init, which will wait until i915 probe is complete and component binding can proceed. If no matching display device is found, the audio component bind can be safely skipped. The bind timeout will still be hit if the display is present in the system, but i915 driver does not bind to it by configuration choice or probe error. In this case the 60sec timeout will be hit. Signed-off-by: Kai Vehmanen Acked-by: Lucas De Marchi Link: https://lore.kernel.org/r/20220405123622.2874457-1-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/hdac_i915.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c index efe810af28c5..48b8ed752b69 100644 --- a/sound/hda/hdac_i915.c +++ b/sound/hda/hdac_i915.c @@ -116,16 +116,25 @@ static int i915_component_master_match(struct device *dev, int subcomponent, return 0; } -/* check whether intel graphics is present */ -static bool i915_gfx_present(void) +/* check whether Intel graphics is present and reachable */ +static int i915_gfx_present(struct pci_dev *hdac_pci) { - static const struct pci_device_id ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_ANY_ID), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16 }, - {} - }; - return pci_dev_present(ids); + unsigned int class = PCI_BASE_CLASS_DISPLAY << 16; + struct pci_dev *display_dev = NULL; + bool match = false; + + do { + display_dev = pci_get_class(class, display_dev); + + if (display_dev && display_dev->vendor == PCI_VENDOR_ID_INTEL && + connectivity_check(display_dev, hdac_pci)) + match = true; + + pci_dev_put(display_dev); + + } while (!match && display_dev); + + return match; } /** @@ -145,7 +154,7 @@ int snd_hdac_i915_init(struct hdac_bus *bus) struct drm_audio_component *acomp; int err; - if (!i915_gfx_present()) + if (!i915_gfx_present(to_pci_dev(bus->dev))) return -ENODEV; err = snd_hdac_acomp_init(bus, NULL, From 1ef8715975de8bd481abbd0839ed4f49d9e5b0ff Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2022 17:15:08 +0200 Subject: [PATCH 265/708] ALSA: usb-audio: Fix undefined behavior due to shift overflowing the constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: sound/usb/midi.c: In function ‘snd_usbmidi_out_endpoint_create’: sound/usb/midi.c:1389:2: error: case label does not reduce to an integer constant case USB_ID(0xfc08, 0x0101): /* Unknown vendor Cable */ ^~~~ See https://lore.kernel.org/r/YkwQ6%2BtIH8GQpuct@zn.tnic for the gory details as to why it triggers with older gccs only. [ A slight correction with parentheses around the argument by tiwai ] Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220405151517.29753-3-bp@alien8.de Signed-off-by: Takashi Iwai --- sound/usb/usbaudio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index 167834133b9b..b8359a0aa008 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -8,7 +8,7 @@ */ /* handling of USB vendor/product ID pairs as 32-bit numbers */ -#define USB_ID(vendor, product) (((vendor) << 16) | (product)) +#define USB_ID(vendor, product) (((unsigned int)(vendor) << 16) | (product)) #define USB_ID_VENDOR(id) ((id) >> 16) #define USB_ID_PRODUCT(id) ((u16)(id)) From 73924ec4d560257004d5b5116b22a3647661e364 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 4 Apr 2022 17:34:19 -0700 Subject: [PATCH 266/708] x86/pm: Save the MSR validity status at context setup The mechanism to save/restore MSRs during S3 suspend/resume checks for the MSR validity during suspend, and only restores the MSR if its a valid MSR. This is not optimal, as an invalid MSR will unnecessarily throw an exception for every suspend cycle. The more invalid MSRs, higher the impact will be. Check and save the MSR validity at setup. This ensures that only valid MSRs that are guaranteed to not throw an exception will be attempted during suspend. Fixes: 7a9c2dd08ead ("x86/pm: Introduce quirk framework to save/restore extra MSR registers around suspend/resume") Suggested-by: Dave Hansen Signed-off-by: Pawan Gupta Reviewed-by: Dave Hansen Acked-by: Borislav Petkov Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- arch/x86/power/cpu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 9f2b251e83c5..eaec0cb3fe04 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -40,7 +40,8 @@ static void msr_save_context(struct saved_context *ctxt) struct saved_msr *end = msr + ctxt->saved_msrs.num; while (msr < end) { - msr->valid = !rdmsrl_safe(msr->info.msr_no, &msr->info.reg.q); + if (msr->valid) + rdmsrl(msr->info.msr_no, msr->info.reg.q); msr++; } } @@ -424,8 +425,10 @@ static int msr_build_context(const u32 *msr_id, const int num) } for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) { + u64 dummy; + msr_array[i].info.msr_no = msr_id[j]; - msr_array[i].valid = false; + msr_array[i].valid = !rdmsrl_safe(msr_id[j], &dummy); msr_array[i].info.reg.q = 0; } saved_msrs->num = total_num; From e2a1256b17b16f9b9adf1b6fea56819e7b68e463 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 4 Apr 2022 17:35:45 -0700 Subject: [PATCH 267/708] x86/speculation: Restore speculation related MSRs during S3 resume After resuming from suspend-to-RAM, the MSRs that control CPU's speculative execution behavior are not being restored on the boot CPU. These MSRs are used to mitigate speculative execution vulnerabilities. Not restoring them correctly may leave the CPU vulnerable. Secondary CPU's MSRs are correctly being restored at S3 resume by identify_secondary_cpu(). During S3 resume, restore these MSRs for boot CPU when restoring its processor state. Fixes: 772439717dbf ("x86/bugs/intel: Set proper CPU features and setup RDS") Reported-by: Neelima Krishnan Signed-off-by: Pawan Gupta Tested-by: Neelima Krishnan Acked-by: Borislav Petkov Reviewed-by: Dave Hansen Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- arch/x86/power/cpu.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index eaec0cb3fe04..3822666fb73d 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -503,10 +503,24 @@ static int pm_cpu_check(const struct x86_cpu_id *c) return ret; } +static void pm_save_spec_msr(void) +{ + u32 spec_msr_id[] = { + MSR_IA32_SPEC_CTRL, + MSR_IA32_TSX_CTRL, + MSR_TSX_FORCE_ABORT, + MSR_IA32_MCU_OPT_CTRL, + MSR_AMD64_LS_CFG, + }; + + msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); +} + static int pm_check_save_msr(void) { dmi_check_system(msr_save_dmi_table); pm_cpu_check(msr_save_cpu_table); + pm_save_spec_msr(); return 0; } From ac2a3feefad549814f5e7cca30be07a255c8494a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 5 Apr 2022 19:49:26 +0200 Subject: [PATCH 268/708] ACPI: bus: Eliminate acpi_bus_get_device() Replace the last instance of acpi_bus_get_device(), added recently by commit 87e59b36e5e2 ("spi: Support selection of the index of the ACPI Spi Resource before alloc"), with acpi_fetch_acpi_dev() and finally drop acpi_bus_get_device() that has no more users. Signed-off-by: Rafael J. Wysocki Acked-by: Mark Brown --- drivers/acpi/scan.c | 13 ------------- drivers/spi/spi.c | 3 ++- include/acpi/acpi_bus.h | 1 - 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 9efbfe087de7..762b61f67e6c 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -588,19 +588,6 @@ static struct acpi_device *handle_to_device(acpi_handle handle, return adev; } -int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device) -{ - if (!device) - return -EINVAL; - - *device = handle_to_device(handle, NULL); - if (!*device) - return -ENODEV; - - return 0; -} -EXPORT_SYMBOL(acpi_bus_get_device); - /** * acpi_fetch_acpi_dev - Retrieve ACPI device object. * @handle: ACPI handle associated with the requested ACPI device object. diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index c4dd1200fe99..9bb5400af205 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2406,7 +2406,8 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data) } else { struct acpi_device *adev; - if (acpi_bus_get_device(parent_handle, &adev)) + adev = acpi_fetch_acpi_dev(parent_handle); + if (!adev) return -ENODEV; ctlr = acpi_spi_find_controller_by_adev(adev); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 3f7f01f03869..c4b78c21d793 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -509,7 +509,6 @@ extern int unregister_acpi_notifier(struct notifier_block *); * External Functions */ -int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device); struct acpi_device *acpi_fetch_acpi_dev(acpi_handle handle); acpi_status acpi_bus_get_status_handle(acpi_handle handle, unsigned long long *sta); From 9435be734ae9de020072bd4443d46e02d92564d1 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Wed, 23 Mar 2022 09:45:58 +0800 Subject: [PATCH 269/708] btrfs: zoned: remove redundant condition in btrfs_run_delalloc_range The logic !A || A && B is equivalent to !A || B. so we can make code clear. Note: though it's preferred to be in the more human readable form, there have been repeated reports and patches as the expression is detected by tools so apply it to reduce the load. Reviewed-by: Johannes Thumshirn Signed-off-by: Haowen Bai Reviewed-by: David Sterba [ add note ] Signed-off-by: David Sterba --- fs/btrfs/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5aab6af88349..286ab7c27db0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2016,8 +2016,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page * to use run_delalloc_nocow() here, like for regular * preallocated inodes. */ - ASSERT(!zoned || - (zoned && btrfs_is_data_reloc_root(inode->root))); + ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root)); ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, nr_written); } else if (!inode_can_compress(inode) || From 6d4a6b515c39f1f8763093e0f828959b2fbc2f45 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 24 Mar 2022 08:36:45 -0700 Subject: [PATCH 270/708] btrfs: remove unused variable in btrfs_{start,write}_dirty_block_groups() Clang's version of -Wunused-but-set-variable recently gained support for unary operations, which reveals two unused variables: fs/btrfs/block-group.c:2949:6: error: variable 'num_started' set but not used [-Werror,-Wunused-but-set-variable] int num_started = 0; ^ fs/btrfs/block-group.c:3116:6: error: variable 'num_started' set but not used [-Werror,-Wunused-but-set-variable] int num_started = 0; ^ 2 errors generated. These variables appear to be unused from their introduction, so just remove them to silence the warnings. Fixes: c9dc4c657850 ("Btrfs: two stage dirty block group writeout") Fixes: 1bbc621ef284 ("Btrfs: allow block group cache writeout outside critical section in commit") CC: stable@vger.kernel.org # 5.4+ Link: https://github.com/ClangBuiltLinux/linux/issues/1614 Signed-off-by: Nathan Chancellor Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index c22d287e020b..9ad265066225 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2946,7 +2946,6 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) struct btrfs_path *path = NULL; LIST_HEAD(dirty); struct list_head *io = &cur_trans->io_bgs; - int num_started = 0; int loops = 0; spin_lock(&cur_trans->dirty_bgs_lock); @@ -3012,7 +3011,6 @@ again: cache->io_ctl.inode = NULL; ret = btrfs_write_out_cache(trans, cache, path); if (ret == 0 && cache->io_ctl.inode) { - num_started++; should_put = 0; /* @@ -3113,7 +3111,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) int should_put; struct btrfs_path *path; struct list_head *io = &cur_trans->io_bgs; - int num_started = 0; path = btrfs_alloc_path(); if (!path) @@ -3171,7 +3168,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) cache->io_ctl.inode = NULL; ret = btrfs_write_out_cache(trans, cache, path); if (ret == 0 && cache->io_ctl.inode) { - num_started++; should_put = 0; list_add_tail(&cache->io_list, io); } else { From 6d82ad13c4110e73c7b0392f00534a1502a1b520 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Mon, 28 Mar 2022 21:32:05 +0900 Subject: [PATCH 271/708] btrfs: release correct delalloc amount in direct IO write path Running generic/406 causes the following WARNING in btrfs_destroy_inode() which tells there are outstanding extents left. In btrfs_get_blocks_direct_write(), we reserve a temporary outstanding extents with btrfs_delalloc_reserve_metadata() (or indirectly from btrfs_delalloc_reserve_space(()). We then release the outstanding extents with btrfs_delalloc_release_extents(). However, the "len" can be modified in the COW case, which releases fewer outstanding extents than expected. Fix it by calling btrfs_delalloc_release_extents() for the original length. To reproduce the warning, the filesystem should be 1 GiB. It's triggering a short-write, due to not being able to allocate a large extent and instead allocating a smaller one. WARNING: CPU: 0 PID: 757 at fs/btrfs/inode.c:8848 btrfs_destroy_inode+0x1e6/0x210 [btrfs] Modules linked in: btrfs blake2b_generic xor lzo_compress lzo_decompress raid6_pq zstd zstd_decompress zstd_compress xxhash zram zsmalloc CPU: 0 PID: 757 Comm: umount Not tainted 5.17.0-rc8+ #101 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS d55cb5a 04/01/2014 RIP: 0010:btrfs_destroy_inode+0x1e6/0x210 [btrfs] RSP: 0018:ffffc9000327bda8 EFLAGS: 00010206 RAX: 0000000000000000 RBX: ffff888100548b78 RCX: 0000000000000000 RDX: 0000000000026900 RSI: 0000000000000000 RDI: ffff888100548b78 RBP: ffff888100548940 R08: 0000000000000000 R09: ffff88810b48aba8 R10: 0000000000000001 R11: ffff8881004eb240 R12: ffff88810b48a800 R13: ffff88810b48ec08 R14: ffff88810b48ed00 R15: ffff888100490c68 FS: 00007f8549ea0b80(0000) GS:ffff888237c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f854a09e733 CR3: 000000010a2e9003 CR4: 0000000000370eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: destroy_inode+0x33/0x70 dispose_list+0x43/0x60 evict_inodes+0x161/0x1b0 generic_shutdown_super+0x2d/0x110 kill_anon_super+0xf/0x20 btrfs_kill_super+0xd/0x20 [btrfs] deactivate_locked_super+0x27/0x90 cleanup_mnt+0x12c/0x180 task_work_run+0x54/0x80 exit_to_user_mode_prepare+0x152/0x160 syscall_exit_to_user_mode+0x12/0x30 do_syscall_64+0x42/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f854a000fb7 Fixes: f0bfa76a11e9 ("btrfs: fix ENOSPC failure when attempting direct IO write into NOCOW range") CC: stable@vger.kernel.org # 5.17 Reviewed-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Reviewed-by: Filipe Manana Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 286ab7c27db0..53a3f5e5ae89 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7442,6 +7442,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, u64 block_start, orig_start, orig_block_len, ram_bytes; bool can_nocow = false; bool space_reserved = false; + u64 prev_len; int ret = 0; /* @@ -7469,6 +7470,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, can_nocow = true; } + prev_len = len; if (can_nocow) { struct extent_map *em2; @@ -7498,8 +7500,6 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, goto out; } } else { - const u64 prev_len = len; - /* Our caller expects us to free the input extent map. */ free_extent_map(em); *map = NULL; @@ -7530,7 +7530,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, * We have created our ordered extent, so we can now release our reservation * for an outstanding extent. */ - btrfs_delalloc_release_extents(BTRFS_I(inode), len); + btrfs_delalloc_release_extents(BTRFS_I(inode), prev_len); /* * Need to update the i_size under the extent lock so buffered From d03ae0d3b687223de24d3dd1a7bca96034aeca25 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 30 Mar 2022 12:14:05 +0300 Subject: [PATCH 272/708] btrfs: remove support of balance v1 ioctl It was scheduled for removal in kernel v5.18 commit 6c405b24097c ("btrfs: deprecate BTRFS_IOC_BALANCE ioctl") thus its time has come. Reviewed-by: Sweet Tea Dorminy Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f46e71061942..be6c24577dbe 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -5456,8 +5456,6 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_fs_info(fs_info, argp); case BTRFS_IOC_DEV_INFO: return btrfs_ioctl_dev_info(fs_info, argp); - case BTRFS_IOC_BALANCE: - return btrfs_ioctl_balance(file, NULL); case BTRFS_IOC_TREE_SEARCH: return btrfs_ioctl_tree_search(inode, argp); case BTRFS_IOC_TREE_SEARCH_V2: From a690e5f2db4d1dca742ce734aaff9f3112d63764 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 29 Mar 2022 15:55:58 +0900 Subject: [PATCH 273/708] btrfs: mark resumed async balance as writing When btrfs balance is interrupted with umount, the background balance resumes on the next mount. There is a potential deadlock with FS freezing here like as described in commit 26559780b953 ("btrfs: zoned: mark relocation as writing"). Mark the process as sb_writing to avoid it. Reviewed-by: Filipe Manana CC: stable@vger.kernel.org # 4.9+ Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2cfbc74a3b4e..a8cc736731fd 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4430,10 +4430,12 @@ static int balance_kthread(void *data) struct btrfs_fs_info *fs_info = data; int ret = 0; + sb_start_write(fs_info->sb); mutex_lock(&fs_info->balance_mutex); if (fs_info->balance_ctl) ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL); mutex_unlock(&fs_info->balance_mutex); + sb_end_write(fs_info->sb); return ret; } From 820c363bd526ec8e133e4b84e6ad1fda12023b4b Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 22 Mar 2022 18:11:33 +0900 Subject: [PATCH 274/708] btrfs: return allocated block group from do_chunk_alloc() Return the allocated block group from do_chunk_alloc(). This is a preparation patch for the next patch. CC: stable@vger.kernel.org # 5.16+ Reviewed-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 9ad265066225..c2b898fe7b24 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3451,7 +3451,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type) return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); } -static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) +static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) { struct btrfs_block_group *bg; int ret; @@ -3538,7 +3538,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) out: btrfs_trans_release_chunk_metadata(trans); - return ret; + if (ret) + return ERR_PTR(ret); + + btrfs_get_block_group(bg); + return bg; } /* @@ -3653,6 +3657,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_space_info *space_info; + struct btrfs_block_group *ret_bg; bool wait_for_alloc = false; bool should_alloc = false; int ret = 0; @@ -3746,9 +3751,14 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, force_metadata_allocation(fs_info); } - ret = do_chunk_alloc(trans, flags); + ret_bg = do_chunk_alloc(trans, flags); trans->allocating_chunk = false; + if (IS_ERR(ret_bg)) + ret = PTR_ERR(ret_bg); + else + btrfs_put_block_group(ret_bg); + spin_lock(&space_info->lock); if (ret < 0) { if (ret == -ENOSPC) From 760e69c4c2e2f475a812bdd414b62758215ce9cb Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 22 Mar 2022 18:11:34 +0900 Subject: [PATCH 275/708] btrfs: zoned: activate block group only for extent allocation In btrfs_make_block_group(), we activate the allocated block group, expecting that the block group is soon used for allocation. However, the chunk allocation from flush_space() context broke the assumption. There can be a large time gap between the chunk allocation time and the extent allocation time from the chunk. Activating the empty block groups pre-allocated from flush_space() context can exhaust the active zone counter of a device. Once we use all the active zone counts for empty pre-allocated block groups, we cannot activate new block group for the other things: metadata, tree-log, or data relocation block group. That failure results in a fake -ENOSPC. This patch introduces CHUNK_ALLOC_FORCE_FOR_EXTENT to distinguish the chunk allocation from find_free_extent(). Now, the new block group is activated only in that context. Fixes: eb66a010d518 ("btrfs: zoned: activate new block group") CC: stable@vger.kernel.org # 5.16+ Reviewed-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 24 ++++++++++++++++-------- fs/btrfs/block-group.h | 4 ++++ fs/btrfs/extent-tree.c | 2 +- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index c2b898fe7b24..0dd6de994199 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2503,12 +2503,6 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran return ERR_PTR(ret); } - /* - * New block group is likely to be used soon. Try to activate it now. - * Failure is OK for now. - */ - btrfs_zone_activate(cache); - ret = exclude_super_stripes(cache); if (ret) { /* We may have excluded something, so call this just in case */ @@ -3660,8 +3654,14 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, struct btrfs_block_group *ret_bg; bool wait_for_alloc = false; bool should_alloc = false; + bool from_extent_allocation = false; int ret = 0; + if (force == CHUNK_ALLOC_FORCE_FOR_EXTENT) { + from_extent_allocation = true; + force = CHUNK_ALLOC_FORCE; + } + /* Don't re-enter if we're already allocating a chunk */ if (trans->allocating_chunk) return -ENOSPC; @@ -3754,9 +3754,17 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, ret_bg = do_chunk_alloc(trans, flags); trans->allocating_chunk = false; - if (IS_ERR(ret_bg)) + if (IS_ERR(ret_bg)) { ret = PTR_ERR(ret_bg); - else + } else if (from_extent_allocation) { + /* + * New block group is likely to be used soon. Try to activate + * it now. Failure is OK for now. + */ + btrfs_zone_activate(ret_bg); + } + + if (!ret) btrfs_put_block_group(ret_bg); spin_lock(&space_info->lock); diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 93aabc68bb6a..e8308f2ad07d 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -35,11 +35,15 @@ enum btrfs_discard_state { * the FS with empty chunks * * CHUNK_ALLOC_FORCE means it must try to allocate one + * + * CHUNK_ALLOC_FORCE_FOR_EXTENT like CHUNK_ALLOC_FORCE but called from + * find_free_extent() that also activaes the zone */ enum btrfs_chunk_alloc_enum { CHUNK_ALLOC_NO_FORCE, CHUNK_ALLOC_LIMITED, CHUNK_ALLOC_FORCE, + CHUNK_ALLOC_FORCE_FOR_EXTENT, }; struct btrfs_caching_control { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f477035a2ac2..6aa92f84f465 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4082,7 +4082,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, } ret = btrfs_chunk_alloc(trans, ffe_ctl->flags, - CHUNK_ALLOC_FORCE); + CHUNK_ALLOC_FORCE_FOR_EXTENT); /* Do not bail out on ENOSPC since we can do more. */ if (ret == -ENOSPC) From 168a2f776b9762f4021421008512dd7ab7474df1 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Thu, 24 Mar 2022 06:44:54 -0700 Subject: [PATCH 276/708] btrfs: fix root ref counts in error handling in btrfs_get_root_ref In btrfs_get_root_ref(), when btrfs_insert_fs_root() fails, btrfs_put_root() can happen for two reasons: - the root already exists in the tree, in that case it returns the reference obtained in btrfs_lookup_fs_root() - another error so the cleanup is done in the fail label Calling btrfs_put_root() unconditionally would lead to double decrement of the root reference possibly freeing it in the second case. Reported-by: TOTE Robot Fixes: bc44d7c4b2b1 ("btrfs: push btrfs_grab_fs_root into btrfs_get_fs_root") CC: stable@vger.kernel.org # 5.10+ Signed-off-by: Jia-Ju Bai Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 09693ab4fde0..cebd7a78c964 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1849,9 +1849,10 @@ again: ret = btrfs_insert_fs_root(fs_info, root); if (ret) { - btrfs_put_root(root); - if (ret == -EEXIST) + if (ret == -EEXIST) { + btrfs_put_root(root); goto again; + } goto fail; } return root; From acee08aaf6d158d03668dc82b0a0eef41100531b Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Thu, 31 Mar 2022 14:58:28 -0700 Subject: [PATCH 277/708] btrfs: fix btrfs_submit_compressed_write cgroup attribution This restores the logic from commit 46bcff2bfc5e ("btrfs: fix compressed write bio blkcg attribution") which added cgroup attribution to btrfs writeback. It also adds back the REQ_CGROUP_PUNT flag for these ios. Fixes: 91507240482e ("btrfs: determine stripe boundary at bio allocation time in btrfs_submit_compressed_write") CC: stable@vger.kernel.org # 5.16+ Signed-off-by: Dennis Zhou Signed-off-by: David Sterba --- fs/btrfs/compression.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index be476f094300..19bf36d8ffea 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -537,6 +537,9 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cb->orig_bio = NULL; cb->nr_pages = nr_pages; + if (blkcg_css) + kthread_associate_blkcg(blkcg_css); + while (cur_disk_bytenr < disk_start + compressed_len) { u64 offset = cur_disk_bytenr - disk_start; unsigned int index = offset >> PAGE_SHIFT; @@ -555,6 +558,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, bio = NULL; goto finish_cb; } + if (blkcg_css) + bio->bi_opf |= REQ_CGROUP_PUNT; } /* * We should never reach next_stripe_start start as we will @@ -612,6 +617,9 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, return 0; finish_cb: + if (blkcg_css) + kthread_associate_blkcg(NULL); + if (bio) { bio->bi_status = ret; bio_endio(bio); From 82e32bc31e794f13cc028e8c709c4a217ff410ff Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 6 Apr 2022 00:55:40 +0200 Subject: [PATCH 278/708] ARM: config: Refresh U8500 defconfig This just updates the U8500 defconfig to reflect what has happened in the Kconfig: DRM_PANEL_SONY_ACX424AKP is now handled by DRM_PANEL_NOVATEK_NT35560, all ST sensors have SPI version drivers that we don't use, and some debug options moved around. Signed-off-by: Linus Walleij --- arch/arm/configs/u8500_defconfig | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig index 3b30913d7d8d..c6d55bf76555 100644 --- a/arch/arm/configs/u8500_defconfig +++ b/arch/arm/configs/u8500_defconfig @@ -20,7 +20,6 @@ CONFIG_VFP=y CONFIG_NEON=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_CMA=y CONFIG_NET=y @@ -98,10 +97,10 @@ CONFIG_VIDEO_V4L2_SUBDEV_API=y CONFIG_V4L2_FLASH_LED_CLASS=y CONFIG_DRM=y CONFIG_DRM_PANEL_NOVATEK_NT35510=y +CONFIG_DRM_PANEL_NOVATEK_NT35560=y CONFIG_DRM_PANEL_SAMSUNG_S6D16D0=y CONFIG_DRM_PANEL_SAMSUNG_S6E63M0=y CONFIG_DRM_PANEL_SAMSUNG_S6E63M0_DSI=y -CONFIG_DRM_PANEL_SONY_ACX424AKP=y CONFIG_DRM_LIMA=y CONFIG_DRM_MCDE=y CONFIG_FB=y @@ -144,17 +143,21 @@ CONFIG_IIO_SW_TRIGGER=y CONFIG_BMA180=y CONFIG_BMC150_ACCEL=y CONFIG_IIO_ST_ACCEL_3AXIS=y +# CONFIG_IIO_ST_ACCEL_SPI_3AXIS is not set CONFIG_IIO_RESCALE=y CONFIG_MPU3050_I2C=y CONFIG_IIO_ST_GYRO_3AXIS=y +# CONFIG_IIO_ST_GYRO_SPI_3AXIS is not set CONFIG_INV_MPU6050_I2C=y CONFIG_BH1780=y CONFIG_GP2AP002=y CONFIG_AK8974=y CONFIG_IIO_ST_MAGN_3AXIS=y +# CONFIG_IIO_ST_MAGN_SPI_3AXIS is not set CONFIG_YAMAHA_YAS530=y CONFIG_IIO_HRTIMER_TRIGGER=y CONFIG_IIO_ST_PRESS=y +# CONFIG_IIO_ST_PRESS_SPI is not set CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y @@ -173,10 +176,9 @@ CONFIG_CRYPTO_DEV_UX500_CRYP=y CONFIG_CRYPTO_DEV_UX500_HASH=y CONFIG_CRYPTO_DEV_UX500_DEBUG=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y # CONFIG_SCHED_DEBUG is not set # CONFIG_FTRACE is not set CONFIG_DEBUG_USER=y From 62c31868f528e8657947694913f1e76db816425b Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 31 Mar 2022 14:31:51 +0200 Subject: [PATCH 279/708] media: platform: imx-mipi-csis: Add dependency on VIDEO_DEV The imx-mipi-csis driver (VIDEO_IMX_MIPI_CSIS) lost its dependency on VIDEO_DEV in commit 63fe3d27b226 ("media: platform/*/Kconfig: make manufacturer menus more uniform"). This causes build failures with configurations that don't have VIDEO_DEV set. Fix it by restoring the dependency. Link: https://lore.kernel.org/linux-media/20220331123151.1953-1-laurent.pinchart@ideasonboard.com Fixes: 63fe3d27b226 ("media: platform/*/Kconfig: make manufacturer menus more uniform") Reported-by: kernel test robot Signed-off-by: Laurent Pinchart Signed-off-by: Randy Dunlap Reported-by: kernel test robot Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/nxp/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/nxp/Kconfig b/drivers/media/platform/nxp/Kconfig index 28f2bafc14d2..5afa373e534f 100644 --- a/drivers/media/platform/nxp/Kconfig +++ b/drivers/media/platform/nxp/Kconfig @@ -7,6 +7,7 @@ comment "NXP media platform drivers" config VIDEO_IMX_MIPI_CSIS tristate "NXP MIPI CSI-2 CSIS receiver found on i.MX7 and i.MX8 models" depends on ARCH_MXC || COMPILE_TEST + depends on VIDEO_DEV select MEDIA_CONTROLLER select V4L2_FWNODE select VIDEO_V4L2_SUBDEV_API From 1d7e4fd72bb9be080c23a099b0dff1007109fc2b Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 5 Apr 2022 08:59:36 +0200 Subject: [PATCH 280/708] net: micrel: Fix KS8851 Kconfig KS8851 selects MICREL_PHY, which depends on PTP_1588_CLOCK_OPTIONAL, so make KS8851 also depend on PTP_1588_CLOCK_OPTIONAL. Fixes kconfig warning and build errors: WARNING: unmet direct dependencies detected for MICREL_PHY Depends on [m]: NETDEVICES [=y] && PHYLIB [=y] && PTP_1588_CLOCK_OPTIONAL [=m] Selected by [y]: - KS8851 [=y] && NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_MICREL [=y] && SPI [=y] ld.lld: error: undefined symbol: ptp_clock_register referenced by micrel.c net/phy/micrel.o:(lan8814_probe) in archive drivers/built-in.a ld.lld: error: undefined symbol: ptp_clock_index referenced by micrel.c net/phy/micrel.o:(lan8814_ts_info) in archive drivers/built-in.a Reported-by: kernel test robot Fixes: ece19502834d ("net: phy: micrel: 1588 support for LAN8814 phy") Signed-off-by: Horatiu Vultur Tested-by: Randy Dunlap Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20220405065936.4105272-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/micrel/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/micrel/Kconfig b/drivers/net/ethernet/micrel/Kconfig index 1b632cdd7630..830363bafcce 100644 --- a/drivers/net/ethernet/micrel/Kconfig +++ b/drivers/net/ethernet/micrel/Kconfig @@ -28,6 +28,7 @@ config KS8842 config KS8851 tristate "Micrel KS8851 SPI" depends on SPI + depends on PTP_1588_CLOCK_OPTIONAL select MII select CRC32 select EEPROM_93CX6 From 3f2a3050b4a3e7f32fc0ea3c9b0183090ae00522 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 4 Apr 2022 12:41:50 +0200 Subject: [PATCH 281/708] net: openvswitch: don't send internal clone attribute to the userspace. 'OVS_CLONE_ATTR_EXEC' is an internal attribute that is used for performance optimization inside the kernel. It's added by the kernel while parsing user-provided actions and should not be sent during the flow dump as it's not part of the uAPI. The issue doesn't cause any significant problems to the ovs-vswitchd process, because reported actions are not really used in the application lifecycle and only supposed to be shown to a human via ovs-dpctl flow dump. However, the action list is still incorrect and causes the following error if the user wants to look at the datapath flows: # ovs-dpctl add-dp system@ovs-system # ovs-dpctl add-flow "" "clone(ct(commit),0)" # ovs-dpctl dump-flows , packets:0, bytes:0, used:never, actions:clone(bad length 4, expected -1 for: action0(01 00 00 00), ct(commit),0) With the fix: # ovs-dpctl dump-flows , packets:0, bytes:0, used:never, actions:clone(ct(commit),0) Additionally fixed an incorrect attribute name in the comment. Fixes: b233504033db ("openvswitch: kernel datapath clone action") Signed-off-by: Ilya Maximets Acked-by: Aaron Conole Link: https://lore.kernel.org/r/20220404104150.2865736-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski --- net/openvswitch/actions.c | 2 +- net/openvswitch/flow_netlink.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 7056cb1b8ba0..1b5d73079dc9 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1051,7 +1051,7 @@ static int clone(struct datapath *dp, struct sk_buff *skb, int rem = nla_len(attr); bool dont_clone_flow_key; - /* The first action is always 'OVS_CLONE_ATTR_ARG'. */ + /* The first action is always 'OVS_CLONE_ATTR_EXEC'. */ clone_arg = nla_data(attr); dont_clone_flow_key = nla_get_u32(clone_arg); actions = nla_next(clone_arg, &rem); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index cc282a58b75b..dbdcaaa27f5b 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -3458,7 +3458,9 @@ static int clone_action_to_attr(const struct nlattr *attr, if (!start) return -EMSGSIZE; - err = ovs_nla_put_actions(nla_data(attr), rem, skb); + /* Skipping the OVS_CLONE_ATTR_EXEC that is always the first attribute. */ + attr = nla_next(nla_data(attr), &rem); + err = ovs_nla_put_actions(attr, rem, skb); if (err) nla_nest_cancel(skb, start); From 11f8e7c122ce013fa745029fa8c94c6db69c2e54 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 5 Apr 2022 02:04:04 +0200 Subject: [PATCH 282/708] net: ethernet: mv643xx: Fix over zealous checking of_get_mac_address() There is often not a MAC address available in an EEPROM accessible by Linux with Marvell devices. Instead the bootload has the MAC address and directly programs it into the hardware. So don't consider an error from of_get_mac_address() has fatal. However, the check was added for the case where there is a MAC address in an the EEPROM, but the EEPROM has not probed yet, and -EPROBE_DEFER is returned. In that case the error should be returned. So make the check specific to this error code. Cc: Mauri Sandberg Reported-by: Thomas Walther Fixes: 42404d8f1c01 ("net: mv643xx_eth: process retval from of_get_mac_address") Signed-off-by: Andrew Lunn Link: https://lore.kernel.org/r/20220405000404.3374734-1-andrew@lunn.ch Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mv643xx_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 5f9ab1842d49..c18801490649 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2751,7 +2751,7 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, } ret = of_get_mac_address(pnp, ppd.mac_addr); - if (ret) + if (ret == -EPROBE_DEFER) return ret; mv643xx_eth_property(pnp, "tx-queue-size", ppd.tx_queue_size); From 55b014159ee7af63770cd7f2b6fe926f6dd99335 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 6 Apr 2022 10:57:51 +0900 Subject: [PATCH 283/708] ata: ahci: Rename CONFIG_SATA_LPM_POLICY configuration item back CONFIG_SATA_LPM_MOBILE_POLICY was renamed to CONFIG_SATA_LPM_POLICY in commit 4dd4d3deb502 ("ata: ahci: Rename CONFIG_SATA_LPM_MOBILE_POLICY configuration item"). This can potentially cause problems as users would invisibly lose configuration policy defaults when they built the new kernel. To avoid such problems, switch back to the old name (even if it's wrong). Suggested-by: Christoph Hellwig Suggested-by: Damien Le Moal Signed-off-by: Mario Limonciello Signed-off-by: Damien Le Moal --- drivers/ata/Kconfig | 6 ++++-- drivers/ata/ahci.c | 2 +- drivers/ata/ahci.h | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index e5641e6c52ee..bb45a9c00514 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -115,14 +115,16 @@ config SATA_AHCI If unsure, say N. -config SATA_LPM_POLICY +config SATA_MOBILE_LPM_POLICY int "Default SATA Link Power Management policy for low power chipsets" range 0 4 default 0 depends on SATA_AHCI help Select the Default SATA Link Power Management (LPM) policy to use - for chipsets / "South Bridges" designated as supporting low power. + for chipsets / "South Bridges" supporting low-power modes. Such + chipsets are typically found on most laptops but desktops and + servers now also widely use chipsets supporting low power modes. The value set has the following meanings: 0 => Keep firmware settings diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 84456c05e845..397dfd27c90d 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1595,7 +1595,7 @@ static int ahci_init_msi(struct pci_dev *pdev, unsigned int n_ports, static void ahci_update_initial_lpm_policy(struct ata_port *ap, struct ahci_host_priv *hpriv) { - int policy = CONFIG_SATA_LPM_POLICY; + int policy = CONFIG_SATA_MOBILE_LPM_POLICY; /* Ignore processing for chipsets that don't use policy */ diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 6ead58c1b6e5..ad11a4c52fbe 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -236,7 +236,7 @@ enum { AHCI_HFLAG_NO_WRITE_TO_RO = (1 << 24), /* don't write to read only registers */ AHCI_HFLAG_USE_LPM_POLICY = (1 << 25), /* chipset that should use - SATA_LPM_POLICY + SATA_MOBILE_LPM_POLICY as default lpm_policy */ AHCI_HFLAG_SUSPEND_PHYS = (1 << 26), /* handle PHYs during suspend/resume */ From dd8adc713b1656ce469702eba8fc1adc4db91dc4 Mon Sep 17 00:00:00 2001 From: Li Yang Date: Mon, 7 Mar 2022 14:41:18 -0600 Subject: [PATCH 284/708] memory: fsl_ifc: populate child nodes of buses and mfd devices Commit 3e25f800afb8 ("memory: fsl_ifc: populate child devices without relying on simple-bus") was trying to replace the "simple-bus" compatible with explicit bus populate in the driver. But of_platform_populate() only populates child nodes of ifc without populating child buses and child mfd devices residing under ifc. Change it to of_platform_default_populate() to fix the problem. Fixes: 3e25f800afb8 ("memory: fsl_ifc: populate child devices without relying on simple-bus") Signed-off-by: Li Yang Link: https://lore.kernel.org/r/20220307204118.19093-1-leoyang.li@nxp.com Signed-off-by: Krzysztof Kozlowski --- drivers/memory/fsl_ifc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/memory/fsl_ifc.c b/drivers/memory/fsl_ifc.c index 2f6939da21cd..e83b61c925a4 100644 --- a/drivers/memory/fsl_ifc.c +++ b/drivers/memory/fsl_ifc.c @@ -287,8 +287,7 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev) } /* legacy dts may still use "simple-bus" compatible */ - ret = of_platform_populate(dev->dev.of_node, NULL, NULL, - &dev->dev); + ret = of_platform_default_populate(dev->dev.of_node, NULL, &dev->dev); if (ret) goto err_free_nandirq; From 93bcdaca6eccb6761194fa8340672792e17f8f66 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 6 Apr 2022 10:28:51 +0200 Subject: [PATCH 285/708] ARM: config: u8500: Add some common hardware This activates display drivers that give console on the different U8500 mobile phones, the GNSS subsystem and the SIRF GNSS driver so we can manage the GPS chips, the regulator LEDs as used in some phones and one more IIO light sensor driver. Signed-off-by: Linus Walleij --- arch/arm/configs/u8500_defconfig | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig index c6d55bf76555..159dd98d7f74 100644 --- a/arch/arm/configs/u8500_defconfig +++ b/arch/arm/configs/u8500_defconfig @@ -40,6 +40,8 @@ CONFIG_MAC80211_LEDS=y CONFIG_CAIF=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_GNSS=y +CONFIG_GNSS_SIRF_SERIAL=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_NETDEVICES=y @@ -98,9 +100,12 @@ CONFIG_V4L2_FLASH_LED_CLASS=y CONFIG_DRM=y CONFIG_DRM_PANEL_NOVATEK_NT35510=y CONFIG_DRM_PANEL_NOVATEK_NT35560=y +CONFIG_DRM_PANEL_SAMSUNG_DB7430=y CONFIG_DRM_PANEL_SAMSUNG_S6D16D0=y +CONFIG_DRM_PANEL_SAMSUNG_S6D27A1=y CONFIG_DRM_PANEL_SAMSUNG_S6E63M0=y CONFIG_DRM_PANEL_SAMSUNG_S6E63M0_DSI=y +CONFIG_DRM_PANEL_WIDECHIPS_WS2401=y CONFIG_DRM_LIMA=y CONFIG_DRM_MCDE=y CONFIG_FB=y @@ -128,6 +133,7 @@ CONFIG_LEDS_LM3530=y CONFIG_LEDS_GPIO=y CONFIG_LEDS_LP55XX_COMMON=y CONFIG_LEDS_LP5521=y +CONFIG_LEDS_REGULATOR=y CONFIG_LEDS_RT8515=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_RTC_CLASS=y @@ -151,6 +157,7 @@ CONFIG_IIO_ST_GYRO_3AXIS=y CONFIG_INV_MPU6050_I2C=y CONFIG_BH1780=y CONFIG_GP2AP002=y +CONFIG_TSL2772=y CONFIG_AK8974=y CONFIG_IIO_ST_MAGN_3AXIS=y # CONFIG_IIO_ST_MAGN_SPI_3AXIS is not set From 2da0aebc74dba6a09ac90b88e38860fbc65d6c0a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 22 Mar 2022 18:35:36 +0000 Subject: [PATCH 286/708] KVM: arm64: Generally disallow SMC64 for AArch32 guests The only valid calling SMC calling convention from an AArch32 state is SMC32. Disallow any PSCI function that sets the SMC64 function ID bit when called from AArch32 rather than comparing against known SMC64 PSCI functions. Note that without this change KVM advertises the SMC64 flavor of SYSTEM_RESET2 to AArch32 guests. Fixes: d43583b890e7 ("KVM: arm64: Expose PSCI SYSTEM_RESET2 call to the guest") Acked-by: Will Deacon Reviewed-by: Reiji Watanabe Reviewed-by: Andrew Jones Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220322183538.2757758-2-oupton@google.com --- arch/arm64/kvm/psci.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 372da09a2fab..a76d03d50624 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -215,15 +215,11 @@ static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu) static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn) { - switch(fn) { - case PSCI_0_2_FN64_CPU_SUSPEND: - case PSCI_0_2_FN64_CPU_ON: - case PSCI_0_2_FN64_AFFINITY_INFO: - /* Disallow these functions for 32bit guests */ - if (vcpu_mode_is_32bit(vcpu)) - return PSCI_RET_NOT_SUPPORTED; - break; - } + /* + * Prevent 32 bit guests from calling 64 bit PSCI functions. + */ + if ((fn & PSCI_0_2_64BIT) && vcpu_mode_is_32bit(vcpu)) + return PSCI_RET_NOT_SUPPORTED; return 0; } From 827c2ab3314814e1c7d873372c0fe0cad50ba1c5 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 22 Mar 2022 18:35:37 +0000 Subject: [PATCH 287/708] KVM: arm64: Actually prevent SMC64 SYSTEM_RESET2 from AArch32 The SMCCC does not allow the SMC64 calling convention to be used from AArch32. While KVM checks to see if the calling convention is allowed in PSCI_1_0_FN_PSCI_FEATURES, it does not actually prevent calls to unadvertised PSCI v1.0+ functions. Hoist the check to see if the requested function is allowed into kvm_psci_call(), thereby preventing SMC64 calls from AArch32 for all PSCI versions. Fixes: d43583b890e7 ("KVM: arm64: Expose PSCI SYSTEM_RESET2 call to the guest") Acked-by: Will Deacon Reviewed-by: Reiji Watanabe Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220322183538.2757758-3-oupton@google.com --- arch/arm64/kvm/psci.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index a76d03d50624..faf403a72fdf 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -231,10 +231,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) unsigned long val; int ret = 1; - val = kvm_psci_check_allowed_function(vcpu, psci_fn); - if (val) - goto out; - switch (psci_fn) { case PSCI_0_2_FN_PSCI_VERSION: /* @@ -302,7 +298,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) break; } -out: smccc_set_retval(vcpu, val, 0, 0, 0); return ret; } @@ -422,6 +417,15 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) */ int kvm_psci_call(struct kvm_vcpu *vcpu) { + u32 psci_fn = smccc_get_function(vcpu); + unsigned long val; + + val = kvm_psci_check_allowed_function(vcpu, psci_fn); + if (val) { + smccc_set_retval(vcpu, val, 0, 0, 0); + return 1; + } + switch (kvm_psci_version(vcpu)) { case KVM_ARM_PSCI_1_1: return kvm_psci_1_x_call(vcpu, 1); From 73b725c7a6c82eee10fa2d6752babefff795ca9a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 22 Mar 2022 18:35:38 +0000 Subject: [PATCH 288/708] KVM: arm64: Drop unneeded minor version check from PSCI v1.x handler We already sanitize the guest's PSCI version when it is being written by userspace, rejecting unsupported version numbers. Additionally, the 'minor' parameter to kvm_psci_1_x_call() is a constant known at compile time for all callsites. Though it is benign, the additional check against the PSCI kvm_psci_1_x_call() is unnecessary and likely to be missed the next time KVM raises its maximum PSCI version. Drop the check altogether and rely on sanitization when the PSCI version is set by userspace. No functional change intended. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220322183538.2757758-4-oupton@google.com --- arch/arm64/kvm/psci.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index faf403a72fdf..baac2b405f23 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -309,9 +309,6 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) unsigned long val; int ret = 1; - if (minor > 1) - return -EINVAL; - switch(psci_fn) { case PSCI_0_2_FN_PSCI_VERSION: val = minor == 0 ? KVM_ARM_PSCI_1_0 : KVM_ARM_PSCI_1_1; From f587661f21eb9a38af52488bbe54ce61a64dfae8 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 1 Apr 2022 19:46:52 +0000 Subject: [PATCH 289/708] KVM: arm64: Don't split hugepages outside of MMU write lock It is possible to take a stage-2 permission fault on a page larger than PAGE_SIZE. For example, when running a guest backed by 2M HugeTLB, KVM eagerly maps at the largest possible block size. When dirty logging is enabled on a memslot, KVM does *not* eagerly split these 2M stage-2 mappings and instead clears the write bit on the pte. Since dirty logging is always performed at PAGE_SIZE granularity, KVM lazily splits these 2M block mappings down to PAGE_SIZE in the stage-2 fault handler. This operation must be done under the write lock. Since commit f783ef1c0e82 ("KVM: arm64: Add fast path to handle permission relaxation during dirty logging"), the stage-2 fault handler conditionally takes the read lock on permission faults with dirty logging enabled. To that end, it is possible to split a 2M block mapping while only holding the read lock. The problem is demonstrated by running kvm_page_table_test with 2M anonymous HugeTLB, which splats like so: WARNING: CPU: 5 PID: 15276 at arch/arm64/kvm/hyp/pgtable.c:153 stage2_map_walk_leaf+0x124/0x158 [...] Call trace: stage2_map_walk_leaf+0x124/0x158 stage2_map_walker+0x5c/0xf0 __kvm_pgtable_walk+0x100/0x1d4 __kvm_pgtable_walk+0x140/0x1d4 __kvm_pgtable_walk+0x140/0x1d4 kvm_pgtable_walk+0xa0/0xf8 kvm_pgtable_stage2_map+0x15c/0x198 user_mem_abort+0x56c/0x838 kvm_handle_guest_abort+0x1fc/0x2a4 handle_exit+0xa4/0x120 kvm_arch_vcpu_ioctl_run+0x200/0x448 kvm_vcpu_ioctl+0x588/0x664 __arm64_sys_ioctl+0x9c/0xd4 invoke_syscall+0x4c/0x144 el0_svc_common+0xc4/0x190 do_el0_svc+0x30/0x8c el0_svc+0x28/0xcc el0t_64_sync_handler+0x84/0xe4 el0t_64_sync+0x1a4/0x1a8 Fix the issue by only acquiring the read lock if the guest faulted on a PAGE_SIZE granule w/ dirty logging enabled. Add a WARN to catch locking bugs in future changes. Fixes: f783ef1c0e82 ("KVM: arm64: Add fast path to handle permission relaxation during dirty logging") Cc: Jing Zhang Signed-off-by: Oliver Upton Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220401194652.950240-1-oupton@google.com --- arch/arm64/kvm/mmu.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0d19259454d8..53ae2c0640bc 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1079,7 +1079,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn_t gfn; kvm_pfn_t pfn; bool logging_active = memslot_is_logging(memslot); - bool logging_perm_fault = false; + bool use_read_lock = false; unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu); unsigned long vma_pagesize, fault_granule; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; @@ -1114,7 +1114,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (logging_active) { force_pte = true; vma_shift = PAGE_SHIFT; - logging_perm_fault = (fault_status == FSC_PERM && write_fault); + use_read_lock = (fault_status == FSC_PERM && write_fault && + fault_granule == PAGE_SIZE); } else { vma_shift = get_vma_page_shift(vma, hva); } @@ -1218,7 +1219,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * logging dirty logging, only acquire read lock for permission * relaxation. */ - if (logging_perm_fault) + if (use_read_lock) read_lock(&kvm->mmu_lock); else write_lock(&kvm->mmu_lock); @@ -1268,6 +1269,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (fault_status == FSC_PERM && vma_pagesize == fault_granule) { ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); } else { + WARN_ONCE(use_read_lock, "Attempted stage-2 map outside of write lock\n"); + ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, __pfn_to_phys(pfn), prot, memcache); @@ -1280,7 +1283,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } out_unlock: - if (logging_perm_fault) + if (use_read_lock) read_unlock(&kvm->mmu_lock); else write_unlock(&kvm->mmu_lock); From c707663e81ef48d279719e97fd86acef835a2671 Mon Sep 17 00:00:00 2001 From: Yu Zhe Date: Tue, 29 Mar 2022 03:20:59 -0700 Subject: [PATCH 290/708] KVM: arm64: vgic: Remove unnecessary type castings Remove unnecessary casts. Signed-off-by: Yu Zhe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220329102059.268983-1-yuzhe@nfschina.com --- arch/arm64/kvm/vgic/vgic-debug.c | 10 +++++----- arch/arm64/kvm/vgic/vgic-its.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index f38c40a76251..78cde687383c 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -82,7 +82,7 @@ static bool end_of_vgic(struct vgic_state_iter *iter) static void *vgic_debug_start(struct seq_file *s, loff_t *pos) { - struct kvm *kvm = (struct kvm *)s->private; + struct kvm *kvm = s->private; struct vgic_state_iter *iter; mutex_lock(&kvm->lock); @@ -110,7 +110,7 @@ out: static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos) { - struct kvm *kvm = (struct kvm *)s->private; + struct kvm *kvm = s->private; struct vgic_state_iter *iter = kvm->arch.vgic.iter; ++*pos; @@ -122,7 +122,7 @@ static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos) static void vgic_debug_stop(struct seq_file *s, void *v) { - struct kvm *kvm = (struct kvm *)s->private; + struct kvm *kvm = s->private; struct vgic_state_iter *iter; /* @@ -229,8 +229,8 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, static int vgic_debug_show(struct seq_file *s, void *v) { - struct kvm *kvm = (struct kvm *)s->private; - struct vgic_state_iter *iter = (struct vgic_state_iter *)v; + struct kvm *kvm = s->private; + struct vgic_state_iter *iter = v; struct vgic_irq *irq; struct kvm_vcpu *vcpu = NULL; unsigned long flags; diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 089fc2ffcb43..2e13402be3bd 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2143,7 +2143,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id, void *ptr, void *opaque) { - struct its_device *dev = (struct its_device *)opaque; + struct its_device *dev = opaque; struct its_collection *collection; struct kvm *kvm = its->dev->kvm; struct kvm_vcpu *vcpu = NULL; From a2c0b0fbe01419f8f5d1c0b9c581631f34ffce8b Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 5 Apr 2022 11:47:33 +0100 Subject: [PATCH 291/708] arm64: alternatives: mark patch_alternative() as `noinstr` The alternatives code must be `noinstr` such that it does not patch itself, as the cache invalidation is only performed after all the alternatives have been applied. Mark patch_alternative() as `noinstr`. Mark branch_insn_requires_update() and get_alt_insn() with `__always_inline` since they are both only called through patch_alternative(). Booting a kernel in QEMU TCG with KCSAN=y and ARM64_USE_LSE_ATOMICS=y caused a boot hang: [ 0.241121] CPU: All CPU(s) started at EL2 The alternatives code was patching the atomics in __tsan_read4() from LL/SC atomics to LSE atomics. The following fragment is using LL/SC atomics in the .text section: | <__tsan_unaligned_read4+304>: ldxr x6, [x2] | <__tsan_unaligned_read4+308>: add x6, x6, x5 | <__tsan_unaligned_read4+312>: stxr w7, x6, [x2] | <__tsan_unaligned_read4+316>: cbnz w7, <__tsan_unaligned_read4+304> This LL/SC atomic sequence was to be replaced with LSE atomics. However since the alternatives code was instrumentable, __tsan_read4() was being called after only the first instruction was replaced, which led to the following code in memory: | <__tsan_unaligned_read4+304>: ldadd x5, x6, [x2] | <__tsan_unaligned_read4+308>: add x6, x6, x5 | <__tsan_unaligned_read4+312>: stxr w7, x6, [x2] | <__tsan_unaligned_read4+316>: cbnz w7, <__tsan_unaligned_read4+304> This caused an infinite loop as the `stxr` instruction never completed successfully, so `w7` was always 0. Signed-off-by: Joey Gouly Cc: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20220405104733.11476-1-joey.gouly@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/alternative.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 3fb79b76e9d9..7bbf5104b7b7 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -42,7 +42,7 @@ bool alternative_is_applied(u16 cpufeature) /* * Check if the target PC is within an alternative block. */ -static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) +static __always_inline bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) { unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt); return !(pc >= replptr && pc <= (replptr + alt->alt_len)); @@ -50,7 +50,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) #define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1)) -static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) +static __always_inline u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) { u32 insn; @@ -95,7 +95,7 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp return insn; } -static void patch_alternative(struct alt_instr *alt, +static noinstr void patch_alternative(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) { __le32 *replptr; From 6203ac30297847ddc5e122ccdcbe9941fbc258e6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 23 Jul 2021 19:45:53 +0200 Subject: [PATCH 292/708] s390: add z16 elf platform Add detection for machine types 0x3931 and 0x3932 and set ELF platform name to z16. Signed-off-by: Heiko Carstens --- arch/s390/kernel/processor.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 7a74ea5f7531..aa0e0e7fc773 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -283,6 +283,10 @@ static int __init setup_elf_platform(void) case 0x8562: strcpy(elf_platform, "z15"); break; + case 0x3931: + case 0x3932: + strcpy(elf_platform, "z16"); + break; } return 0; } From e69a7ff8d5deefc81bd9ce00b3ece83950a88fe6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 20 Jul 2021 14:28:08 +0200 Subject: [PATCH 293/708] s390: allow to compile with z16 optimizations Add config and compile options which allow to compile with z16 optimizations if the compiler supports it. Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 19 +++++++++++++++++++ arch/s390/Makefile | 2 ++ 2 files changed, 21 insertions(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 77b5a03de13a..e084c72104f8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -255,6 +255,10 @@ config HAVE_MARCH_Z15_FEATURES def_bool n select HAVE_MARCH_Z14_FEATURES +config HAVE_MARCH_Z16_FEATURES + def_bool n + select HAVE_MARCH_Z15_FEATURES + choice prompt "Processor type" default MARCH_Z196 @@ -312,6 +316,14 @@ config MARCH_Z15 and 8561 series). The kernel will be slightly faster but will not work on older machines. +config MARCH_Z16 + bool "IBM z16" + select HAVE_MARCH_Z16_FEATURES + depends on $(cc-option,-march=z16) + help + Select this to enable optimizations for IBM z16 (3931 and + 3932 series). + endchoice config MARCH_Z10_TUNE @@ -332,6 +344,9 @@ config MARCH_Z14_TUNE config MARCH_Z15_TUNE def_bool TUNE_Z15 || MARCH_Z15 && TUNE_DEFAULT +config MARCH_Z16_TUNE + def_bool TUNE_Z16 || MARCH_Z16 && TUNE_DEFAULT + choice prompt "Tune code generation" default TUNE_DEFAULT @@ -372,6 +387,10 @@ config TUNE_Z15 bool "IBM z15" depends on $(cc-option,-mtune=z15) +config TUNE_Z16 + bool "IBM z16" + depends on $(cc-option,-mtune=z16) + endchoice config 64BIT diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 7a65bca1e5af..e441b60b1812 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -42,6 +42,7 @@ mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12 mflags-$(CONFIG_MARCH_Z13) := -march=z13 mflags-$(CONFIG_MARCH_Z14) := -march=z14 mflags-$(CONFIG_MARCH_Z15) := -march=z15 +mflags-$(CONFIG_MARCH_Z16) := -march=z16 export CC_FLAGS_MARCH := $(mflags-y) @@ -54,6 +55,7 @@ cflags-$(CONFIG_MARCH_ZEC12_TUNE) += -mtune=zEC12 cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13 cflags-$(CONFIG_MARCH_Z14_TUNE) += -mtune=z14 cflags-$(CONFIG_MARCH_Z15_TUNE) += -mtune=z15 +cflags-$(CONFIG_MARCH_Z16_TUNE) += -mtune=z16 cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include From 26bf74bd9f6ff0f1545b4f0c92a37c232d076014 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Mon, 28 Mar 2022 20:19:23 -0700 Subject: [PATCH 294/708] KVM: arm64: mixed-width check should be skipped for uninitialized vCPUs KVM allows userspace to configure either all EL1 32bit or 64bit vCPUs for a guest. At vCPU reset, vcpu_allowed_register_width() checks if the vcpu's register width is consistent with all other vCPUs'. Since the checking is done even against vCPUs that are not initialized (KVM_ARM_VCPU_INIT has not been done) yet, the uninitialized vCPUs are erroneously treated as 64bit vCPU, which causes the function to incorrectly detect a mixed-width VM. Introduce KVM_ARCH_FLAG_EL1_32BIT and KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED bits for kvm->arch.flags. A value of the EL1_32BIT bit indicates that the guest needs to be configured with all 32bit or 64bit vCPUs, and a value of the REG_WIDTH_CONFIGURED bit indicates if a value of the EL1_32BIT bit is valid (already set up). Values in those bits are set at the first KVM_ARM_VCPU_INIT for the guest based on KVM_ARM_VCPU_EL1_32BIT configuration for the vCPU. Check vcpu's register width against those new bits at the vcpu's KVM_ARM_VCPU_INIT (instead of against other vCPUs' register width). Fixes: 66e94d5cafd4 ("KVM: arm64: Prevent mixed-width VM creation") Signed-off-by: Reiji Watanabe Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220329031924.619453-2-reijiw@google.com --- arch/arm64/include/asm/kvm_emulate.h | 27 +++++++---- arch/arm64/include/asm/kvm_host.h | 10 +++++ arch/arm64/kvm/reset.c | 67 +++++++++++++++++++--------- 3 files changed, 75 insertions(+), 29 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index d62405ce3e6d..7496deab025a 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -43,10 +43,22 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_vcpu_wfi(struct kvm_vcpu *vcpu); +#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__) static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { return !(vcpu->arch.hcr_el2 & HCR_RW); } +#else +static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, + &kvm->arch.flags)); + + return test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags); +} +#endif static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) { @@ -72,15 +84,14 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 |= HCR_TVM; } - if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) + if (vcpu_el1_is_32bit(vcpu)) vcpu->arch.hcr_el2 &= ~HCR_RW; - - /* - * TID3: trap feature register accesses that we virtualise. - * For now this is conditional, since no AArch32 feature regs - * are currently virtualised. - */ - if (!vcpu_el1_is_32bit(vcpu)) + else + /* + * TID3: trap feature register accesses that we virtualise. + * For now this is conditional, since no AArch32 feature regs + * are currently virtualised. + */ vcpu->arch.hcr_el2 |= HCR_TID3; if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) || diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e3b25dc6c367..94a27a7520f4 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -127,6 +127,16 @@ struct kvm_arch { #define KVM_ARCH_FLAG_MTE_ENABLED 1 /* At least one vCPU has ran in the VM */ #define KVM_ARCH_FLAG_HAS_RAN_ONCE 2 + /* + * The following two bits are used to indicate the guest's EL1 + * register width configuration. A value of KVM_ARCH_FLAG_EL1_32BIT + * bit is valid only when KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED is set. + * Otherwise, the guest's EL1 register width has not yet been + * determined yet. + */ +#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 3 +#define KVM_ARCH_FLAG_EL1_32BIT 4 + unsigned long flags; /* diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index ecc40c8cd6f6..6c70c6f61c70 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -181,27 +181,51 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) return 0; } -static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu) +/** + * kvm_set_vm_width() - set the register width for the guest + * @vcpu: Pointer to the vcpu being configured + * + * Set both KVM_ARCH_FLAG_EL1_32BIT and KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED + * in the VM flags based on the vcpu's requested register width, the HW + * capabilities and other options (such as MTE). + * When REG_WIDTH_CONFIGURED is already set, the vcpu settings must be + * consistent with the value of the FLAG_EL1_32BIT bit in the flags. + * + * Return: 0 on success, negative error code on failure. + */ +static int kvm_set_vm_width(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *tmp; + struct kvm *kvm = vcpu->kvm; bool is32bit; - unsigned long i; is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT); - if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit) - return false; - /* MTE is incompatible with AArch32 */ - if (kvm_has_mte(vcpu->kvm) && is32bit) - return false; + lockdep_assert_held(&kvm->lock); - /* Check that the vcpus are either all 32bit or all 64bit */ - kvm_for_each_vcpu(i, tmp, vcpu->kvm) { - if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit) - return false; + if (test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags)) { + /* + * The guest's register width is already configured. + * Make sure that the vcpu is consistent with it. + */ + if (is32bit == test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags)) + return 0; + + return -EINVAL; } - return true; + if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit) + return -EINVAL; + + /* MTE is incompatible with AArch32 */ + if (kvm_has_mte(kvm) && is32bit) + return -EINVAL; + + if (is32bit) + set_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags); + + set_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags); + + return 0; } /** @@ -230,10 +254,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) u32 pstate; mutex_lock(&vcpu->kvm->lock); - reset_state = vcpu->arch.reset_state; - WRITE_ONCE(vcpu->arch.reset_state.reset, false); + ret = kvm_set_vm_width(vcpu); + if (!ret) { + reset_state = vcpu->arch.reset_state; + WRITE_ONCE(vcpu->arch.reset_state.reset, false); + } mutex_unlock(&vcpu->kvm->lock); + if (ret) + return ret; + /* Reset PMU outside of the non-preemptible section */ kvm_pmu_vcpu_reset(vcpu); @@ -260,14 +290,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) } } - if (!vcpu_allowed_register_width(vcpu)) { - ret = -EINVAL; - goto out; - } - switch (vcpu->arch.target) { default: - if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { + if (vcpu_el1_is_32bit(vcpu)) { pstate = VCPU_RESET_PSTATE_SVC; } else { pstate = VCPU_RESET_PSTATE_EL1; From 2f5d27e6cf14efe652748bad89ee529ed5a5d577 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Mon, 28 Mar 2022 20:19:24 -0700 Subject: [PATCH 295/708] KVM: arm64: selftests: Introduce vcpu_width_config Introduce a test for aarch64 that ensures non-mixed-width vCPUs (all 64bit vCPUs or all 32bit vcPUs) can be configured, and mixed-width vCPUs cannot be configured. Reviewed-by: Andrew Jones Signed-off-by: Reiji Watanabe Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220329031924.619453-3-reijiw@google.com --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/aarch64/vcpu_width_config.c | 122 ++++++++++++++++++ 3 files changed, 124 insertions(+) create mode 100644 tools/testing/selftests/kvm/aarch64/vcpu_width_config.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index d1e8f5237469..573d93a1d61f 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -3,6 +3,7 @@ /aarch64/debug-exceptions /aarch64/get-reg-list /aarch64/psci_cpu_on_test +/aarch64/vcpu_width_config /aarch64/vgic_init /aarch64/vgic_irq /s390x/memop diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 21c2dbd21a81..681b173aa87c 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -106,6 +106,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test +TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq TEST_GEN_PROGS_aarch64 += demand_paging_test diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c new file mode 100644 index 000000000000..6e9402679229 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT. + * + * Copyright (c) 2022 Google LLC. + * + * This is a test that ensures that non-mixed-width vCPUs (all 64bit vCPUs + * or all 32bit vcPUs) can be configured and mixed-width vCPUs cannot be + * configured. + */ + +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + + +/* + * Add a vCPU, run KVM_ARM_VCPU_INIT with @init1, and then + * add another vCPU, and run KVM_ARM_VCPU_INIT with @init2. + */ +static int add_init_2vcpus(struct kvm_vcpu_init *init1, + struct kvm_vcpu_init *init2) +{ + struct kvm_vm *vm; + int ret; + + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + + vm_vcpu_add(vm, 0); + ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1); + if (ret) + goto free_exit; + + vm_vcpu_add(vm, 1); + ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2); + +free_exit: + kvm_vm_free(vm); + return ret; +} + +/* + * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init1, + * and run KVM_ARM_VCPU_INIT for another vCPU with @init2. + */ +static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init1, + struct kvm_vcpu_init *init2) +{ + struct kvm_vm *vm; + int ret; + + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + + vm_vcpu_add(vm, 0); + vm_vcpu_add(vm, 1); + + ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1); + if (ret) + goto free_exit; + + ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2); + +free_exit: + kvm_vm_free(vm); + return ret; +} + +/* + * Tests that two 64bit vCPUs can be configured, two 32bit vCPUs can be + * configured, and two mixed-width vCPUs cannot be configured. + * Each of those three cases, configure vCPUs in two different orders. + * The one is running KVM_CREATE_VCPU for 2 vCPUs, and then running + * KVM_ARM_VCPU_INIT for them. + * The other is running KVM_CREATE_VCPU and KVM_ARM_VCPU_INIT for a vCPU, + * and then run those commands for another vCPU. + */ +int main(void) +{ + struct kvm_vcpu_init init1, init2; + struct kvm_vm *vm; + int ret; + + if (!kvm_check_cap(KVM_CAP_ARM_EL1_32BIT)) { + print_skip("KVM_CAP_ARM_EL1_32BIT is not supported"); + exit(KSFT_SKIP); + } + + /* Get the preferred target type and copy that to init2 for later use */ + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init1); + kvm_vm_free(vm); + init2 = init1; + + /* Test with 64bit vCPUs */ + ret = add_init_2vcpus(&init1, &init1); + TEST_ASSERT(ret == 0, + "Configuring 64bit EL1 vCPUs failed unexpectedly"); + ret = add_2vcpus_init_2vcpus(&init1, &init1); + TEST_ASSERT(ret == 0, + "Configuring 64bit EL1 vCPUs failed unexpectedly"); + + /* Test with 32bit vCPUs */ + init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT); + ret = add_init_2vcpus(&init1, &init1); + TEST_ASSERT(ret == 0, + "Configuring 32bit EL1 vCPUs failed unexpectedly"); + ret = add_2vcpus_init_2vcpus(&init1, &init1); + TEST_ASSERT(ret == 0, + "Configuring 32bit EL1 vCPUs failed unexpectedly"); + + /* Test with mixed-width vCPUs */ + init1.features[0] = 0; + init2.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT); + ret = add_init_2vcpus(&init1, &init2); + TEST_ASSERT(ret != 0, + "Configuring mixed-width vCPUs worked unexpectedly"); + ret = add_2vcpus_init_2vcpus(&init1, &init2); + TEST_ASSERT(ret != 0, + "Configuring mixed-width vCPUs worked unexpectedly"); + + return 0; +} From 697a1d44af8ba0477ee729e632f4ade37999249a Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 30 Mar 2022 12:25:43 +0100 Subject: [PATCH 296/708] tlb: hugetlb: Add more sizes to tlb_remove_huge_tlb_entry tlb_remove_huge_tlb_entry only considers PMD_SIZE and PUD_SIZE when updating the mmu_gather structure. Unfortunately on arm64 there are two additional huge page sizes that need to be covered: CONT_PTE_SIZE and CONT_PMD_SIZE. Where an end-user attempts to employ contiguous huge pages, a VM_BUG_ON can be experienced due to the fact that the tlb structure hasn't been correctly updated by the relevant tlb_flush_p.._range() call from tlb_remove_huge_tlb_entry. This patch adds inequality logic to the generic implementation of tlb_remove_huge_tlb_entry s.t. CONT_PTE_SIZE and CONT_PMD_SIZE are effectively covered on arm64. Also, as well as ptes, pmds and puds; p4ds are now considered too. Reported-by: David Hildenbrand Suggested-by: Peter Zijlstra (Intel) Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/linux-mm/811c5c8e-b3a2-85d2-049c-717f17c3a03a@redhat.com/ Signed-off-by: Steve Capper Acked-by: David Hildenbrand Reviewed-by: Anshuman Khandual Reviewed-by: Catalin Marinas Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220330112543.863-1-steve.capper@arm.com Signed-off-by: Will Deacon --- include/asm-generic/tlb.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index fd7feb5c7894..eee6f7763a39 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -565,10 +565,14 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb, #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ do { \ unsigned long _sz = huge_page_size(h); \ - if (_sz == PMD_SIZE) \ - tlb_flush_pmd_range(tlb, address, _sz); \ - else if (_sz == PUD_SIZE) \ + if (_sz >= P4D_SIZE) \ + tlb_flush_p4d_range(tlb, address, _sz); \ + else if (_sz >= PUD_SIZE) \ tlb_flush_pud_range(tlb, address, _sz); \ + else if (_sz >= PMD_SIZE) \ + tlb_flush_pmd_range(tlb, address, _sz); \ + else \ + tlb_flush_pte_range(tlb, address, _sz); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) From 1f30fb9166d4f15a1aa19449b9da871fe0ed4796 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 4 Apr 2022 17:43:45 +0200 Subject: [PATCH 297/708] net: openvswitch: fix leak of nested actions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While parsing user-provided actions, openvswitch module may dynamically allocate memory and store pointers in the internal copy of the actions. So this memory has to be freed while destroying the actions. Currently there are only two such actions: ct() and set(). However, there are many actions that can hold nested lists of actions and ovs_nla_free_flow_actions() just jumps over them leaking the memory. For example, removal of the flow with the following actions will lead to a leak of the memory allocated by nf_ct_tmpl_alloc(): actions:clone(ct(commit),0) Non-freed set() action may also leak the 'dst' structure for the tunnel info including device references. Under certain conditions with a high rate of flow rotation that may cause significant memory leak problem (2MB per second in reporter's case). The problem is also hard to mitigate, because the user doesn't have direct control over the datapath flows generated by OVS. Fix that by iterating over all the nested actions and freeing everything that needs to be freed recursively. New build time assertion should protect us from this problem if new actions will be added in the future. Unfortunately, openvswitch module doesn't use NLA_F_NESTED, so all attributes has to be explicitly checked. sample() and clone() actions are mixing extra attributes into the user-provided action list. That prevents some code generalization too. Fixes: 34ae932a4036 ("openvswitch: Make tunnel set action attach a metadata dst") Link: https://mail.openvswitch.org/pipermail/ovs-dev/2022-March/392922.html Reported-by: Stéphane Graber Signed-off-by: Ilya Maximets Acked-by: Aaron Conole Signed-off-by: David S. Miller --- net/openvswitch/flow_netlink.c | 95 ++++++++++++++++++++++++++++++++-- 1 file changed, 90 insertions(+), 5 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index dbdcaaa27f5b..7176156d3844 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2317,6 +2317,62 @@ static struct sw_flow_actions *nla_alloc_flow_actions(int size) return sfa; } +static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len); + +static void ovs_nla_free_check_pkt_len_action(const struct nlattr *action) +{ + const struct nlattr *a; + int rem; + + nla_for_each_nested(a, action, rem) { + switch (nla_type(a)) { + case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL: + case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER: + ovs_nla_free_nested_actions(nla_data(a), nla_len(a)); + break; + } + } +} + +static void ovs_nla_free_clone_action(const struct nlattr *action) +{ + const struct nlattr *a = nla_data(action); + int rem = nla_len(action); + + switch (nla_type(a)) { + case OVS_CLONE_ATTR_EXEC: + /* The real list of actions follows this attribute. */ + a = nla_next(a, &rem); + ovs_nla_free_nested_actions(a, rem); + break; + } +} + +static void ovs_nla_free_dec_ttl_action(const struct nlattr *action) +{ + const struct nlattr *a = nla_data(action); + + switch (nla_type(a)) { + case OVS_DEC_TTL_ATTR_ACTION: + ovs_nla_free_nested_actions(nla_data(a), nla_len(a)); + break; + } +} + +static void ovs_nla_free_sample_action(const struct nlattr *action) +{ + const struct nlattr *a = nla_data(action); + int rem = nla_len(action); + + switch (nla_type(a)) { + case OVS_SAMPLE_ATTR_ARG: + /* The real list of actions follows this attribute. */ + a = nla_next(a, &rem); + ovs_nla_free_nested_actions(a, rem); + break; + } +} + static void ovs_nla_free_set_action(const struct nlattr *a) { const struct nlattr *ovs_key = nla_data(a); @@ -2330,25 +2386,54 @@ static void ovs_nla_free_set_action(const struct nlattr *a) } } -void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) +static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len) { const struct nlattr *a; int rem; - if (!sf_acts) + /* Whenever new actions are added, the need to update this + * function should be considered. + */ + BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23); + + if (!actions) return; - nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) { + nla_for_each_attr(a, actions, len, rem) { switch (nla_type(a)) { - case OVS_ACTION_ATTR_SET: - ovs_nla_free_set_action(a); + case OVS_ACTION_ATTR_CHECK_PKT_LEN: + ovs_nla_free_check_pkt_len_action(a); break; + + case OVS_ACTION_ATTR_CLONE: + ovs_nla_free_clone_action(a); + break; + case OVS_ACTION_ATTR_CT: ovs_ct_free_action(a); break; + + case OVS_ACTION_ATTR_DEC_TTL: + ovs_nla_free_dec_ttl_action(a); + break; + + case OVS_ACTION_ATTR_SAMPLE: + ovs_nla_free_sample_action(a); + break; + + case OVS_ACTION_ATTR_SET: + ovs_nla_free_set_action(a); + break; } } +} +void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) +{ + if (!sf_acts) + return; + + ovs_nla_free_nested_actions(sf_acts->actions, sf_acts->actions_len); kfree(sf_acts); } From 62f6424514991ce6104f6a8becfd58e986f993b6 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 6 Apr 2022 14:37:20 +0200 Subject: [PATCH 298/708] ARM: config: u8500: Re-enable AB8500 battery charging This is effectively a revert of the temporary disablement patch. Battery charging now works! We also enable static battery data for the Samsung SDI batteries as used by the U8500 Samsung phones. Cc: Lee Jones Fixes: a1149ae97554 ("ARM: ux500: Disable Power Supply and Battery Management by default") Signed-off-by: Linus Walleij --- arch/arm/configs/u8500_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig index 159dd98d7f74..a352207a64d7 100644 --- a/arch/arm/configs/u8500_defconfig +++ b/arch/arm/configs/u8500_defconfig @@ -84,6 +84,8 @@ CONFIG_SPI_GPIO=y CONFIG_SPI_PL022=y CONFIG_GPIO_STMPE=y CONFIG_GPIO_TC3589X=y +CONFIG_BATTERY_SAMSUNG_SDI=y +CONFIG_AB8500_BM=y CONFIG_SENSORS_IIO_HWMON=y CONFIG_SENSORS_NTC_THERMISTOR=y CONFIG_THERMAL=y From 1946014ca3b19be9e485e780e862c375c6f98bad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Apr 2022 11:34:39 -0700 Subject: [PATCH 299/708] rxrpc: fix a race in rxrpc_exit_net() Current code can lead to the following race: CPU0 CPU1 rxrpc_exit_net() rxrpc_peer_keepalive_worker() if (rxnet->live) rxnet->live = false; del_timer_sync(&rxnet->peer_keepalive_timer); timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay); cancel_work_sync(&rxnet->peer_keepalive_work); rxrpc_exit_net() exits while peer_keepalive_timer is still armed, leading to use-after-free. syzbot report was: ODEBUG: free active (active state 0) object type: timer_list hint: rxrpc_peer_keepalive_timeout+0x0/0xb0 WARNING: CPU: 0 PID: 3660 at lib/debugobjects.c:505 debug_print_object+0x16e/0x250 lib/debugobjects.c:505 Modules linked in: CPU: 0 PID: 3660 Comm: kworker/u4:6 Not tainted 5.17.0-syzkaller-13993-g88e6c0207623 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net RIP: 0010:debug_print_object+0x16e/0x250 lib/debugobjects.c:505 Code: ff df 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 af 00 00 00 48 8b 14 dd 00 1c 26 8a 4c 89 ee 48 c7 c7 00 10 26 8a e8 b1 e7 28 05 <0f> 0b 83 05 15 eb c5 09 01 48 83 c4 18 5b 5d 41 5c 41 5d 41 5e c3 RSP: 0018:ffffc9000353fb00 EFLAGS: 00010082 RAX: 0000000000000000 RBX: 0000000000000003 RCX: 0000000000000000 RDX: ffff888029196140 RSI: ffffffff815efad8 RDI: fffff520006a7f52 RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff815ea4ae R11: 0000000000000000 R12: ffffffff89ce23e0 R13: ffffffff8a2614e0 R14: ffffffff816628c0 R15: dffffc0000000000 FS: 0000000000000000(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fe1f2908924 CR3: 0000000043720000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __debug_check_no_obj_freed lib/debugobjects.c:992 [inline] debug_check_no_obj_freed+0x301/0x420 lib/debugobjects.c:1023 kfree+0xd6/0x310 mm/slab.c:3809 ops_free_list.part.0+0x119/0x370 net/core/net_namespace.c:176 ops_free_list net/core/net_namespace.c:174 [inline] cleanup_net+0x591/0xb00 net/core/net_namespace.c:598 process_one_work+0x996/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298 Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Signed-off-by: Eric Dumazet Cc: David Howells Cc: Marc Dionne Cc: linux-afs@lists.infradead.org Reported-by: syzbot Signed-off-by: David S. Miller --- net/rxrpc/net_ns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 25bbc4cc8b13..f15d6942da45 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -113,8 +113,8 @@ static __net_exit void rxrpc_exit_net(struct net *net) struct rxrpc_net *rxnet = rxrpc_net(net); rxnet->live = false; - del_timer_sync(&rxnet->peer_keepalive_timer); cancel_work_sync(&rxnet->peer_keepalive_work); + del_timer_sync(&rxnet->peer_keepalive_timer); rxrpc_destroy_all_calls(rxnet); rxrpc_destroy_all_connections(rxnet); rxrpc_destroy_all_peers(rxnet); From fb5833d81e4333294add35d3ac7f7f52a7bf107f Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 5 Apr 2022 08:45:44 +0000 Subject: [PATCH 300/708] net: sfc: fix using uninitialized xdp tx_queue In some cases, xdp tx_queue can get used before initialization. 1. interface up/down 2. ring buffer size change When CPU cores are lower than maximum number of channels of sfc driver, it creates new channels only for XDP. When an interface is up or ring buffer size is changed, all channels are initialized. But xdp channels are always initialized later. So, the below scenario is possible. Packets are received to rx queue of normal channels and it is acted XDP_TX and tx_queue of xdp channels get used. But these tx_queues are not initialized yet. If so, TX DMA or queue error occurs. In order to avoid this problem. 1. initializes xdp tx_queues earlier than other rx_queue in efx_start_channels(). 2. checks whether tx_queue is initialized or not in efx_xdp_tx_buffers(). Splat looks like: sfc 0000:08:00.1 enp8s0f1np1: TX queue 10 spurious TX completion id 250 sfc 0000:08:00.1 enp8s0f1np1: resetting (RECOVER_OR_ALL) sfc 0000:08:00.1 enp8s0f1np1: MC command 0x80 inlen 100 failed rc=-22 (raw=22) arg=789 sfc 0000:08:00.1 enp8s0f1np1: has been disabled Fixes: f28100cb9c96 ("sfc: fix lack of XDP TX queues - error XDP TX failed (-22)") Acked-by: Martin Habets Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx_channels.c | 2 +- drivers/net/ethernet/sfc/tx.c | 3 +++ drivers/net/ethernet/sfc/tx_common.c | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index 83e27231fbe6..377df8b7f015 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -1140,7 +1140,7 @@ void efx_start_channels(struct efx_nic *efx) struct efx_rx_queue *rx_queue; struct efx_channel *channel; - efx_for_each_channel(channel, efx) { + efx_for_each_channel_rev(channel, efx) { efx_for_each_channel_tx_queue(tx_queue, channel) { efx_init_tx_queue(tx_queue); atomic_inc(&efx->active_queues); diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index d16e031e95f4..6983799e1c05 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -443,6 +443,9 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, if (unlikely(!tx_queue)) return -EINVAL; + if (!tx_queue->initialised) + return -EINVAL; + if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED) HARD_TX_LOCK(efx->net_dev, tx_queue->core_txq, cpu); diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c index d530cde2b864..9bc8281b7f5b 100644 --- a/drivers/net/ethernet/sfc/tx_common.c +++ b/drivers/net/ethernet/sfc/tx_common.c @@ -101,6 +101,8 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, "shutting down TX queue %d\n", tx_queue->queue); + tx_queue->initialised = false; + if (!tx_queue->buffer) return; From d1c4f93e3f0a023024a6f022a61528c06cf1daa9 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 5 Apr 2022 17:19:26 +0800 Subject: [PATCH 301/708] net: axienet: setup mdio unconditionally The call to axienet_mdio_setup should not depend on whether "phy-node" pressents on the DT. Besides, since `lp->phy_node` is used if PHY is in SGMII or 100Base-X modes, move it into the if statement. And the next patch will remove `lp->phy_node` from driver's private structure and do an of_node_put on it right away after use since it is not used elsewhere. Signed-off-by: Andy Chiu Reviewed-by: Greentime Hu Reviewed-by: Robert Hancock Reviewed-by: Radhey Shyam Pandey Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index c7eb05e4a6bf..78a991bbbcf9 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2064,15 +2064,14 @@ static int axienet_probe(struct platform_device *pdev) if (ret) goto cleanup_clk; - lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); - if (lp->phy_node) { - ret = axienet_mdio_setup(lp); - if (ret) - dev_warn(&pdev->dev, - "error registering MDIO bus: %d\n", ret); - } + ret = axienet_mdio_setup(lp); + if (ret) + dev_warn(&pdev->dev, + "error registering MDIO bus: %d\n", ret); + if (lp->phy_mode == PHY_INTERFACE_MODE_SGMII || lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX) { + lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); if (!lp->phy_node) { dev_err(&pdev->dev, "phy-handle required for 1000BaseX/SGMII\n"); ret = -EINVAL; From ab3a5d4c6081dbcfd90d19cc9849af89c6985d0f Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 5 Apr 2022 17:19:27 +0800 Subject: [PATCH 302/708] net: axienet: factor out phy_node in struct axienet_local the struct member `phy_node` of struct axienet_local is not used by the driver anymore after initialization. It might be a remnent of old code and could be removed. Signed-off-by: Andy Chiu Reviewed-by: Greentime Hu Reviewed-by: Robert Hancock Reviewed-by: Radhey Shyam Pandey Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/xilinx_axienet.h | 2 -- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 13 +++++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 0f9c88dd1a4a..d5c1e5c4a508 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -433,8 +433,6 @@ struct axienet_local { struct net_device *ndev; struct device *dev; - struct device_node *phy_node; - struct phylink *phylink; struct phylink_config phylink_config; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 78a991bbbcf9..3daef64a85bd 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2071,17 +2071,19 @@ static int axienet_probe(struct platform_device *pdev) if (lp->phy_mode == PHY_INTERFACE_MODE_SGMII || lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX) { - lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); - if (!lp->phy_node) { + np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); + if (!np) { dev_err(&pdev->dev, "phy-handle required for 1000BaseX/SGMII\n"); ret = -EINVAL; goto cleanup_mdio; } - lp->pcs_phy = of_mdio_find_device(lp->phy_node); + lp->pcs_phy = of_mdio_find_device(np); if (!lp->pcs_phy) { ret = -EPROBE_DEFER; + of_node_put(np); goto cleanup_mdio; } + of_node_put(np); lp->pcs.ops = &axienet_pcs_ops; lp->pcs.poll = true; } @@ -2124,8 +2126,6 @@ cleanup_mdio: put_device(&lp->pcs_phy->dev); if (lp->mii_bus) axienet_mdio_teardown(lp); - of_node_put(lp->phy_node); - cleanup_clk: clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks); clk_disable_unprepare(lp->axi_clk); @@ -2154,9 +2154,6 @@ static int axienet_remove(struct platform_device *pdev) clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks); clk_disable_unprepare(lp->axi_clk); - of_node_put(lp->phy_node); - lp->phy_node = NULL; - free_netdev(ndev); return 0; From dc48f04fd6562de6019e9fc7ed9ed539d632babb Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 5 Apr 2022 17:19:28 +0800 Subject: [PATCH 303/708] dt-bindings: net: add pcs-handle attribute Document the new pcs-handle attribute to support connecting to an external PHY. For Xilinx's AXI Ethernet, this is used when the core operates in SGMII or 1000Base-X modes and links through the internal PCS/PMA PHY. Signed-off-by: Andy Chiu Reviewed-by: Greentime Hu Reviewed-by: Rob Herring Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- .../devicetree/bindings/net/ethernet-controller.yaml | 6 ++++++ Documentation/devicetree/bindings/net/xilinx_axienet.txt | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 817794e56227..4f15463611f8 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -106,6 +106,12 @@ properties: phy-mode: $ref: "#/properties/phy-connection-type" + pcs-handle: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Specifies a reference to a node representing a PCS PHY device on a MDIO + bus to link with an external PHY (phy-handle) if exists. + phy-handle: $ref: /schemas/types.yaml#/definitions/phandle description: diff --git a/Documentation/devicetree/bindings/net/xilinx_axienet.txt b/Documentation/devicetree/bindings/net/xilinx_axienet.txt index b8e4894bc634..1aa4c6006cd0 100644 --- a/Documentation/devicetree/bindings/net/xilinx_axienet.txt +++ b/Documentation/devicetree/bindings/net/xilinx_axienet.txt @@ -26,7 +26,8 @@ Required properties: specified, the TX/RX DMA interrupts should be on that node instead, and only the Ethernet core interrupt is optionally specified here. -- phy-handle : Should point to the external phy device. +- phy-handle : Should point to the external phy device if exists. Pointing + this to the PCS/PMA PHY is deprecated and should be avoided. See ethernet.txt file in the same directory. - xlnx,rxmem : Set to allocated memory buffer for Rx/Tx in the hardware @@ -68,6 +69,11 @@ Optional properties: required through the core's MDIO interface (i.e. always, unless the PHY is accessed through a different bus). + - pcs-handle: Phandle to the internal PCS/PMA PHY in SGMII or 1000Base-X + modes, where "pcs-handle" should be used to point + to the PCS/PMA PHY, and "phy-handle" should point to an + external PHY if exists. + Example: axi_ethernet_eth: ethernet@40c00000 { compatible = "xlnx,axi-ethernet-1.00.a"; From 19c7a43912c61a3bcc09f220cd8681d35c1bec79 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 5 Apr 2022 17:19:29 +0800 Subject: [PATCH 304/708] net: axiemac: use a phandle to reference pcs_phy In some SGMII use cases where both a fixed link external PHY and the internal PCS/PMA PHY need to be configured, we should explicitly use a phandle "pcs-phy" to get the reference to the PCS/PMA PHY. Otherwise, the driver would use "phy-handle" in the DT as the reference to both the external and the internal PCS/PMA PHY. In other cases where the core is connected to a SFP cage, we could still point phy-handle to the intenal PCS/PMA PHY, and let the driver connect to the SFP module, if exist, via phylink. Signed-off-by: Andy Chiu Reviewed-by: Greentime Hu Reviewed-by: Robert Hancock Reviewed-by: Andrew Lunn Reviewed-by: Radhey Shyam Pandey Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 3daef64a85bd..d6fc3f7acdf0 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2071,9 +2071,16 @@ static int axienet_probe(struct platform_device *pdev) if (lp->phy_mode == PHY_INTERFACE_MODE_SGMII || lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX) { - np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); + np = of_parse_phandle(pdev->dev.of_node, "pcs-handle", 0); if (!np) { - dev_err(&pdev->dev, "phy-handle required for 1000BaseX/SGMII\n"); + /* Deprecated: Always use "pcs-handle" for pcs_phy. + * Falling back to "phy-handle" here is only for + * backward compatibility with old device trees. + */ + np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); + } + if (!np) { + dev_err(&pdev->dev, "pcs-handle (preferred) or phy-handle required for 1000BaseX/SGMII\n"); ret = -EINVAL; goto cleanup_mdio; } From 8d90991e5bf7fdb9f264f5f579d18969913054b7 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 5 Apr 2022 14:02:33 +0200 Subject: [PATCH 305/708] net: phy: mscc-miim: reject clause 45 register accesses The driver doesn't support clause 45 register access yet, but doesn't check if the access is a c45 one either. This leads to spurious register reads and writes. Add the check. Fixes: 542671fe4d86 ("net: phy: mscc-miim: Add MDIO driver") Signed-off-by: Michael Walle Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/mdio/mdio-mscc-miim.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c index c483ba67c21f..582969751b4c 100644 --- a/drivers/net/mdio/mdio-mscc-miim.c +++ b/drivers/net/mdio/mdio-mscc-miim.c @@ -102,6 +102,9 @@ static int mscc_miim_read(struct mii_bus *bus, int mii_id, int regnum) u32 val; int ret; + if (regnum & MII_ADDR_C45) + return -EOPNOTSUPP; + ret = mscc_miim_wait_pending(bus); if (ret) goto out; @@ -145,6 +148,9 @@ static int mscc_miim_write(struct mii_bus *bus, int mii_id, struct mscc_miim_dev *miim = bus->priv; int ret; + if (regnum & MII_ADDR_C45) + return -EOPNOTSUPP; + ret = mscc_miim_wait_pending(bus); if (ret < 0) goto out; From aba120cc101788544aa3e2c30c8da88513892350 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 5 Apr 2022 16:40:51 +0200 Subject: [PATCH 306/708] random: do not allow user to keep crng key around on stack The fast key erasure RNG design relies on the key that's used to be used and then discarded. We do this, making judicious use of memzero_explicit(). However, reads to /dev/urandom and calls to getrandom() involve a copy_to_user(), and userspace can use FUSE or userfaultfd, or make a massive call, dynamically remap memory addresses as it goes, and set the process priority to idle, in order to keep a kernel stack alive indefinitely. By probing /proc/sys/kernel/random/entropy_avail to learn when the crng key is refreshed, a malicious userspace could mount this attack every 5 minutes thereafter, breaking the crng's forward secrecy. In order to fix this, we just overwrite the stack's key with the first 32 bytes of the "free" fast key erasure output. If we're returning <= 32 bytes to the user, then we can still return those bytes directly, so that short reads don't become slower. And for long reads, the difference is hopefully lost in the amortization, so it doesn't change much, with that amortization helping variously for medium reads. We don't need to do this for get_random_bytes() and the various kernel-space callers, and later, if we ever switch to always batching, this won't be necessary either, so there's no need to change the API of these functions. Cc: Theodore Ts'o Reviewed-by: Jann Horn Fixes: c92e040d575a ("random: add backtracking protection to the CRNG") Fixes: 186873c549df ("random: use simpler fast key erasure flow on per-cpu keys") Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 388025d6d38d..47f01b1482a9 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -532,19 +532,29 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) if (!nbytes) return 0; - len = min_t(size_t, 32, nbytes); - crng_make_state(chacha_state, output, len); + /* + * Immediately overwrite the ChaCha key at index 4 with random + * bytes, in case userspace causes copy_to_user() below to sleep + * forever, so that we still retain forward secrecy in that case. + */ + crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE); + /* + * However, if we're doing a read of len <= 32, we don't need to + * use chacha_state after, so we can simply return those bytes to + * the user directly. + */ + if (nbytes <= CHACHA_KEY_SIZE) { + ret = copy_to_user(buf, &chacha_state[4], nbytes) ? -EFAULT : nbytes; + goto out_zero_chacha; + } - if (copy_to_user(buf, output, len)) - return -EFAULT; - nbytes -= len; - buf += len; - ret += len; - - while (nbytes) { + do { if (large_request && need_resched()) { - if (signal_pending(current)) + if (signal_pending(current)) { + if (!ret) + ret = -ERESTARTSYS; break; + } schedule(); } @@ -561,10 +571,11 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) nbytes -= len; buf += len; ret += len; - } + } while (nbytes); - memzero_explicit(chacha_state, sizeof(chacha_state)); memzero_explicit(output, sizeof(output)); +out_zero_chacha: + memzero_explicit(chacha_state, sizeof(chacha_state)); return ret; } From 1448769c9cdb69ad65287f4f7ab58bc5f2f5d7ba Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 5 Apr 2022 18:39:31 +0200 Subject: [PATCH 307/708] random: check for signal_pending() outside of need_resched() check signal_pending() checks TIF_NOTIFY_SIGNAL and TIF_SIGPENDING, which signal that the task should bail out of the syscall when possible. This is a separate concept from need_resched(), which checks TIF_NEED_RESCHED, signaling that the task should preempt. In particular, with the current code, the signal_pending() bailout probably won't work reliably. Change this to look like other functions that read lots of data, such as read_zero(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jann Horn Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 47f01b1482a9..394cbd814a0b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -549,13 +549,13 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) } do { - if (large_request && need_resched()) { + if (large_request) { if (signal_pending(current)) { if (!ret) ret = -ERESTARTSYS; break; } - schedule(); + cond_resched(); } chacha20_block(chacha_state, output); From b3d6dd09ff00fdcf4f7c0cb54700ffd5dd343502 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 25 Mar 2022 10:32:11 +0800 Subject: [PATCH 308/708] Drivers: hv: balloon: Support status report for larger page sizes DM_STATUS_REPORT expects the numbers of pages in the unit of 4k pages (HV_HYP_PAGE) instead of guest pages, so to make it work when guest page sizes are larger than 4k, convert the numbers of guest pages into the numbers of HV_HYP_PAGEs. Note that the numbers of guest pages are still used for tracing because tracing is internal to the guest kernel. Reported-by: Vitaly Kuznetsov Signed-off-by: Boqun Feng Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20220325023212.1570049-2-boqun.feng@gmail.com Signed-off-by: Wei Liu --- drivers/hv/hv_balloon.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index f2d05bff4245..062156b88a87 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1130,6 +1131,7 @@ static void post_status(struct hv_dynmem_device *dm) struct dm_status status; unsigned long now = jiffies; unsigned long last_post = last_post_time; + unsigned long num_pages_avail, num_pages_committed; if (pressure_report_delay > 0) { --pressure_report_delay; @@ -1154,16 +1156,21 @@ static void post_status(struct hv_dynmem_device *dm) * num_pages_onlined) as committed to the host, otherwise it can try * asking us to balloon them out. */ - status.num_avail = si_mem_available(); - status.num_committed = vm_memory_committed() + + num_pages_avail = si_mem_available(); + num_pages_committed = vm_memory_committed() + dm->num_pages_ballooned + (dm->num_pages_added > dm->num_pages_onlined ? dm->num_pages_added - dm->num_pages_onlined : 0) + compute_balloon_floor(); - trace_balloon_status(status.num_avail, status.num_committed, + trace_balloon_status(num_pages_avail, num_pages_committed, vm_memory_committed(), dm->num_pages_ballooned, dm->num_pages_added, dm->num_pages_onlined); + + /* Convert numbers of pages into numbers of HV_HYP_PAGEs. */ + status.num_avail = num_pages_avail * NR_HV_HYP_PAGES_IN_PAGE; + status.num_committed = num_pages_committed * NR_HV_HYP_PAGES_IN_PAGE; + /* * If our transaction ID is no longer current, just don't * send the status. This can happen if we were interrupted From be5802795cf8d0b881745fa9ba7790293b382280 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 25 Mar 2022 10:32:12 +0800 Subject: [PATCH 309/708] Drivers: hv: balloon: Disable balloon and hot-add accordingly Currently there are known potential issues for balloon and hot-add on ARM64: * Unballoon requests from Hyper-V should only unballoon ranges that are guest page size aligned, otherwise guests cannot handle because it's impossible to partially free a page. This is a problem when guest page size > 4096 bytes. * Memory hot-add requests from Hyper-V should provide the NUMA node id of the added ranges or ARM64 should have a functional memory_add_physaddr_to_nid(), otherwise the node id is missing for add_memory(). These issues require discussions on design and implementation. In the meanwhile, post_status() is working and essential to guest monitoring. Therefore instead of disabling the entire hv_balloon driver, the ballooning (when page size > 4096 bytes) and hot-add are disabled accordingly for now. Once the issues are fixed, they can be re-enable in these cases. Signed-off-by: Boqun Feng Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20220325023212.1570049-3-boqun.feng@gmail.com Signed-off-by: Wei Liu --- drivers/hv/hv_balloon.c | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 062156b88a87..eee7402cfc02 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1660,6 +1660,38 @@ static void disable_page_reporting(void) } } +static int ballooning_enabled(void) +{ + /* + * Disable ballooning if the page size is not 4k (HV_HYP_PAGE_SIZE), + * since currently it's unclear to us whether an unballoon request can + * make sure all page ranges are guest page size aligned. + */ + if (PAGE_SIZE != HV_HYP_PAGE_SIZE) { + pr_info("Ballooning disabled because page size is not 4096 bytes\n"); + return 0; + } + + return 1; +} + +static int hot_add_enabled(void) +{ + /* + * Disable hot add on ARM64, because we currently rely on + * memory_add_physaddr_to_nid() to get a node id of a hot add range, + * however ARM64's memory_add_physaddr_to_nid() always return 0 and + * DM_MEM_HOT_ADD_REQUEST doesn't have the NUMA node information for + * add_memory(). + */ + if (IS_ENABLED(CONFIG_ARM64)) { + pr_info("Memory hot add disabled on ARM64\n"); + return 0; + } + + return 1; +} + static int balloon_connect_vsp(struct hv_device *dev) { struct dm_version_request version_req; @@ -1731,8 +1763,8 @@ static int balloon_connect_vsp(struct hv_device *dev) * currently still requires the bits to be set, so we have to add code * to fail the host's hot-add and balloon up/down requests, if any. */ - cap_msg.caps.cap_bits.balloon = 1; - cap_msg.caps.cap_bits.hot_add = 1; + cap_msg.caps.cap_bits.balloon = ballooning_enabled(); + cap_msg.caps.cap_bits.hot_add = hot_add_enabled(); /* * Specify our alignment requirements as it relates From eaa03d34535872d29004cb5cf77dc9dec1ba9a25 Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Mon, 28 Mar 2022 17:44:57 +0200 Subject: [PATCH 310/708] Drivers: hv: vmbus: Replace smp_store_mb() with virt_store_mb() Following the recommendation in Documentation/memory-barriers.txt for virtual machine guests. Fixes: 8b6a877c060ed ("Drivers: hv: vmbus: Replace the per-CPU channel lists with a global array of channels") Signed-off-by: Andrea Parri (Microsoft) Link: https://lore.kernel.org/r/20220328154457.100872-1-parri.andrea@gmail.com Signed-off-by: Wei Liu --- drivers/hv/channel_mgmt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 60375879612f..67be81208a2d 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -380,7 +380,7 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel) * execute: * * (a) In the "normal (i.e., not resuming from hibernation)" path, - * the full barrier in smp_store_mb() guarantees that the store + * the full barrier in virt_store_mb() guarantees that the store * is propagated to all CPUs before the add_channel_work work * is queued. In turn, add_channel_work is queued before the * channel's ring buffer is allocated/initialized and the @@ -392,14 +392,14 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel) * recv_int_page before retrieving the channel pointer from the * array of channels. * - * (b) In the "resuming from hibernation" path, the smp_store_mb() + * (b) In the "resuming from hibernation" path, the virt_store_mb() * guarantees that the store is propagated to all CPUs before * the VMBus connection is marked as ready for the resume event * (cf. check_ready_for_resume_event()). The interrupt handler * of the VMBus driver and vmbus_chan_sched() can not run before * vmbus_bus_resume() has completed execution (cf. resume_noirq). */ - smp_store_mb( + virt_store_mb( vmbus_connection.channels[channel->offermsg.child_relid], channel); } From a3ebe92a0f2dfaeac257b685531decf8c9cd8eee Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Apr 2022 12:04:45 +0200 Subject: [PATCH 311/708] net: ipv6mr: fix unused variable warning with CONFIG_IPV6_PIMSM_V2=n net/ipv6/ip6mr.c:1656:14: warning: unused variable 'do_wrmifwhole' Move it to the CONFIG_IPV6_PIMSM_V2 scope where its used. Fixes: 4b340a5a726d ("net: ip6mr: add support for passing full packet on wrong mif") Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index a9775c830194..4e74bc61a3db 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1653,7 +1653,6 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, mifi_t mifi; struct net *net = sock_net(sk); struct mr_table *mrt; - bool do_wrmifwhole; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_ICMPV6) @@ -1761,6 +1760,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, #ifdef CONFIG_IPV6_PIMSM_V2 case MRT6_PIM: { + bool do_wrmifwhole; int v; if (optlen != sizeof(v)) From 4e910dbe36508654a896d5735b318c0b88172570 Mon Sep 17 00:00:00 2001 From: Jamie Bainbridge Date: Wed, 6 Apr 2022 21:19:19 +1000 Subject: [PATCH 312/708] qede: confirm skb is allocated before using qede_build_skb() assumes build_skb() always works and goes straight to skb_reserve(). However, build_skb() can fail under memory pressure. This results in a kernel panic because the skb to reserve is NULL. Add a check in case build_skb() failed to allocate and return NULL. The NULL return is handled correctly in callers to qede_build_skb(). Fixes: 8a8633978b842 ("qede: Add build_skb() support.") Signed-off-by: Jamie Bainbridge Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_fp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index b242000a77fd..b7cc36589f59 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -748,6 +748,9 @@ qede_build_skb(struct qede_rx_queue *rxq, buf = page_address(bd->data) + bd->page_offset; skb = build_skb(buf, rxq->rx_buf_seg_size); + if (unlikely(!skb)) + return NULL; + skb_reserve(skb, pad); skb_put(skb, len); From afb8e246527536848b9b4025b40e613edf776a9d Mon Sep 17 00:00:00 2001 From: Marcin Kozlowski Date: Wed, 6 Apr 2022 10:05:37 +0200 Subject: [PATCH 313/708] net: usb: aqc111: Fix out-of-bounds accesses in RX fixup aqc111_rx_fixup() contains several out-of-bounds accesses that can be triggered by a malicious (or defective) USB device, in particular: - The metadata array (desc_offset..desc_offset+2*pkt_count) can be out of bounds, causing OOB reads and (on big-endian systems) OOB endianness flips. - A packet can overlap the metadata array, causing a later OOB endianness flip to corrupt data used by a cloned SKB that has already been handed off into the network stack. - A packet SKB can be constructed whose tail is far beyond its end, causing out-of-bounds heap data to be considered part of the SKB's data. Found doing variant analysis. Tested it with another driver (ax88179_178a), since I don't have a aqc111 device to test it, but the code looks very similar. Signed-off-by: Marcin Kozlowski Signed-off-by: David S. Miller --- drivers/net/usb/aqc111.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index ea06d10e1c21..ca409d450a29 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -1102,10 +1102,15 @@ static int aqc111_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (start_of_descs != desc_offset) goto err; - /* self check desc_offset from header*/ - if (desc_offset >= skb_len) + /* self check desc_offset from header and make sure that the + * bounds of the metadata array are inside the SKB + */ + if (pkt_count * 2 + desc_offset >= skb_len) goto err; + /* Packets must not overlap the metadata array */ + skb_trim(skb, desc_offset); + if (pkt_count == 0) goto err; From b423e54ba965b4469b48e46fd16941f1e1701697 Mon Sep 17 00:00:00 2001 From: Xiaomeng Tong Date: Wed, 6 Apr 2022 11:55:56 +0800 Subject: [PATCH 314/708] myri10ge: fix an incorrect free for skb in myri10ge_sw_tso All remaining skbs should be released when myri10ge_xmit fails to transmit a packet. Fix it within another skb_list_walk_safe. Signed-off-by: Xiaomeng Tong Signed-off-by: David S. Miller --- drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 50ac3ee2577a..21d2645885ce 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -2903,11 +2903,9 @@ static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, status = myri10ge_xmit(curr, dev); if (status != 0) { dev_kfree_skb_any(curr); - if (segs != NULL) { - curr = segs; - segs = next; + skb_list_walk_safe(next, curr, next) { curr->next = NULL; - dev_kfree_skb_any(segs); + dev_kfree_skb_any(curr); } goto drop; } From 4ded53ea0c7d46fa043efc7320e17ca443a1debb Mon Sep 17 00:00:00 2001 From: Enze Li Date: Fri, 1 Apr 2022 22:18:42 +0100 Subject: [PATCH 315/708] cdrom: remove unused variable The clang static analyzer reports the following warning, File: drivers/cdrom/cdrom.c Warning: line 1380, column 7 Although the value stored to 'status' is used in enclosing expression, the value is never actually read from 'status' Remove the unused variable to eliminate the warning. Signed-off-by: Enze Li Link: https://lore.kernel.org/all/20220401032623.293666-1-lienze@kylinos.cn Signed-off-by: Phillip Potter Link: https://lore.kernel.org/r/20220401211842.2088096-1-phil@philpotter.co.uk Signed-off-by: Jens Axboe --- drivers/cdrom/cdrom.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 7bd10d63ddbe..2dc9da683a13 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -1365,7 +1365,6 @@ out_free: */ int cdrom_number_of_slots(struct cdrom_device_info *cdi) { - int status; int nslots = 1; struct cdrom_changer_info *info; @@ -1377,7 +1376,7 @@ int cdrom_number_of_slots(struct cdrom_device_info *cdi) if (!info) return -ENOMEM; - if ((status = cdrom_read_mech_status(cdi, info)) == 0) + if (cdrom_read_mech_status(cdi, info) == 0) nslots = info->hdr.nslots; kfree(info); From 409543cec01a84610029d6440c480c3fdd7214fb Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Wed, 6 Apr 2022 18:52:38 +0530 Subject: [PATCH 316/708] spi: core: add dma_map_dev for __spi_unmap_msg() Commit b470e10eb43f ("spi: core: add dma_map_dev for dma device") added dma_map_dev for _spi_map_msg() but missed to add for unmap routine, __spi_unmap_msg(), so add it now. Fixes: b470e10eb43f ("spi: core: add dma_map_dev for dma device") Cc: stable@vger.kernel.org # v5.14+ Signed-off-by: Vinod Koul Link: https://lore.kernel.org/r/20220406132238.1029249-1-vkoul@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index c4dd1200fe99..32443ae175fd 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1130,11 +1130,15 @@ static int __spi_unmap_msg(struct spi_controller *ctlr, struct spi_message *msg) if (ctlr->dma_tx) tx_dev = ctlr->dma_tx->device->dev; + else if (ctlr->dma_map_dev) + tx_dev = ctlr->dma_map_dev; else tx_dev = ctlr->dev.parent; if (ctlr->dma_rx) rx_dev = ctlr->dma_rx->device->dev; + else if (ctlr->dma_map_dev) + rx_dev = ctlr->dma_map_dev; else rx_dev = ctlr->dev.parent; From 97e4827d775faa9a32b5e1a97959c69dd77d17a3 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Thu, 31 Mar 2022 13:08:19 +0200 Subject: [PATCH 317/708] spi: cadence-quadspi: fix protocol setup for non-1-1-X operations cqspi_set_protocol() only set the data width, but ignored the command and address width (except for 8-8-8 DTR ops), leading to corruption of all transfers using 1-X-X or X-X-X ops. Fix by setting the other two widths as well. While we're at it, simplify the code a bit by replacing the CQSPI_INST_TYPE_* constants with ilog2(). Tested on a TI AM64x with a Macronix MX25U51245G QSPI flash with 1-4-4 read and write operations. Signed-off-by: Matthias Schiffer Link: https://lore.kernel.org/r/20220331110819.133392-1-matthias.schiffer@ew.tq-group.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 46 ++++++++----------------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index b0c9f62ccefb..616ada891974 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -102,12 +103,6 @@ struct cqspi_driver_platdata { #define CQSPI_TIMEOUT_MS 500 #define CQSPI_READ_TIMEOUT_MS 10 -/* Instruction type */ -#define CQSPI_INST_TYPE_SINGLE 0 -#define CQSPI_INST_TYPE_DUAL 1 -#define CQSPI_INST_TYPE_QUAD 2 -#define CQSPI_INST_TYPE_OCTAL 3 - #define CQSPI_DUMMY_CLKS_PER_BYTE 8 #define CQSPI_DUMMY_BYTES_MAX 4 #define CQSPI_DUMMY_CLKS_MAX 31 @@ -376,10 +371,6 @@ static unsigned int cqspi_calc_dummy(const struct spi_mem_op *op, bool dtr) static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata, const struct spi_mem_op *op) { - f_pdata->inst_width = CQSPI_INST_TYPE_SINGLE; - f_pdata->addr_width = CQSPI_INST_TYPE_SINGLE; - f_pdata->data_width = CQSPI_INST_TYPE_SINGLE; - /* * For an op to be DTR, cmd phase along with every other non-empty * phase should have dtr field set to 1. If an op phase has zero @@ -389,32 +380,23 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata, (!op->addr.nbytes || op->addr.dtr) && (!op->data.nbytes || op->data.dtr); - switch (op->data.buswidth) { - case 0: - break; - case 1: - f_pdata->data_width = CQSPI_INST_TYPE_SINGLE; - break; - case 2: - f_pdata->data_width = CQSPI_INST_TYPE_DUAL; - break; - case 4: - f_pdata->data_width = CQSPI_INST_TYPE_QUAD; - break; - case 8: - f_pdata->data_width = CQSPI_INST_TYPE_OCTAL; - break; - default: - return -EINVAL; - } + f_pdata->inst_width = 0; + if (op->cmd.buswidth) + f_pdata->inst_width = ilog2(op->cmd.buswidth); + + f_pdata->addr_width = 0; + if (op->addr.buswidth) + f_pdata->addr_width = ilog2(op->addr.buswidth); + + f_pdata->data_width = 0; + if (op->data.buswidth) + f_pdata->data_width = ilog2(op->data.buswidth); /* Right now we only support 8-8-8 DTR mode. */ if (f_pdata->dtr) { switch (op->cmd.buswidth) { case 0: - break; case 8: - f_pdata->inst_width = CQSPI_INST_TYPE_OCTAL; break; default: return -EINVAL; @@ -422,9 +404,7 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata, switch (op->addr.buswidth) { case 0: - break; case 8: - f_pdata->addr_width = CQSPI_INST_TYPE_OCTAL; break; default: return -EINVAL; @@ -432,9 +412,7 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata, switch (op->data.buswidth) { case 0: - break; case 8: - f_pdata->data_width = CQSPI_INST_TYPE_OCTAL; break; default: return -EINVAL; From 2c7d1b281286c46049cd22b43435cecba560edde Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Mon, 28 Mar 2022 10:24:42 -0400 Subject: [PATCH 318/708] spi: bcm-qspi: fix MSPI only access with bcm_qspi_exec_mem_op() This fixes case where MSPI controller is used to access spi-nor flash and BSPI block is not present. Fixes: 5f195ee7d830 ("spi: bcm-qspi: Implement the spi_mem interface") Signed-off-by: Kamal Dasu Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20220328142442.7553-1-kdasu.kdev@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-bcm-qspi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 86c76211b3d3..cad2d55dcd3d 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -1205,7 +1205,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem, addr = op->addr.val; len = op->data.nbytes; - if (bcm_qspi_bspi_ver_three(qspi) == true) { + if (has_bspi(qspi) && bcm_qspi_bspi_ver_three(qspi) == true) { /* * The address coming into this function is a raw flash offset. * But for BSPI <= V3, we need to convert it to a remapped BSPI @@ -1224,7 +1224,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem, len < 4) mspi_read = true; - if (mspi_read) + if (!has_bspi(qspi) || mspi_read) return bcm_qspi_mspi_exec_mem_op(spi, op); ret = bcm_qspi_bspi_set_mode(qspi, op, 0); From 945da79e6dd058be70bc47442dce319844e14daa Mon Sep 17 00:00:00 2001 From: Boyuan Zhang Date: Thu, 24 Mar 2022 12:19:06 -0400 Subject: [PATCH 319/708] drm/amdgpu/vcn3: send smu interface type For VCN FW to detect ASIC type, in order to use different mailbox registers. V2: simplify codes and fix format issue. Signed-off-by: Boyuan Zhang Acked-by Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 7 +++++++ drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index e2fde88aaf5e..f06fb7f882e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -159,6 +159,7 @@ #define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8) #define AMDGPU_VCN_SW_RING_FLAG (1 << 9) #define AMDGPU_VCN_FW_LOGGING_FLAG (1 << 10) +#define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11) #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001 @@ -279,6 +280,11 @@ struct amdgpu_fw_shared_fw_logging { uint32_t size; }; +struct amdgpu_fw_shared_smu_interface_info { + uint8_t smu_interface_type; + uint8_t padding[3]; +}; + struct amdgpu_fw_shared { uint32_t present_flag_0; uint8_t pad[44]; @@ -287,6 +293,7 @@ struct amdgpu_fw_shared { struct amdgpu_fw_shared_multi_queue multi_queue; struct amdgpu_fw_shared_sw_ring sw_ring; struct amdgpu_fw_shared_fw_logging fw_log; + struct amdgpu_fw_shared_smu_interface_info smu_interface_info; }; struct amdgpu_vcn_fwlog { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 0d590183328f..e1cca0a10653 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -219,6 +219,11 @@ static int vcn_v3_0_sw_init(void *handle) cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG) | cpu_to_le32(AMDGPU_VCN_FW_SHARED_FLAG_0_RB); fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED); + fw_shared->present_flag_0 |= AMDGPU_VCN_SMU_VERSION_INFO_FLAG; + if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 2)) + fw_shared->smu_interface_info.smu_interface_type = 2; + else if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 1)) + fw_shared->smu_interface_info.smu_interface_type = 1; if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); From dda81d9761d07541c404dd5fa93e773a8eda5ddc Mon Sep 17 00:00:00 2001 From: tiancyin Date: Sun, 27 Mar 2022 19:07:13 +0800 Subject: [PATCH 320/708] drm/amd/vcn: fix an error msg on vcn 3.0 Some video card has more than one vcn instance, passing 0 to vcn_v3_0_pause_dpg_mode is incorrect. Error msg: Register(1) [mmUVD_POWER_STATUS] failed to reach value 0x00000001 != 0x00000002 Reviewed-by: James Zhu Signed-off-by: tiancyin Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index e1cca0a10653..cb5f0a12333f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1488,7 +1488,7 @@ static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; - vcn_v3_0_pause_dpg_mode(adev, 0, &state); + vcn_v3_0_pause_dpg_mode(adev, inst_idx, &state); /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, From 862a876c3a6372f2fa9d0c6510f1976ac94fc857 Mon Sep 17 00:00:00 2001 From: Chris Park Date: Tue, 15 Mar 2022 12:21:43 -0400 Subject: [PATCH 321/708] drm/amd/display: Correct Slice reset calculation [Why] Once DSC slice cannot fit pixel clock, we incorrectly reset min slices to 0 and allow max slice to operate, even when max slice itself cannot fit the pixel clock properly. [How] Change the sequence such that we correctly determine DSC is not possible when both min slices and max slices cannot fit pixel clock per slice. Reviewed-by: Wenjing Liu Acked-by: Alex Hung Signed-off-by: Chris Park Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index efc2339f1fa0..4385d19bc489 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -864,11 +864,11 @@ static bool setup_dsc_config( min_slices_h = inc_num_slices(dsc_common_caps.slice_caps, min_slices_h); } + is_dsc_possible = (min_slices_h <= max_slices_h); + if (pic_width % min_slices_h != 0) min_slices_h = 0; // DSC TODO: Maybe try increasing the number of slices first? - is_dsc_possible = (min_slices_h <= max_slices_h); - if (min_slices_h == 0 && max_slices_h == 0) is_dsc_possible = false; From 95707203407c4cf0b7e520a99d6f46d8aed4b57f Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 15 Mar 2022 14:57:34 -0400 Subject: [PATCH 322/708] drm/amd/display: Remove redundant dsc power gating from init_hw [Why] DSC Power down code has been moved from dcn31_init_hw into init_pipes() Need to remove it from dcn10_init_hw() as well to avoid duplicated action on dcn1.x/2.x [How] Remove DSC power down code from dcn10_init_hw() Fixes: 8fa6f4c5715c ("drm/amd/display: fixed the DSC power off sequence during Driver PnP") Reviewed-by: Anthony Koo Reviewed-by: Eric Yang Acked-by: Alex Hung Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index ad757b59e00e..1dec40db582d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1497,13 +1497,6 @@ void dcn10_init_hw(struct dc *dc) link->link_status.link_active = true; } - /* Power gate DSCs */ - if (!is_optimized_init_done) { - for (i = 0; i < res_pool->res_cap->num_dsc; i++) - if (hws->funcs.dsc_pg_control != NULL) - hws->funcs.dsc_pg_control(hws, res_pool->dscs[i]->inst, false); - } - /* we want to turn off all dp displays before doing detection */ dc_link_blank_all_dp_displays(dc); From 58e16c752e9540b28a873c44c3bee83e022007c1 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 15 Mar 2022 16:31:14 -0400 Subject: [PATCH 323/708] drm/amd/display: Enable power gating before init_pipes [Why] In init_hw() we call init_pipes() before enabling power gating. init_pipes() tries to power gate dsc but it may fail because required force-ons are not released yet. As a result with dsc config the following errors observed on resume: "REG_WAIT timeout 1us * 1000 tries - dcn20_dsc_pg_control" "REG_WAIT timeout 1us * 1000 tries - dcn20_dpp_pg_control" "REG_WAIT timeout 1us * 1000 tries - dcn20_hubp_pg_control" [How] Move enable_power_gating_plane() before init_pipes() in init_hw() Reviewed-by: Anthony Koo Reviewed-by: Eric Yang Acked-by: Alex Hung Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 5 +++-- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 5 +++-- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 1dec40db582d..781334b395ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1500,6 +1500,9 @@ void dcn10_init_hw(struct dc *dc) /* we want to turn off all dp displays before doing detection */ dc_link_blank_all_dp_displays(dc); + if (hws->funcs.enable_power_gating_plane) + hws->funcs.enable_power_gating_plane(dc->hwseq, true); + /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which * pipes we want to use. @@ -1552,8 +1555,6 @@ void dcn10_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } - if (hws->funcs.enable_power_gating_plane) - hws->funcs.enable_power_gating_plane(dc->hwseq, true); if (dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index ed0a0e5fd805..f61ec8763844 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -547,6 +547,9 @@ void dcn30_init_hw(struct dc *dc) /* we want to turn off all dp displays before doing detection */ dc_link_blank_all_dp_displays(dc); + if (hws->funcs.enable_power_gating_plane) + hws->funcs.enable_power_gating_plane(dc->hwseq, true); + /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which * pipes we want to use. @@ -624,8 +627,6 @@ void dcn30_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } - if (hws->funcs.enable_power_gating_plane) - hws->funcs.enable_power_gating_plane(dc->hwseq, true); if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 4be228680909..ffc58e24eaf8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -199,6 +199,9 @@ void dcn31_init_hw(struct dc *dc) /* we want to turn off all dp displays before doing detection */ dc_link_blank_all_dp_displays(dc); + if (hws->funcs.enable_power_gating_plane) + hws->funcs.enable_power_gating_plane(dc->hwseq, true); + /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which * pipes we want to use. @@ -248,8 +251,6 @@ void dcn31_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } - if (hws->funcs.enable_power_gating_plane) - hws->funcs.enable_power_gating_plane(dc->hwseq, true); if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); From 83e9faac9a387894e945e7f33c2bb7a9c348257c Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Tue, 15 Mar 2022 16:42:33 -0400 Subject: [PATCH 324/708] drm/amd/display: Clear optc false state when disable otg [why] when disable optc, need to clear the underflow status as well. Reviewed-by: Chris Park Acked-by: Alex Hung Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c | 14 +++++++------- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c | 5 ++++- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index ffc58e24eaf8..631d8ac63aa4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -339,20 +339,20 @@ void dcn31_enable_power_gating_plane( bool enable) { bool force_on = true; /* disable power gating */ + uint32_t org_ip_request_cntl = 0; if (enable && !hws->ctx->dc->debug.disable_hubp_power_gate) force_on = false; + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); /* DCHUBP0/1/2/3/4/5 */ REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); REG_UPDATE(DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); /* DPP0/1/2/3/4/5 */ REG_UPDATE(DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); REG_UPDATE(DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); force_on = true; /* disable power gating */ if (enable && !hws->ctx->dc->debug.disable_dsc_power_gate) @@ -360,11 +360,11 @@ void dcn31_enable_power_gating_plane( /* DCS0/1/2/3/4/5 */ REG_UPDATE(DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); REG_UPDATE(DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); REG_UPDATE(DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); + + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); } void dcn31_update_info_frame(struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c index 8afe2130d7c5..e05527a3a8ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c @@ -124,7 +124,6 @@ static bool optc31_enable_crtc(struct timing_generator *optc) static bool optc31_disable_crtc(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); - /* disable otg request until end of the first line * in the vertical blank region */ @@ -138,6 +137,7 @@ static bool optc31_disable_crtc(struct timing_generator *optc) REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); + optc1_clear_optc_underflow(optc); return true; } @@ -158,6 +158,9 @@ static bool optc31_immediate_disable_crtc(struct timing_generator *optc) OTG_BUSY, 0, 1, 100000); + /* clear the false state */ + optc1_clear_optc_underflow(optc); + return true; } From 2944dbedc7e167221fdb99531f7b0cdbac9ac696 Mon Sep 17 00:00:00 2001 From: Jimmy Kizito Date: Wed, 9 Mar 2022 18:26:47 -0500 Subject: [PATCH 325/708] drm/amd/display: Add work around for AUX failure on wake. [Why] When waking from low-power states, a DP sink may remain unresponsive to AUX transactions. [How] Try to toggle DPCD SET_POWER register repeatedly (up to a maximum timeout value) until DP sink becomes responsive. Reviewed-by: Mustapha Ghaddar Acked-by: Alex Hung Signed-off-by: Jimmy Kizito Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 59 +++++++++++++++++++ .../gpu/drm/amd/display/dc/inc/dc_link_dp.h | 1 + 2 files changed, 60 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 351081f574cb..e4df81dc1dc2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -5216,6 +5216,62 @@ static void retrieve_cable_id(struct dc_link *link) &link->dpcd_caps.cable_id, &usbc_cable_id); } +/* DPRX may take some time to respond to AUX messages after HPD asserted. + * If AUX read unsuccessful, try to wake unresponsive DPRX by toggling DPCD SET_POWER (0x600). + */ +static enum dc_status wa_try_to_wake_dprx(struct dc_link *link, uint64_t timeout_ms) +{ + enum dc_status status = DC_ERROR_UNEXPECTED; + uint8_t dpcd_data = 0; + uint64_t start_ts = 0; + uint64_t current_ts = 0; + uint64_t time_taken_ms = 0; + enum dc_connection_type type = dc_connection_none; + + status = core_link_read_dpcd( + link, + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV, + &dpcd_data, + sizeof(dpcd_data)); + + if (status != DC_OK) { + DC_LOG_WARNING("%s: Read DPCD LTTPR_CAP failed - try to toggle DPCD SET_POWER for %lld ms.", + __func__, + timeout_ms); + start_ts = dm_get_timestamp(link->ctx); + + do { + if (!dc_link_detect_sink(link, &type) || type == dc_connection_none) + break; + + dpcd_data = DP_SET_POWER_D3; + status = core_link_write_dpcd( + link, + DP_SET_POWER, + &dpcd_data, + sizeof(dpcd_data)); + + dpcd_data = DP_SET_POWER_D0; + status = core_link_write_dpcd( + link, + DP_SET_POWER, + &dpcd_data, + sizeof(dpcd_data)); + + current_ts = dm_get_timestamp(link->ctx); + time_taken_ms = div_u64(dm_get_elapse_time_in_ns(link->ctx, current_ts, start_ts), 1000000); + } while (status != DC_OK && time_taken_ms < timeout_ms); + + DC_LOG_WARNING("%s: DPCD SET_POWER %s after %lld ms%s", + __func__, + (status == DC_OK) ? "succeeded" : "failed", + time_taken_ms, + (type == dc_connection_none) ? ". Unplugged." : "."); + } + + return status; +} + static bool retrieve_link_cap(struct dc_link *link) { /* DP_ADAPTER_CAP - DP_DPCD_REV + 1 == 16 and also DP_DSC_BITS_PER_PIXEL_INC - DP_DSC_SUPPORT + 1 == 16, @@ -5251,6 +5307,9 @@ static bool retrieve_link_cap(struct dc_link *link) dc_link_aux_try_to_configure_timeout(link->ddc, LINK_AUX_DEFAULT_LTTPR_TIMEOUT_PERIOD); + /* Try to ensure AUX channel active before proceeding. */ + status = wa_try_to_wake_dprx(link, LINK_AUX_WAKE_TIMEOUT_MS); + is_lttpr_present = dp_retrieve_lttpr_cap(link); /* Read DP tunneling information. */ status = dpcd_get_tunneling_device_data(link); diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h index ab9939db8cea..44f167d2584f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h @@ -33,6 +33,7 @@ #define MAX_MTP_SLOT_COUNT 64 #define DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE 0x50 #define TRAINING_AUX_RD_INTERVAL 100 //us +#define LINK_AUX_WAKE_TIMEOUT_MS 1500 // Timeout when trying to wake unresponsive DPRX. struct dc_link; struct dc_stream_state; From b2075fce104b88b789c15ef1ed2b91dc94198e26 Mon Sep 17 00:00:00 2001 From: Martin Leung Date: Fri, 18 Mar 2022 11:12:36 -0400 Subject: [PATCH 326/708] drm/amd/display: Revert FEC check in validation why and how: causes failure on install on certain machines Reviewed-by: George Shen Acked-by: Alex Hung Signed-off-by: Martin Leung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index f2ad8f58e69c..c436db416708 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1496,10 +1496,6 @@ bool dc_validate_boot_timing(const struct dc *dc, if (!link->link_enc->funcs->is_dig_enabled(link->link_enc)) return false; - /* Check for FEC status*/ - if (link->link_enc->funcs->fec_is_active(link->link_enc)) - return false; - enc_inst = link->link_enc->funcs->get_dig_frontend(link->link_enc); if (enc_inst == ENGINE_ID_UNKNOWN) From f4346fb3edf7720db3f7f5e1cab1f667cd024280 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Thu, 17 Mar 2022 19:55:05 -0400 Subject: [PATCH 327/708] drm/amd/display: Fix allocate_mst_payload assert on resume [Why] On resume we do link detection for all non-MST connectors. MST is handled separately. However the condition for telling if connector is on mst branch is not enough for mst hub case. Link detection for mst branch link leads to mst topology reset. That causes assert in dc_link_allocate_mst_payload() [How] Use link type as indicator for mst link. Reviewed-by: Wayne Lin Acked-by: Alex Hung Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b30656959fd8..5895afefccb7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2714,7 +2714,8 @@ static int dm_resume(void *handle) * this is the case when traversing through already created * MST connectors, should be skipped */ - if (aconnector->mst_port) + if (aconnector->dc_link && + aconnector->dc_link->type == dc_connection_mst_branch) continue; mutex_lock(&aconnector->hpd_lock); From 4052287a75eb3fc0f487fcc5f768a38bede455c8 Mon Sep 17 00:00:00 2001 From: Shirish S Date: Fri, 11 Mar 2022 20:30:17 +0530 Subject: [PATCH 328/708] amd/display: set backlight only if required [Why] comparing pwm bl values (coverted) with user brightness(converted) levels in commit_tail leads to continuous setting of backlight via dmub as they don't to match. This leads overdrive in queuing of commands to DMCU that sometimes lead to depending on load on DMCU fw: "[drm:dc_dmub_srv_wait_idle] *ERROR* Error waiting for DMUB idle: status=3" [How] Store last successfully set backlight value and compare with it instead of pwm reads which is not what we should compare with. Signed-off-by: Shirish S Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 ++++--- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 6 ++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5895afefccb7..62139ff35476 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3973,7 +3973,7 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap max - min); } -static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, +static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, int bl_idx, u32 user_brightness) { @@ -4004,7 +4004,8 @@ static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", bl_idx); } - return rc ? 0 : 1; + if (rc) + dm->actual_brightness[bl_idx] = user_brightness; } static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) @@ -9948,7 +9949,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) /* restore the backlight level */ for (i = 0; i < dm->num_of_edps; i++) { if (dm->backlight_dev[i] && - (amdgpu_dm_backlight_get_level(dm, i) != dm->brightness[i])) + (dm->actual_brightness[i] != dm->brightness[i])) amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 6a908d736d6a..7e44b0429448 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -540,6 +540,12 @@ struct amdgpu_display_manager { * cached backlight values. */ u32 brightness[AMDGPU_DM_MAX_NUM_EDP]; + /** + * @actual_brightness: + * + * last successfully applied backlight values. + */ + u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP]; }; enum dsc_clock_force_state { From 96f2b7a3571618a1c8aed694c9e668014c70898b Mon Sep 17 00:00:00 2001 From: Ruili Ji Date: Thu, 31 Mar 2022 12:19:00 +0800 Subject: [PATCH 329/708] drm/amdgpu: fix incorrect GCR_GENERAL_CNTL address gfx10.3.3/gfx10.3.6/gfx10.3.7 shall use 0x1580 address for GCR_GENERAL_CNTL Acked-by: Prike Liang Acked-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Ruili Ji Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index a98b78e0b507..9426e252d8aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3293,7 +3293,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), @@ -3429,7 +3429,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_6[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000042), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x00000044), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), @@ -3454,7 +3454,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_7[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000041), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), From ef1a0808a2e20c58d166c5707864fba515832bd7 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Wed, 23 Mar 2022 21:16:19 +0800 Subject: [PATCH 330/708] drm/amdgpu: Sync up header and implementation to use the same parameter names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sync up header and implementation to use the same parameter names in function amdgpu_ring_init. ring_size -> max_dw, prio -> hw_prio Reviewed-by: Guchun Chen Reviewed-by: Christian König Signed-off-by: Ma Jun Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 5320bb0883d8..317d80209e95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -300,8 +300,8 @@ void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_commit(struct amdgpu_ring *ring); void amdgpu_ring_undo(struct amdgpu_ring *ring); int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, - unsigned int ring_size, struct amdgpu_irq_src *irq_src, - unsigned int irq_type, unsigned int prio, + unsigned int max_dw, struct amdgpu_irq_src *irq_src, + unsigned int irq_type, unsigned int hw_prio, atomic_t *sched_score); void amdgpu_ring_fini(struct amdgpu_ring *ring); void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, From e79a2398e1b2d47060474dca291542368183bc0f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 31 Mar 2022 13:21:17 +0100 Subject: [PATCH 331/708] drm/amdkfd: Create file descriptor after client is added to smi_clients list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This ensures userspace cannot prematurely clean-up the client before it is fully initialised which has been proven to cause issues in the past. Cc: Felix Kuehling Cc: Alex Deucher Cc: "Christian König" Cc: "Pan, Xinhui" Cc: David Airlie Cc: Daniel Vetter Cc: amd-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Lee Jones Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 24 +++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index e4beebb1c80a..f2e1d506ba21 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -247,15 +247,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) return ret; } - ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client, - O_RDWR); - if (ret < 0) { - kfifo_free(&client->fifo); - kfree(client); - return ret; - } - *fd = ret; - init_waitqueue_head(&client->wait_queue); spin_lock_init(&client->lock); client->events = 0; @@ -265,5 +256,20 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) list_add_rcu(&client->list, &dev->smi_clients); spin_unlock(&dev->smi_lock); + ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client, + O_RDWR); + if (ret < 0) { + spin_lock(&dev->smi_lock); + list_del_rcu(&client->list); + spin_unlock(&dev->smi_lock); + + synchronize_rcu(); + + kfifo_free(&client->fifo); + kfree(client); + return ret; + } + *fd = ret; + return 0; } From ca1198849ab0e7af5efb392ef6baf1138f6fc086 Mon Sep 17 00:00:00 2001 From: CHANDAN VURDIGERE NATARAJ Date: Tue, 29 Mar 2022 13:10:31 +0530 Subject: [PATCH 332/708] drm/amd/display: Fix by adding FPU protection for dcn30_internal_validate_bw [Why] Below general protection fault observed when WebGL Aquarium is run for longer duration. If drm debug logs are enabled and set to 0x1f then the issue is observed within 10 minutes of run. [ 100.717056] general protection fault, probably for non-canonical address 0x2d33302d32323032: 0000 [#1] PREEMPT SMP NOPTI [ 100.727921] CPU: 3 PID: 1906 Comm: DrmThread Tainted: G W 5.15.30 #12 d726c6a2d6ebe5cf9223931cbca6892f916fe18b [ 100.754419] RIP: 0010:CalculateSwathWidth+0x1f7/0x44f [ 100.767109] Code: 00 00 00 f2 42 0f 11 04 f0 48 8b 85 88 00 00 00 f2 42 0f 10 04 f0 48 8b 85 98 00 00 00 f2 42 0f 11 04 f0 48 8b 45 10 0f 57 c0 42 0f 2a 04 b0 0f 57 c9 f3 43 0f 2a 0c b4 e8 8c e2 f3 ff 48 8b [ 100.781269] RSP: 0018:ffffa9230079eeb0 EFLAGS: 00010246 [ 100.812528] RAX: 2d33302d32323032 RBX: 0000000000000500 RCX: 0000000000000000 [ 100.819656] RDX: 0000000000000001 RSI: ffff99deb712c49c RDI: 0000000000000000 [ 100.826781] RBP: ffffa9230079ef50 R08: ffff99deb712460c R09: ffff99deb712462c [ 100.833907] R10: ffff99deb7124940 R11: ffff99deb7124d70 R12: ffff99deb712ae44 [ 100.841033] R13: 0000000000000001 R14: 0000000000000000 R15: ffffa9230079f0a0 [ 100.848159] FS: 00007af121212640(0000) GS:ffff99deba780000(0000) knlGS:0000000000000000 [ 100.856240] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 100.861980] CR2: 0000209000fe1000 CR3: 000000011b18c000 CR4: 0000000000350ee0 [ 100.869106] Call Trace: [ 100.871555] [ 100.873655] ? asm_sysvec_reschedule_ipi+0x12/0x20 [ 100.878449] CalculateSwathAndDETConfiguration+0x1a3/0x6dd [ 100.883937] dml31_ModeSupportAndSystemConfigurationFull+0x2ce4/0x76da [ 100.890467] ? kallsyms_lookup_buildid+0xc8/0x163 [ 100.895173] ? kallsyms_lookup_buildid+0xc8/0x163 [ 100.899874] ? __sprint_symbol+0x80/0x135 [ 100.903883] ? dm_update_plane_state+0x3f9/0x4d2 [ 100.908500] ? symbol_string+0xb7/0xde [ 100.912250] ? number+0x145/0x29b [ 100.915566] ? vsnprintf+0x341/0x5ff [ 100.919141] ? desc_read_finalized_seq+0x39/0x87 [ 100.923755] ? update_load_avg+0x1b9/0x607 [ 100.927849] ? compute_mst_dsc_configs_for_state+0x7d/0xd5b [ 100.933416] ? fetch_pipe_params+0xa4d/0xd0c [ 100.937686] ? dc_fpu_end+0x3d/0xa8 [ 100.941175] dml_get_voltage_level+0x16b/0x180 [ 100.945619] dcn30_internal_validate_bw+0x10e/0x89b [ 100.950495] ? dcn31_validate_bandwidth+0x68/0x1fc [ 100.955285] ? resource_build_scaling_params+0x98b/0xb8c [ 100.960595] ? dcn31_validate_bandwidth+0x68/0x1fc [ 100.965384] dcn31_validate_bandwidth+0x9a/0x1fc [ 100.970001] dc_validate_global_state+0x238/0x295 [ 100.974703] amdgpu_dm_atomic_check+0x9c1/0xbce [ 100.979235] ? _printk+0x59/0x73 [ 100.982467] drm_atomic_check_only+0x403/0x78b [ 100.986912] drm_mode_atomic_ioctl+0x49b/0x546 [ 100.991358] ? drm_ioctl+0x1c1/0x3b3 [ 100.994936] ? drm_atomic_set_property+0x92a/0x92a [ 100.999725] drm_ioctl_kernel+0xdc/0x149 [ 101.003648] drm_ioctl+0x27f/0x3b3 [ 101.007051] ? drm_atomic_set_property+0x92a/0x92a [ 101.011842] amdgpu_drm_ioctl+0x49/0x7d [ 101.015679] __se_sys_ioctl+0x7c/0xb8 [ 101.015685] do_syscall_64+0x5f/0xb8 [ 101.015690] ? __irq_exit_rcu+0x34/0x96 [How] It calles populate_dml_pipes which uses doubles to initialize. Adding FPU protection avoids context switch and probable loss of vba context as there is potential contention while drm debug logs are enabled. Signed-off-by: CHANDAN VURDIGERE NATARAJ Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 338235bcef4a..63934ecf6be8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -2032,7 +2032,9 @@ bool dcn31_validate_bandwidth(struct dc *dc, BW_VAL_TRACE_COUNT(); + DC_FP_START(); out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); + DC_FP_END(); // Disable fast_validate to set min dcfclk in alculate_wm_and_dlg if (pipe_cnt == 0) From ebc002e3ee78409c42156e62e4e27ad1d09c5a75 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 25 Mar 2022 11:53:39 -0400 Subject: [PATCH 333/708] drm/amdgpu: don't use BACO for reset in S3 Seems to cause a reboots or hangs on some systems. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1924 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1953 Fixes: daf8de0874ab5b ("drm/amdgpu: always reset the asic in suspend (v2)") Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index c73fb73e9628..5504d81c77b7 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -173,6 +173,17 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) if (!pp_funcs || !pp_funcs->get_asic_baco_capability) return false; + /* Don't use baco for reset in S3. + * This is a workaround for some platforms + * where entering BACO during suspend + * seems to cause reboots or hangs. + * This might be related to the fact that BACO controls + * power to the whole GPU including devices like audio and USB. + * Powering down/up everything may adversely affect these other + * devices. Needs more investigation. + */ + if (adev->in_s3) + return false; mutex_lock(&adev->pm.mutex); From 7e8906dc2689cbf562ce520cf4a8ba5b495db0f6 Mon Sep 17 00:00:00 2001 From: Eric Bernstein Date: Mon, 21 Mar 2022 10:42:34 -0400 Subject: [PATCH 334/708] drm/amd/display: remove assert for odm transition case Remove assert that will hit during odm transition case, since this is a valid case. Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Acked-by: Tom Chung Signed-off-by: Eric Bernstein Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index d473708d5399..7802d603f796 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1976,7 +1976,6 @@ int dcn20_validate_apply_pipe_split_flags( /*If need split for odm but 4 way split already*/ if (split[i] == 2 && ((pipe->prev_odm_pipe && !pipe->prev_odm_pipe->prev_odm_pipe) || !pipe->next_odm_pipe)) { - ASSERT(0); /* NOT expected yet */ merge[i] = true; /* 4 -> 2 ODM */ } else if (split[i] == 0 && pipe->prev_odm_pipe) { ASSERT(0); /* NOT expected yet */ From 6e93d5b0333279d8968a2972065f47a899fb58b9 Mon Sep 17 00:00:00 2001 From: Jimmy Kizito Date: Tue, 22 Mar 2022 19:12:47 -0400 Subject: [PATCH 335/708] drm/amd/display: Add configuration options for AUX wake work around. [Why] Work around to try to wake unresponsive DP sinks may need to be adjusted for certain sinks. [How] Add options to disable work around or adjust time spent trying to wake unresponsive DPRX. Tested-by: Daniel Wheeler Reviewed-by: Martin Leung Acked-by: Tom Chung Signed-off-by: Jimmy Kizito Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 8 +++++++- drivers/gpu/drm/amd/display/dc/dc.h | 17 +++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index e4df81dc1dc2..22dabe596dfc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -5308,7 +5308,13 @@ static bool retrieve_link_cap(struct dc_link *link) LINK_AUX_DEFAULT_LTTPR_TIMEOUT_PERIOD); /* Try to ensure AUX channel active before proceeding. */ - status = wa_try_to_wake_dprx(link, LINK_AUX_WAKE_TIMEOUT_MS); + if (link->dc->debug.aux_wake_wa.bits.enable_wa) { + uint64_t timeout_ms = link->dc->debug.aux_wake_wa.bits.timeout_ms; + + if (link->dc->debug.aux_wake_wa.bits.use_default_timeout) + timeout_ms = LINK_AUX_WAKE_TIMEOUT_MS; + status = wa_try_to_wake_dprx(link, timeout_ms); + } is_lttpr_present = dp_retrieve_lttpr_cap(link); /* Read DP tunneling information. */ diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 9f4d926d54e7..77ef9d1f9ea8 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -526,6 +526,22 @@ union dpia_debug_options { uint32_t raw; }; +/* AUX wake work around options + * 0: enable/disable work around + * 1: use default timeout LINK_AUX_WAKE_TIMEOUT_MS + * 15-2: reserved + * 31-16: timeout in ms + */ +union aux_wake_wa_options { + struct { + uint32_t enable_wa : 1; + uint32_t use_default_timeout : 1; + uint32_t rsvd: 14; + uint32_t timeout_ms : 16; + } bits; + uint32_t raw; +}; + struct dc_debug_data { uint32_t ltFailCount; uint32_t i2cErrorCount; @@ -712,6 +728,7 @@ struct dc_debug_options { #endif bool apply_vendor_specific_lttpr_wa; bool extended_blank_optimization; + union aux_wake_wa_options aux_wake_wa; bool ignore_dpref_ss; uint8_t psr_power_use_phy_fsm; }; From 879791ad8bf3dc5453061cad74776a617b6e3319 Mon Sep 17 00:00:00 2001 From: Benjamin Marty Date: Wed, 23 Mar 2022 22:08:26 +0100 Subject: [PATCH 336/708] drm/amdgpu/display: change pipe policy for DCN 2.1 Fixes crash on MST Hub disconnect. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1849 Fixes: ee2698cf79cc ("drm/amd/display: Changed pipe split policy to allow for multi-display pipe split") Signed-off-by: Benjamin Marty Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 612732656772..3fe4bfbb98a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -644,7 +644,7 @@ static const struct dc_debug_options debug_defaults_drv = { .clock_trace = true, .disable_pplib_clock_request = true, .min_disp_clk_khz = 100000, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, From 28c25238898a242c58bfaff3f46a006585c2dd94 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Wed, 30 Mar 2022 16:05:50 -0400 Subject: [PATCH 337/708] drm/amd/display: update dcn315 clock table read [Why & How] Make dcn315 base its clock table off dcfclk rather than fclk. This change also adds some sanity checking to make sure an empty pmfw table does not result in invalid dal clocks. Reviewed-by: Charlene Liu Acked-by: Qingqing Zhuo Signed-off-by: Dmytro Laktyushkin Signed-off-by: Alex Deucher --- .../dc/clk_mgr/dcn315/dcn315_clk_mgr.c | 107 +++++++++++------- 1 file changed, 67 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index edda572dc570..8be4c1970628 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -436,57 +436,84 @@ static void dcn315_clk_mgr_helper_populate_bw_params( struct integrated_info *bios_info, const DpmClocks_315_t *clock_table) { - int i, j; + int i; struct clk_bw_params *bw_params = clk_mgr->base.bw_params; - uint32_t max_dispclk = 0, max_dppclk = 0; + uint32_t max_dispclk, max_dppclk, max_pstate, max_socclk, max_fclk = 0, min_pstate = 0; + struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1]; - j = -1; + max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled); + max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled); + max_socclk = find_max_clk_value(clock_table->SocClocks, clock_table->NumSocClkLevelsEnabled); - ASSERT(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL); - - /* Find lowest DPM, FCLK is filled in reverse order*/ - - for (i = NUM_DF_PSTATE_LEVELS - 1; i >= 0; i--) { - if (clock_table->DfPstateTable[i].FClk != 0) { - j = i; - break; + /* Find highest fclk pstate */ + for (i = 0; i < clock_table->NumDfPstatesEnabled; i++) { + if (clock_table->DfPstateTable[i].FClk > max_fclk) { + max_fclk = clock_table->DfPstateTable[i].FClk; + max_pstate = i; } } - if (j == -1) { - /* clock table is all 0s, just use our own hardcode */ - ASSERT(0); - return; - } + /* For 315 we want to base clock table on dcfclk, need at least one entry regardless of pmfw table */ + for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) { + int j; + uint32_t min_fclk = clock_table->DfPstateTable[0].FClk; - bw_params->clk_table.num_entries = j + 1; + for (j = 1; j < clock_table->NumDfPstatesEnabled; j++) { + if (clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i] + && clock_table->DfPstateTable[j].FClk < min_fclk) { + min_fclk = clock_table->DfPstateTable[j].FClk; + min_pstate = j; + } + } - /* dispclk and dppclk can be max at any voltage, same number of levels for both */ - if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS && - clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) { - max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled); - max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled); - } else { - ASSERT(0); - } - - for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) { - int temp; - - bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].FClk; - bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].MemClk; - bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].Voltage; - bw_params->clk_table.entries[i].wck_ratio = 1; - temp = find_clk_for_voltage(clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[j].Voltage); - if (temp) - bw_params->clk_table.entries[i].dcfclk_mhz = temp; - temp = find_clk_for_voltage(clock_table, clock_table->SocClocks, clock_table->DfPstateTable[j].Voltage); - if (temp) - bw_params->clk_table.entries[i].socclk_mhz = temp; + bw_params->clk_table.entries[i].fclk_mhz = min_fclk; + bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[min_pstate].MemClk; + bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[min_pstate].Voltage; + bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i]; + bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i]; bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; - } + bw_params->clk_table.entries[i].wck_ratio = 1; + }; + /* Make sure to include at least one entry and highest pstate */ + if (max_pstate != min_pstate) { + bw_params->clk_table.entries[i].fclk_mhz = max_fclk; + bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[max_pstate].MemClk; + bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[max_pstate].Voltage; + bw_params->clk_table.entries[i].dcfclk_mhz = find_clk_for_voltage( + clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[max_pstate].Voltage); + bw_params->clk_table.entries[i].socclk_mhz = find_clk_for_voltage( + clock_table, clock_table->SocClocks, clock_table->DfPstateTable[max_pstate].Voltage); + bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; + bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; + bw_params->clk_table.entries[i].wck_ratio = 1; + i++; + } + bw_params->clk_table.num_entries = i; + + /* Include highest socclk */ + if (bw_params->clk_table.entries[i-1].socclk_mhz < max_socclk) + bw_params->clk_table.entries[i-1].socclk_mhz = max_socclk; + + /* Set any 0 clocks to max default setting. Not an issue for + * power since we aren't doing switching in such case anyway + */ + for (i = 0; i < bw_params->clk_table.num_entries; i++) { + if (!bw_params->clk_table.entries[i].fclk_mhz) { + bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz; + bw_params->clk_table.entries[i].memclk_mhz = def_max.memclk_mhz; + bw_params->clk_table.entries[i].voltage = def_max.voltage; + } + if (!bw_params->clk_table.entries[i].dcfclk_mhz) + bw_params->clk_table.entries[i].dcfclk_mhz = def_max.dcfclk_mhz; + if (!bw_params->clk_table.entries[i].socclk_mhz) + bw_params->clk_table.entries[i].socclk_mhz = def_max.socclk_mhz; + if (!bw_params->clk_table.entries[i].dispclk_mhz) + bw_params->clk_table.entries[i].dispclk_mhz = def_max.dispclk_mhz; + if (!bw_params->clk_table.entries[i].dppclk_mhz) + bw_params->clk_table.entries[i].dppclk_mhz = def_max.dppclk_mhz; + } bw_params->vram_type = bios_info->memory_type; bw_params->num_channels = bios_info->ma_channel_number; From 2f25d8ce09b7ba5d769c132ba3d4eb84a941d2cb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 1 Apr 2022 11:08:48 -0400 Subject: [PATCH 338/708] drm/amdgpu/smu10: fix SoC/fclk units in auto mode SMU takes clock limits in Mhz units. socclk and fclk were using 10 khz units in some cases. Switch to Mhz units. Fixes higher than required SoC clocks. Fixes: 97cf32996c46d9 ("drm/amd/pm: Removed fixed clock in auto mode DPM") Reviewed-by: Paul Menzel Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index 9ddd8491ff00..ede71de2343d 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -773,13 +773,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, hwmgr->display_config->num_display > 3 ? - data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk : + (data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk / 100) : min_mclk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinSocclkByFreq, - data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk, + data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk / 100, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinVcn, @@ -792,11 +792,11 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxFclkByFreq, - data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk, + data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk / 100, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxSocclkByFreq, - data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk, + data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk / 100, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxVcn, From f75e582b0c3ee8f0bddc2248cc8b9175f29c5937 Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Thu, 24 Mar 2022 17:15:08 +0800 Subject: [PATCH 339/708] drm/msm/disp: check the return value of kzalloc() kzalloc() is a memory allocation function which can return NULL when some internal memory errors happen. So it is better to check it to prevent potential wrong memory access. Signed-off-by: Xiaoke Wang Reviewed-by: Abhinav Kumar Link: https://lore.kernel.org/r/tencent_B3E19486FF39415098B572B7397C2936C309@qq.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c index 5d2ff6791058..acfe1b31e079 100644 --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c @@ -176,6 +176,8 @@ void msm_disp_snapshot_add_block(struct msm_disp_state *disp_state, u32 len, va_list va; new_blk = kzalloc(sizeof(struct msm_disp_state_block), GFP_KERNEL); + if (!new_blk) + return; va_start(va, fmt); From 0fe35b8dcb8b3c4b751a1a44f1e128b690af71e4 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Sat, 26 Feb 2022 20:46:32 +0100 Subject: [PATCH 340/708] drm/msm/dpu: Use indexed array initializer to prevent mismatches While there's a comment pointing from dpu_intr_set to dpu_hw_intr_reg and vice-versa, an array initializer using indices makes it so that the indices between the enum and array cannot possibly get out of sync even if they're accidentially ordered wrongly. It is still useful to keep the comment to be made aware where the register offset mapping resides while looking at dpu_hw_intr_reg. Signed-off-by: Marijn Suijten Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220226194633.204501-1-marijn.suijten@somainline.org Signed-off-by: Rob Clark --- .../gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c index c515b7cf922c..c61b5b283f08 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c @@ -54,87 +54,87 @@ struct dpu_intr_reg { * When making changes be sure to sync with dpu_hw_intr_reg */ static const struct dpu_intr_reg dpu_intr_set[] = { - { + [MDP_SSPP_TOP0_INTR] = { MDP_SSPP_TOP0_OFF+INTR_CLEAR, MDP_SSPP_TOP0_OFF+INTR_EN, MDP_SSPP_TOP0_OFF+INTR_STATUS }, - { + [MDP_SSPP_TOP0_INTR2] = { MDP_SSPP_TOP0_OFF+INTR2_CLEAR, MDP_SSPP_TOP0_OFF+INTR2_EN, MDP_SSPP_TOP0_OFF+INTR2_STATUS }, - { + [MDP_SSPP_TOP0_HIST_INTR] = { MDP_SSPP_TOP0_OFF+HIST_INTR_CLEAR, MDP_SSPP_TOP0_OFF+HIST_INTR_EN, MDP_SSPP_TOP0_OFF+HIST_INTR_STATUS }, - { + [MDP_INTF0_INTR] = { MDP_INTF_0_OFF+INTF_INTR_CLEAR, MDP_INTF_0_OFF+INTF_INTR_EN, MDP_INTF_0_OFF+INTF_INTR_STATUS }, - { + [MDP_INTF1_INTR] = { MDP_INTF_1_OFF+INTF_INTR_CLEAR, MDP_INTF_1_OFF+INTF_INTR_EN, MDP_INTF_1_OFF+INTF_INTR_STATUS }, - { + [MDP_INTF2_INTR] = { MDP_INTF_2_OFF+INTF_INTR_CLEAR, MDP_INTF_2_OFF+INTF_INTR_EN, MDP_INTF_2_OFF+INTF_INTR_STATUS }, - { + [MDP_INTF3_INTR] = { MDP_INTF_3_OFF+INTF_INTR_CLEAR, MDP_INTF_3_OFF+INTF_INTR_EN, MDP_INTF_3_OFF+INTF_INTR_STATUS }, - { + [MDP_INTF4_INTR] = { MDP_INTF_4_OFF+INTF_INTR_CLEAR, MDP_INTF_4_OFF+INTF_INTR_EN, MDP_INTF_4_OFF+INTF_INTR_STATUS }, - { + [MDP_INTF5_INTR] = { MDP_INTF_5_OFF+INTF_INTR_CLEAR, MDP_INTF_5_OFF+INTF_INTR_EN, MDP_INTF_5_OFF+INTF_INTR_STATUS }, - { + [MDP_AD4_0_INTR] = { MDP_AD4_0_OFF + MDP_AD4_INTR_CLEAR_OFF, MDP_AD4_0_OFF + MDP_AD4_INTR_EN_OFF, MDP_AD4_0_OFF + MDP_AD4_INTR_STATUS_OFF, }, - { + [MDP_AD4_1_INTR] = { MDP_AD4_1_OFF + MDP_AD4_INTR_CLEAR_OFF, MDP_AD4_1_OFF + MDP_AD4_INTR_EN_OFF, MDP_AD4_1_OFF + MDP_AD4_INTR_STATUS_OFF, }, - { + [MDP_INTF0_7xxx_INTR] = { MDP_INTF_0_OFF_REV_7xxx+INTF_INTR_CLEAR, MDP_INTF_0_OFF_REV_7xxx+INTF_INTR_EN, MDP_INTF_0_OFF_REV_7xxx+INTF_INTR_STATUS }, - { + [MDP_INTF1_7xxx_INTR] = { MDP_INTF_1_OFF_REV_7xxx+INTF_INTR_CLEAR, MDP_INTF_1_OFF_REV_7xxx+INTF_INTR_EN, MDP_INTF_1_OFF_REV_7xxx+INTF_INTR_STATUS }, - { + [MDP_INTF2_7xxx_INTR] = { MDP_INTF_2_OFF_REV_7xxx+INTF_INTR_CLEAR, MDP_INTF_2_OFF_REV_7xxx+INTF_INTR_EN, MDP_INTF_2_OFF_REV_7xxx+INTF_INTR_STATUS }, - { + [MDP_INTF3_7xxx_INTR] = { MDP_INTF_3_OFF_REV_7xxx+INTF_INTR_CLEAR, MDP_INTF_3_OFF_REV_7xxx+INTF_INTR_EN, MDP_INTF_3_OFF_REV_7xxx+INTF_INTR_STATUS }, - { + [MDP_INTF4_7xxx_INTR] = { MDP_INTF_4_OFF_REV_7xxx+INTF_INTR_CLEAR, MDP_INTF_4_OFF_REV_7xxx+INTF_INTR_EN, MDP_INTF_4_OFF_REV_7xxx+INTF_INTR_STATUS }, - { + [MDP_INTF5_7xxx_INTR] = { MDP_INTF_5_OFF_REV_7xxx+INTF_INTR_CLEAR, MDP_INTF_5_OFF_REV_7xxx+INTF_INTR_EN, MDP_INTF_5_OFF_REV_7xxx+INTF_INTR_STATUS From 2e8702cc0cfa1080f29fd64003c00a3e24ac38de Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 6 Apr 2022 15:41:12 +0300 Subject: [PATCH 341/708] bpf: Support dual-stack sockets in bpf_tcp_check_syncookie bpf_tcp_gen_syncookie looks at the IP version in the IP header and validates the address family of the socket. It supports IPv4 packets in AF_INET6 dual-stack sockets. On the other hand, bpf_tcp_check_syncookie looks only at the address family of the socket, ignoring the real IP version in headers, and validates only the packet size. This implementation has some drawbacks: 1. Packets are not validated properly, allowing a BPF program to trick bpf_tcp_check_syncookie into handling an IPv6 packet on an IPv4 socket. 2. Dual-stack sockets fail the checks on IPv4 packets. IPv4 clients end up receiving a SYNACK with the cookie, but the following ACK gets dropped. This patch fixes these issues by changing the checks in bpf_tcp_check_syncookie to match the ones in bpf_tcp_gen_syncookie. IP version from the header is taken into account, and it is validated properly with address family. Fixes: 399040847084 ("bpf: add helper to check for a valid SYN cookie") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Alexei Starovoitov Reviewed-by: Tariq Toukan Acked-by: Arthur Fabre Link: https://lore.kernel.org/bpf/20220406124113.2795730-1-maximmi@nvidia.com --- net/core/filter.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index a7044e98765e..64470a727ef7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7016,24 +7016,33 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (!th->ack || th->rst || th->syn) return -ENOENT; + if (unlikely(iph_len < sizeof(struct iphdr))) + return -EINVAL; + if (tcp_synq_no_recent_overflow(sk)) return -ENOENT; cookie = ntohl(th->ack_seq) - 1; - switch (sk->sk_family) { - case AF_INET: - if (unlikely(iph_len < sizeof(struct iphdr))) + /* Both struct iphdr and struct ipv6hdr have the version field at the + * same offset so we can cast to the shorter header (struct iphdr). + */ + switch (((struct iphdr *)iph)->version) { + case 4: + if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk)) return -EINVAL; ret = __cookie_v4_check((struct iphdr *)iph, th, cookie); break; #if IS_BUILTIN(CONFIG_IPV6) - case AF_INET6: + case 6: if (unlikely(iph_len < sizeof(struct ipv6hdr))) return -EINVAL; + if (sk->sk_family != AF_INET6) + return -EINVAL; + ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie); break; #endif /* CONFIG_IPV6 */ From 53968dafc4a6061c1e01d884f1f9a4b8c4b0d5bc Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 6 Apr 2022 15:41:13 +0300 Subject: [PATCH 342/708] bpf: Adjust bpf_tcp_check_syncookie selftest to test dual-stack sockets The previous commit fixed support for dual-stack sockets in bpf_tcp_check_syncookie. This commit adjusts the selftest to verify the fixed functionality. Signed-off-by: Maxim Mikityanskiy Signed-off-by: Alexei Starovoitov Acked-by: Arthur Fabre Link: https://lore.kernel.org/bpf/20220406124113.2795730-2-maximmi@nvidia.com --- .../bpf/test_tcp_check_syncookie_user.c | 78 ++++++++++++++----- 1 file changed, 59 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c index b9e991d43155..e7775d3bbe08 100644 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c @@ -18,8 +18,9 @@ #include "bpf_rlimit.h" #include "cgroup_helpers.h" -static int start_server(const struct sockaddr *addr, socklen_t len) +static int start_server(const struct sockaddr *addr, socklen_t len, bool dual) { + int mode = !dual; int fd; fd = socket(addr->sa_family, SOCK_STREAM, 0); @@ -28,6 +29,14 @@ static int start_server(const struct sockaddr *addr, socklen_t len) goto out; } + if (addr->sa_family == AF_INET6) { + if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (char *)&mode, + sizeof(mode)) == -1) { + log_err("Failed to set the dual-stack mode"); + goto close_out; + } + } + if (bind(fd, addr, len) == -1) { log_err("Failed to bind server socket"); goto close_out; @@ -47,24 +56,17 @@ out: return fd; } -static int connect_to_server(int server_fd) +static int connect_to_server(const struct sockaddr *addr, socklen_t len) { - struct sockaddr_storage addr; - socklen_t len = sizeof(addr); int fd = -1; - if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { - log_err("Failed to get server addr"); - goto out; - } - - fd = socket(addr.ss_family, SOCK_STREAM, 0); + fd = socket(addr->sa_family, SOCK_STREAM, 0); if (fd == -1) { log_err("Failed to create client socket"); goto out; } - if (connect(fd, (const struct sockaddr *)&addr, len) == -1) { + if (connect(fd, (const struct sockaddr *)addr, len) == -1) { log_err("Fail to connect to server"); goto close_out; } @@ -116,7 +118,8 @@ err: return map_fd; } -static int run_test(int server_fd, int results_fd, bool xdp) +static int run_test(int server_fd, int results_fd, bool xdp, + const struct sockaddr *addr, socklen_t len) { int client = -1, srv_client = -1; int ret = 0; @@ -142,7 +145,7 @@ static int run_test(int server_fd, int results_fd, bool xdp) goto err; } - client = connect_to_server(server_fd); + client = connect_to_server(addr, len); if (client == -1) goto err; @@ -199,12 +202,30 @@ out: return ret; } +static bool get_port(int server_fd, in_port_t *port) +{ + struct sockaddr_in addr; + socklen_t len = sizeof(addr); + + if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { + log_err("Failed to get server addr"); + return false; + } + + /* sin_port and sin6_port are located at the same offset. */ + *port = addr.sin_port; + return true; +} + int main(int argc, char **argv) { struct sockaddr_in addr4; struct sockaddr_in6 addr6; + struct sockaddr_in addr4dual; + struct sockaddr_in6 addr6dual; int server = -1; int server_v6 = -1; + int server_dual = -1; int results = -1; int err = 0; bool xdp; @@ -224,25 +245,43 @@ int main(int argc, char **argv) addr4.sin_family = AF_INET; addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); addr4.sin_port = 0; + memcpy(&addr4dual, &addr4, sizeof(addr4dual)); memset(&addr6, 0, sizeof(addr6)); addr6.sin6_family = AF_INET6; addr6.sin6_addr = in6addr_loopback; addr6.sin6_port = 0; - server = start_server((const struct sockaddr *)&addr4, sizeof(addr4)); - if (server == -1) + memset(&addr6dual, 0, sizeof(addr6dual)); + addr6dual.sin6_family = AF_INET6; + addr6dual.sin6_addr = in6addr_any; + addr6dual.sin6_port = 0; + + server = start_server((const struct sockaddr *)&addr4, sizeof(addr4), + false); + if (server == -1 || !get_port(server, &addr4.sin_port)) goto err; server_v6 = start_server((const struct sockaddr *)&addr6, - sizeof(addr6)); - if (server_v6 == -1) + sizeof(addr6), false); + if (server_v6 == -1 || !get_port(server_v6, &addr6.sin6_port)) goto err; - if (run_test(server, results, xdp)) + server_dual = start_server((const struct sockaddr *)&addr6dual, + sizeof(addr6dual), true); + if (server_dual == -1 || !get_port(server_dual, &addr4dual.sin_port)) goto err; - if (run_test(server_v6, results, xdp)) + if (run_test(server, results, xdp, + (const struct sockaddr *)&addr4, sizeof(addr4))) + goto err; + + if (run_test(server_v6, results, xdp, + (const struct sockaddr *)&addr6, sizeof(addr6))) + goto err; + + if (run_test(server_dual, results, xdp, + (const struct sockaddr *)&addr4dual, sizeof(addr4dual))) goto err; printf("ok\n"); @@ -252,6 +291,7 @@ err: out: close(server); close(server_v6); + close(server_dual); close(results); return err; } From 5593473a1e6c743764b08e3b6071cb43b5cfa6c4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 6 Apr 2022 13:13:42 -0400 Subject: [PATCH 343/708] KVM: avoid NULL pointer dereference in kvm_dirty_ring_push kvm_vcpu_release() will call kvm_dirty_ring_free(), freeing ring->dirty_gfns and setting it to NULL. Afterwards, it calls kvm_arch_vcpu_destroy(). However, if closing the file descriptor races with KVM_RUN in such away that vcpu->arch.st.preempted == 0, the following call stack leads to a NULL pointer dereference in kvm_dirty_run_push(): mark_page_dirty_in_slot+0x192/0x270 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3171 kvm_steal_time_set_preempted arch/x86/kvm/x86.c:4600 [inline] kvm_arch_vcpu_put+0x34e/0x5b0 arch/x86/kvm/x86.c:4618 vcpu_put+0x1b/0x70 arch/x86/kvm/../../../virt/kvm/kvm_main.c:211 vmx_free_vcpu+0xcb/0x130 arch/x86/kvm/vmx/vmx.c:6985 kvm_arch_vcpu_destroy+0x76/0x290 arch/x86/kvm/x86.c:11219 kvm_vcpu_destroy arch/x86/kvm/../../../virt/kvm/kvm_main.c:441 [inline] The fix is to release the dirty page ring after kvm_arch_vcpu_destroy has run. Reported-by: Qiuhao Li Reported-by: Gaoning Pan Reported-by: Yongkang Jia Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 70e05af5ebea..b22f380e3347 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -434,8 +434,8 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) { - kvm_dirty_ring_free(&vcpu->dirty_ring); kvm_arch_vcpu_destroy(vcpu); + kvm_dirty_ring_free(&vcpu->dirty_ring); /* * No need for rcu_read_lock as VCPU_RUN is the only place that changes From ce8b3ad1071b764e963d9b08ac34ffddddf12da6 Mon Sep 17 00:00:00 2001 From: Dongjin Yang Date: Mon, 4 Apr 2022 11:28:57 +0900 Subject: [PATCH 344/708] dt-bindings: net: snps: remove duplicate name snps,dwmac has duplicated name for loongson,ls2k-dwmac and loongson,ls7a-dwmac. Signed-off-by: Dongjin Yang Fixes: 68277749a013 ("dt-bindings: dwmac: Add bindings for new Loongson SoC and bridge chip") Reviewed-by: Krzysztof Kozlowski Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220404022857epcms1p6e6af1a6a86569f339e50c318abde7d3c@epcms1p6 --- Documentation/devicetree/bindings/net/snps,dwmac.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 2d5248f5b919..36c85eb3dc0d 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -53,20 +53,18 @@ properties: - allwinner,sun8i-r40-gmac - allwinner,sun8i-v3s-emac - allwinner,sun50i-a64-emac - - loongson,ls2k-dwmac - - loongson,ls7a-dwmac - amlogic,meson6-dwmac - amlogic,meson8b-dwmac - amlogic,meson8m2-dwmac - amlogic,meson-gxbb-dwmac - amlogic,meson-axg-dwmac - - loongson,ls2k-dwmac - - loongson,ls7a-dwmac - ingenic,jz4775-mac - ingenic,x1000-mac - ingenic,x1600-mac - ingenic,x1830-mac - ingenic,x2000-mac + - loongson,ls2k-dwmac + - loongson,ls7a-dwmac - rockchip,px30-gmac - rockchip,rk3128-gmac - rockchip,rk3228-gmac From 773f91b2cf3f52df0d7508fdbf60f37567cdaee4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 1 Apr 2022 17:08:21 -0400 Subject: [PATCH 345/708] SUNRPC: Fix NFSD's request deferral on RDMA transports Trond Myklebust reports an NFSD crash in svc_rdma_sendto(). Further investigation shows that the crash occurred while NFSD was handling a deferred request. This patch addresses two inter-related issues that prevent request deferral from working correctly for RPC/RDMA requests: 1. Prevent the crash by ensuring that the original svc_rqst::rq_xprt_ctxt value is available when the request is revisited. Otherwise svc_rdma_sendto() does not have a Receive context available with which to construct its reply. 2. Possibly since before commit 71641d99ce03 ("svcrdma: Properly compute .len and .buflen for received RPC Calls"), svc_rdma_recvfrom() did not include the transport header in the returned xdr_buf. There should have been no need for svc_defer() and friends to save and restore that header, as of that commit. This issue is addressed in a backport-friendly way by simply having svc_rdma_recvfrom() set rq_xprt_hlen to zero unconditionally, just as svc_tcp_recvfrom() does. This enables svc_deferred_recv() to correctly reconstruct an RPC message received via RPC/RDMA. Reported-by: Trond Myklebust Link: https://lore.kernel.org/linux-nfs/82662b7190f26fb304eb0ab1bb04279072439d4e.camel@hammerspace.com/ Signed-off-by: Chuck Lever Cc: --- include/linux/sunrpc/svc.h | 1 + net/sunrpc/svc_xprt.c | 3 +++ net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index a5dda4987e8b..217711fc9cac 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -395,6 +395,7 @@ struct svc_deferred_req { size_t addrlen; struct sockaddr_storage daddr; /* where reply must come from */ size_t daddrlen; + void *xprt_ctxt; struct cache_deferred_req handle; size_t xprt_hlen; int argslen; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 0c117d3bfda8..b42cfffa7395 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1231,6 +1231,8 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) dr->daddr = rqstp->rq_daddr; dr->argslen = rqstp->rq_arg.len >> 2; dr->xprt_hlen = rqstp->rq_xprt_hlen; + dr->xprt_ctxt = rqstp->rq_xprt_ctxt; + rqstp->rq_xprt_ctxt = NULL; /* back up head to the start of the buffer and copy */ skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; @@ -1269,6 +1271,7 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp) rqstp->rq_xprt_hlen = dr->xprt_hlen; rqstp->rq_daddr = dr->daddr; rqstp->rq_respages = rqstp->rq_pages; + rqstp->rq_xprt_ctxt = dr->xprt_ctxt; svc_xprt_received(rqstp->rq_xprt); return (dr->argslen<<2) - dr->xprt_hlen; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index cf76a6ad127b..864131a9fc6e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -831,7 +831,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) goto out_err; if (ret == 0) goto out_drop; - rqstp->rq_xprt_hlen = ret; + rqstp->rq_xprt_hlen = 0; if (svc_rdma_is_reverse_direction_reply(xprt, ctxt)) goto out_backchannel; From aadb22ba2f656581b2f733deb3a467c48cc618f6 Mon Sep 17 00:00:00 2001 From: Lv Yunlong Date: Wed, 6 Apr 2022 21:04:43 +0200 Subject: [PATCH 346/708] drbd: Fix five use after free bugs in get_initial_state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In get_initial_state, it calls notify_initial_state_done(skb,..) if cb->args[5]==1. If genlmsg_put() failed in notify_initial_state_done(), the skb will be freed by nlmsg_free(skb). Then get_initial_state will goto out and the freed skb will be used by return value skb->len, which is a uaf bug. What's worse, the same problem goes even further: skb can also be freed in the notify_*_state_change -> notify_*_state calls below. Thus 4 additional uaf bugs happened. My patch lets the problem callee functions: notify_initial_state_done and notify_*_state_change return an error code if errors happen. So that the error codes could be propagated and the uaf bugs can be avoid. v2 reports a compilation warning. This v3 fixed this warning and built successfully in my local environment with no additional warnings. v2: https://lore.kernel.org/patchwork/patch/1435218/ Fixes: a29728463b254 ("drbd: Backport the "events2" command") Signed-off-by: Lv Yunlong Reviewed-by: Christoph Böhmwalder Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 8 ++--- drivers/block/drbd/drbd_nl.c | 41 ++++++++++++++++---------- drivers/block/drbd/drbd_state.c | 18 +++++------ drivers/block/drbd/drbd_state_change.h | 8 ++--- 4 files changed, 42 insertions(+), 33 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4b55e864a0a3..4d3efaa20b7b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1638,22 +1638,22 @@ struct sib_info { }; void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib); -extern void notify_resource_state(struct sk_buff *, +extern int notify_resource_state(struct sk_buff *, unsigned int, struct drbd_resource *, struct resource_info *, enum drbd_notification_type); -extern void notify_device_state(struct sk_buff *, +extern int notify_device_state(struct sk_buff *, unsigned int, struct drbd_device *, struct device_info *, enum drbd_notification_type); -extern void notify_connection_state(struct sk_buff *, +extern int notify_connection_state(struct sk_buff *, unsigned int, struct drbd_connection *, struct connection_info *, enum drbd_notification_type); -extern void notify_peer_device_state(struct sk_buff *, +extern int notify_peer_device_state(struct sk_buff *, unsigned int, struct drbd_peer_device *, struct peer_device_info *, diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 02030c9c4d3b..b7216c186ba4 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -4549,7 +4549,7 @@ static int nla_put_notification_header(struct sk_buff *msg, return drbd_notification_header_to_skb(msg, &nh, true); } -void notify_resource_state(struct sk_buff *skb, +int notify_resource_state(struct sk_buff *skb, unsigned int seq, struct drbd_resource *resource, struct resource_info *resource_info, @@ -4591,16 +4591,17 @@ void notify_resource_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_device_state(struct sk_buff *skb, +int notify_device_state(struct sk_buff *skb, unsigned int seq, struct drbd_device *device, struct device_info *device_info, @@ -4640,16 +4641,17 @@ void notify_device_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_connection_state(struct sk_buff *skb, +int notify_connection_state(struct sk_buff *skb, unsigned int seq, struct drbd_connection *connection, struct connection_info *connection_info, @@ -4689,16 +4691,17 @@ void notify_connection_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_peer_device_state(struct sk_buff *skb, +int notify_peer_device_state(struct sk_buff *skb, unsigned int seq, struct drbd_peer_device *peer_device, struct peer_device_info *peer_device_info, @@ -4739,13 +4742,14 @@ void notify_peer_device_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } void notify_helper(enum drbd_notification_type type, @@ -4796,7 +4800,7 @@ fail: err, seq); } -static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq) +static int notify_initial_state_done(struct sk_buff *skb, unsigned int seq) { struct drbd_genlmsghdr *dh; int err; @@ -4810,11 +4814,12 @@ static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq) if (nla_put_notification_header(skb, NOTIFY_EXISTS)) goto nla_put_failure; genlmsg_end(skb, dh); - return; + return 0; nla_put_failure: nlmsg_free(skb); pr_err("Error %d sending event. Event seq:%u\n", err, seq); + return err; } static void free_state_changes(struct list_head *list) @@ -4841,6 +4846,7 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) unsigned int seq = cb->args[2]; unsigned int n; enum drbd_notification_type flags = 0; + int err = 0; /* There is no need for taking notification_mutex here: it doesn't matter if the initial state events mix with later state chage @@ -4849,32 +4855,32 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) cb->args[5]--; if (cb->args[5] == 1) { - notify_initial_state_done(skb, seq); + err = notify_initial_state_done(skb, seq); goto out; } n = cb->args[4]++; if (cb->args[4] < cb->args[3]) flags |= NOTIFY_CONTINUES; if (n < 1) { - notify_resource_state_change(skb, seq, state_change->resource, + err = notify_resource_state_change(skb, seq, state_change->resource, NOTIFY_EXISTS | flags); goto next; } n--; if (n < state_change->n_connections) { - notify_connection_state_change(skb, seq, &state_change->connections[n], + err = notify_connection_state_change(skb, seq, &state_change->connections[n], NOTIFY_EXISTS | flags); goto next; } n -= state_change->n_connections; if (n < state_change->n_devices) { - notify_device_state_change(skb, seq, &state_change->devices[n], + err = notify_device_state_change(skb, seq, &state_change->devices[n], NOTIFY_EXISTS | flags); goto next; } n -= state_change->n_devices; if (n < state_change->n_devices * state_change->n_connections) { - notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n], + err = notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n], NOTIFY_EXISTS | flags); goto next; } @@ -4889,7 +4895,10 @@ next: cb->args[4] = 0; } out: - return skb->len; + if (err) + return err; + else + return skb->len; } int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index b8a27818ab3f..4ee11aef6672 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1537,7 +1537,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device, return rv; } -void notify_resource_state_change(struct sk_buff *skb, +int notify_resource_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_resource_state_change *resource_state_change, enum drbd_notification_type type) @@ -1550,10 +1550,10 @@ void notify_resource_state_change(struct sk_buff *skb, .res_susp_fen = resource_state_change->susp_fen[NEW], }; - notify_resource_state(skb, seq, resource, &resource_info, type); + return notify_resource_state(skb, seq, resource, &resource_info, type); } -void notify_connection_state_change(struct sk_buff *skb, +int notify_connection_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_connection_state_change *connection_state_change, enum drbd_notification_type type) @@ -1564,10 +1564,10 @@ void notify_connection_state_change(struct sk_buff *skb, .conn_role = connection_state_change->peer_role[NEW], }; - notify_connection_state(skb, seq, connection, &connection_info, type); + return notify_connection_state(skb, seq, connection, &connection_info, type); } -void notify_device_state_change(struct sk_buff *skb, +int notify_device_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_device_state_change *device_state_change, enum drbd_notification_type type) @@ -1577,10 +1577,10 @@ void notify_device_state_change(struct sk_buff *skb, .dev_disk_state = device_state_change->disk_state[NEW], }; - notify_device_state(skb, seq, device, &device_info, type); + return notify_device_state(skb, seq, device, &device_info, type); } -void notify_peer_device_state_change(struct sk_buff *skb, +int notify_peer_device_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_peer_device_state_change *p, enum drbd_notification_type type) @@ -1594,7 +1594,7 @@ void notify_peer_device_state_change(struct sk_buff *skb, .peer_resync_susp_dependency = p->resync_susp_dependency[NEW], }; - notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type); + return notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type); } static void broadcast_state_change(struct drbd_state_change *state_change) @@ -1602,7 +1602,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change) struct drbd_resource_state_change *resource_state_change = &state_change->resource[0]; bool resource_state_has_changed; unsigned int n_device, n_connection, n_peer_device, n_peer_devices; - void (*last_func)(struct sk_buff *, unsigned int, void *, + int (*last_func)(struct sk_buff *, unsigned int, void *, enum drbd_notification_type) = NULL; void *last_arg = NULL; diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h index ba80f612d6ab..d5b0479bc9a6 100644 --- a/drivers/block/drbd/drbd_state_change.h +++ b/drivers/block/drbd/drbd_state_change.h @@ -44,19 +44,19 @@ extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_ extern void copy_old_to_new_state_change(struct drbd_state_change *); extern void forget_state_change(struct drbd_state_change *); -extern void notify_resource_state_change(struct sk_buff *, +extern int notify_resource_state_change(struct sk_buff *, unsigned int, struct drbd_resource_state_change *, enum drbd_notification_type type); -extern void notify_connection_state_change(struct sk_buff *, +extern int notify_connection_state_change(struct sk_buff *, unsigned int, struct drbd_connection_state_change *, enum drbd_notification_type type); -extern void notify_device_state_change(struct sk_buff *, +extern int notify_device_state_change(struct sk_buff *, unsigned int, struct drbd_device_state_change *, enum drbd_notification_type type); -extern void notify_peer_device_state_change(struct sk_buff *, +extern int notify_peer_device_state_change(struct sk_buff *, unsigned int, struct drbd_peer_device_state_change *, enum drbd_notification_type type); From ae4d37b5df749926891583d42a6801b5da11e3c1 Mon Sep 17 00:00:00 2001 From: Xiaomeng Tong Date: Wed, 6 Apr 2022 21:04:44 +0200 Subject: [PATCH 347/708] drbd: fix an invalid memory access caused by incorrect use of list iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bug is here: idr_remove(&connection->peer_devices, vnr); If the previous for_each_connection() don't exit early (no goto hit inside the loop), the iterator 'connection' after the loop will be a bogus pointer to an invalid structure object containing the HEAD (&resource->connections). As a result, the use of 'connection' above will lead to a invalid memory access (including a possible invalid free as idr_remove could call free_layer). The original intention should have been to remove all peer_devices, but the following lines have already done the work. So just remove this line and the unneeded label, to fix this bug. Cc: stable@vger.kernel.org Fixes: c06ece6ba6f1b ("drbd: Turn connection->volumes into connection->peer_devices") Signed-off-by: Xiaomeng Tong Reviewed-by: Christoph Böhmwalder Reviewed-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 9676a1d214bc..d6dfa286ddb3 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2773,12 +2773,12 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig if (init_submitter(device)) { err = ERR_NOMEM; - goto out_idr_remove_vol; + goto out_idr_remove_from_resource; } err = add_disk(disk); if (err) - goto out_idr_remove_vol; + goto out_idr_remove_from_resource; /* inherit the connection state */ device->state.conn = first_connection(resource)->cstate; @@ -2792,8 +2792,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_debugfs_device_add(device); return NO_ERROR; -out_idr_remove_vol: - idr_remove(&connection->peer_devices, vnr); out_idr_remove_from_resource: for_each_connection(connection, resource) { peer_device = idr_remove(&connection->peer_devices, vnr); From 286901941fd18a52b2138fddbbf589ad3639eb00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=B6hmwalder?= Date: Wed, 6 Apr 2022 21:04:45 +0200 Subject: [PATCH 348/708] drbd: set QUEUE_FLAG_STABLE_WRITES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want our pages not to change while they are being written. Signed-off-by: Christoph Böhmwalder Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index d6dfa286ddb3..4b0b25cc916e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2719,6 +2719,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig sprintf(disk->disk_name, "drbd%d", minor); disk->private_data = device; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); blk_queue_write_cache(disk->queue, true, true); /* Setting the max_hw_sectors to an odd value of 8kibyte here This triggers a max_bio_size message upon first attach or connect */ From 0f525289ff0ddeb380813bd81e0f9bdaaa1c9078 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 4 Apr 2022 21:44:02 +0200 Subject: [PATCH 349/708] fbdev: Fix unregistering of framebuffers without device OF framebuffers do not have an underlying device in the Linux device hierarchy. Do a regular unregister call instead of hot unplugging such a non-existing device. Fixes a NULL dereference. An example error message on ppc64le is shown below. BUG: Kernel NULL pointer dereference on read at 0x00000060 Faulting instruction address: 0xc00000000080dfa4 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries [...] CPU: 2 PID: 139 Comm: systemd-udevd Not tainted 5.17.0-ae085d7f9365 #1 NIP: c00000000080dfa4 LR: c00000000080df9c CTR: c000000000797430 REGS: c000000004132fe0 TRAP: 0300 Not tainted (5.17.0-ae085d7f9365) MSR: 8000000002009033 CR: 28228282 XER: 20000000 CFAR: c00000000000c80c DAR: 0000000000000060 DSISR: 40000000 IRQMASK: 0 GPR00: c00000000080df9c c000000004133280 c00000000169d200 0000000000000029 GPR04: 00000000ffffefff c000000004132f90 c000000004132f88 0000000000000000 GPR08: c0000000015658f8 c0000000015cd200 c0000000014f57d0 0000000048228283 GPR12: 0000000000000000 c00000003fffe300 0000000020000000 0000000000000000 GPR16: 0000000000000000 0000000113fc4a40 0000000000000005 0000000113fcfb80 GPR20: 000001000f7283b0 0000000000000000 c000000000e4a588 c000000000e4a5b0 GPR24: 0000000000000001 00000000000a0000 c008000000db0168 c0000000021f6ec0 GPR28: c0000000016d65a8 c000000004b36460 0000000000000000 c0000000016d64b0 NIP [c00000000080dfa4] do_remove_conflicting_framebuffers+0x184/0x1d0 [c000000004133280] [c00000000080df9c] do_remove_conflicting_framebuffers+0x17c/0x1d0 (unreliable) [c000000004133350] [c00000000080e4d0] remove_conflicting_framebuffers+0x60/0x150 [c0000000041333a0] [c00000000080e6f4] remove_conflicting_pci_framebuffers+0x134/0x1b0 [c000000004133450] [c008000000e70438] drm_aperture_remove_conflicting_pci_framebuffers+0x90/0x100 [drm] [c000000004133490] [c008000000da0ce4] bochs_pci_probe+0x6c/0xa64 [bochs] [...] [c000000004133db0] [c00000000002aaa0] system_call_exception+0x170/0x2d0 [c000000004133e10] [c00000000000c3cc] system_call_common+0xec/0x250 The bug [1] was introduced by commit 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal"). Most firmware framebuffers have an underlying platform device, which can be hot-unplugged before loading the native graphics driver. OF framebuffers do not (yet) have that device. Fix the code by unregistering the framebuffer as before without a hot unplug. Tested with 5.17 on qemu ppc64le emulation. Signed-off-by: Thomas Zimmermann Fixes: 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal") Reported-by: Sudip Mukherjee Reviewed-by: Daniel Vetter Reviewed-by: Javier Martinez Canillas Tested-by: Sudip Mukherjee Cc: Zack Rusin Cc: Javier Martinez Canillas Cc: Hans de Goede Cc: stable@vger.kernel.org # v5.11+ Cc: Helge Deller Cc: Daniel Vetter Cc: Sam Ravnborg Cc: Zheyu Ma Cc: Xiyu Yang Cc: Zhen Lei Cc: Matthew Wilcox Cc: Alex Deucher Cc: Tetsuo Handa Cc: Guenter Roeck Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Link: https://lore.kernel.org/all/YkHXO6LGHAN0p1pq@debian/ # [1] Link: https://patchwork.freedesktop.org/patch/msgid/20220404194402.29974-1-tzimmermann@suse.de --- drivers/video/fbdev/core/fbmem.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 34d6bb1bf82e..a6bb0e438216 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1579,7 +1579,14 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a, * If it's not a platform device, at least print a warning. A * fix would add code to remove the device from the system. */ - if (dev_is_platform(device)) { + if (!device) { + /* TODO: Represent each OF framebuffer as its own + * device in the device hierarchy. For now, offb + * doesn't have such a device, so unregister the + * framebuffer as before without warning. + */ + do_unregister_framebuffer(registered_fb[i]); + } else if (dev_is_platform(device)) { registered_fb[i]->forced_out = true; platform_device_unregister(to_platform_device(device)); } else { From 5dc6ce767dc8ba173534fa75c66a9e2a13b969d1 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 8 Mar 2022 21:15:43 +0000 Subject: [PATCH 350/708] dt-bindings: gpu: mali-bifrost: Document RZ/V2L SoC The Renesas RZ/V2L SoC (a.k.a R9A07G054) has a Bifrost Mali-G31 GPU, add a compatible string for it. Signed-off-by: Lad Prabhakar Reviewed-by: Biju Das Reviewed-by: Geert Uytterhoeven Acked-by: Krzysztof Kozlowski Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220308211543.3081-1-prabhakar.mahadev-lad.rj@bp.renesas.com --- Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml b/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml index 4d6bfae0653c..85f8d4764740 100644 --- a/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml +++ b/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml @@ -20,6 +20,7 @@ properties: - mediatek,mt8183-mali - realtek,rtd1619-mali - renesas,r9a07g044-mali + - renesas,r9a07g054-mali - rockchip,px30-mali - rockchip,rk3568-mali - const: arm,mali-bifrost # Mali Bifrost GPU model/revision is fully discoverable @@ -109,7 +110,9 @@ allOf: properties: compatible: contains: - const: renesas,r9a07g044-mali + enum: + - renesas,r9a07g044-mali + - renesas,r9a07g054-mali then: properties: interrupts: From ffa0b64e3be58519ae472ea29a1a1ad681e32f48 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 7 Apr 2022 00:57:57 +1000 Subject: [PATCH 351/708] powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit mpe: On 64-bit Book3E vmalloc space starts at 0x8000000000000000. Because of the way __pa() works we have: __pa(0x8000000000000000) == 0, and therefore virt_to_pfn(0x8000000000000000) == 0, and therefore virt_addr_valid(0x8000000000000000) == true Which is wrong, virt_addr_valid() should be false for vmalloc space. In fact all vmalloc addresses that alias with a valid PFN will return true from virt_addr_valid(). That can cause bugs with hardened usercopy as described below by Kefeng Wang: When running ethtool eth0 on 64-bit Book3E, a BUG occurred: usercopy: Kernel memory exposure attempt detected from SLUB object not in SLUB page?! (offset 0, size 1048)! kernel BUG at mm/usercopy.c:99 ... usercopy_abort+0x64/0xa0 (unreliable) __check_heap_object+0x168/0x190 __check_object_size+0x1a0/0x200 dev_ethtool+0x2494/0x2b20 dev_ioctl+0x5d0/0x770 sock_do_ioctl+0xf0/0x1d0 sock_ioctl+0x3ec/0x5a0 __se_sys_ioctl+0xf0/0x160 system_call_exception+0xfc/0x1f0 system_call_common+0xf8/0x200 The code shows below, data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN)); copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) The data is alloced by vmalloc(), virt_addr_valid(ptr) will return true on 64-bit Book3E, which leads to the panic. As commit 4dd7554a6456 ("powerpc/64: Add VIRTUAL_BUG_ON checks for __va and __pa addresses") does, make sure the virt addr above PAGE_OFFSET in the virt_addr_valid() for 64-bit, also add upper limit check to make sure the virt is below high_memory. Meanwhile, for 32-bit PAGE_OFFSET is the virtual address of the start of lowmem, high_memory is the upper low virtual address, the check is suitable for 32-bit, this will fix the issue mentioned in commit 602946ec2f90 ("powerpc: Set max_mapnr correctly") too. On 32-bit there is a similar problem with high memory, that was fixed in commit 602946ec2f90 ("powerpc: Set max_mapnr correctly"), but that commit breaks highmem and needs to be reverted. We can't easily fix __pa(), we have code that relies on its current behaviour. So for now add extra checks to virt_addr_valid(). For 64-bit Book3S the extra checks are not necessary, the combination of virt_to_pfn() and pfn_valid() should yield the correct result, but they are harmless. Signed-off-by: Kefeng Wang Reviewed-by: Christophe Leroy [mpe: Add additional change log detail] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220406145802.538416-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/page.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 254687258f42..f2c5c26869f1 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -132,7 +132,11 @@ static inline bool pfn_valid(unsigned long pfn) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) +#define virt_addr_valid(vaddr) ({ \ + unsigned long _addr = (unsigned long)vaddr; \ + _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory && \ + pfn_valid(virt_to_pfn(_addr)); \ +}) /* * On Book-E parts we need __va to parse the device tree and we can't From 1ff5c8e8c835e8a81c0868e3050c76563dd56a2c Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 7 Apr 2022 00:57:58 +1000 Subject: [PATCH 352/708] Revert "powerpc: Set max_mapnr correctly" This reverts commit 602946ec2f90d5bd965857753880db29d2d9a1e9. If CONFIG_HIGHMEM is enabled, no highmem will be added with max_mapnr set to max_low_pfn, see mem_init(): for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { ... free_highmem_page(); } Now that virt_addr_valid() has been fixed in the previous commit, we can revert the change to max_mapnr. Fixes: 602946ec2f90 ("powerpc: Set max_mapnr correctly") Signed-off-by: Kefeng Wang Reviewed-by: Christophe Leroy Reported-by: Erhard F. [mpe: Update change log to reflect series reordering] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220406145802.538416-2-mpe@ellerman.id.au --- arch/powerpc/mm/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8e301cd8925b..4d221d033804 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -255,7 +255,7 @@ void __init mem_init(void) #endif high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - set_max_mapnr(max_low_pfn); + set_max_mapnr(max_pfn); kasan_late_init(); From e3c1c4fd9e6d14059ed93ebfe15e1c57793b1a05 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 6 Apr 2022 02:36:16 +0200 Subject: [PATCH 353/708] random: check for signals every PAGE_SIZE chunk of /dev/[u]random In 1448769c9cdb ("random: check for signal_pending() outside of need_resched() check"), Jann pointed out that we previously were only checking the TIF_NOTIFY_SIGNAL and TIF_SIGPENDING flags if the process had TIF_NEED_RESCHED set, which meant in practice, super long reads to /dev/[u]random would delay signal handling by a long time. I tried this using the below program, and indeed I wasn't able to interrupt a /dev/urandom read until after several megabytes had been read. The bug he fixed has always been there, and so code that reads from /dev/urandom without checking the return value of read() has mostly worked for a long time, for most sizes, not just for <= 256. Maybe it makes sense to keep that code working. The reason it was so small prior, ignoring the fact that it didn't work anyway, was likely because /dev/random used to block, and that could happen for pretty large lengths of time while entropy was gathered. But now, it's just a chacha20 call, which is extremely fast and is just operating on pure data, without having to wait for some external event. In that sense, /dev/[u]random is a lot more like /dev/zero. Taking a page out of /dev/zero's read_zero() function, it always returns at least one chunk, and then checks for signals after each chunk. Chunk sizes there are of length PAGE_SIZE. Let's just copy the same thing for /dev/[u]random, and check for signals and cond_resched() for every PAGE_SIZE amount of data. This makes the behavior more consistent with expectations, and should mitigate the impact of Jann's fix for the age-old signal check bug. ---- test program ---- #include #include #include #include static unsigned char x[~0U]; static void handle(int) { } int main(int argc, char *argv[]) { pid_t pid = getpid(), child; signal(SIGUSR1, handle); if (!(child = fork())) { for (;;) kill(pid, SIGUSR1); } pause(); printf("interrupted after reading %zd bytes\n", getrandom(x, sizeof(x), 0)); kill(child, SIGTERM); return 0; } Cc: Jann Horn Cc: Theodore Ts'o Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 394cbd814a0b..e15063d61460 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -523,7 +523,6 @@ EXPORT_SYMBOL(get_random_bytes); static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) { - bool large_request = nbytes > 256; ssize_t ret = 0; size_t len; u32 chacha_state[CHACHA_STATE_WORDS]; @@ -549,15 +548,6 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) } do { - if (large_request) { - if (signal_pending(current)) { - if (!ret) - ret = -ERESTARTSYS; - break; - } - cond_resched(); - } - chacha20_block(chacha_state, output); if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; @@ -571,6 +561,13 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) nbytes -= len; buf += len; ret += len; + + BUILD_BUG_ON(PAGE_SIZE % CHACHA_BLOCK_SIZE != 0); + if (!(ret % PAGE_SIZE) && nbytes) { + if (signal_pending(current)) + break; + cond_resched(); + } } while (nbytes); memzero_explicit(output, sizeof(output)); From 1ecc0c09f19f8e10a2c52676f8ca47c28c9f73c7 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 4 Apr 2022 21:21:05 +0200 Subject: [PATCH 354/708] dt-bindings: display: panel: mipi-dbi-spi: Make width-mm/height-mm mandatory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the width-mm/height-mm panel properties mandatory to correctly report the panel dimensions to the OS. Fixes: 2f3468b82db97 ("dt-bindings: display: add bindings for MIPI DBI compatible SPI panels") Signed-off-by: Marek Vasut Cc: Christoph Niedermaier Cc: Daniel Vetter Cc: Dmitry Osipenko Cc: Laurent Pinchart Cc: Noralf Trønnes Cc: Rob Herring Cc: Robert Foss Cc: Sam Ravnborg Cc: Thomas Zimmermann Cc: devicetree@vger.kernel.org To: dri-devel@lists.freedesktop.org Acked-by: Noralf Trønnes Reviewed-by: Laurent Pinchart Acked-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20220404192105.12547-1-marex@denx.de --- .../devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml b/Documentation/devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml index f29789994b18..c2df8d28aaf5 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml @@ -83,6 +83,8 @@ properties: required: - compatible - reg + - width-mm + - height-mm - panel-timing unevaluatedProperties: false From 6a0d0ae3e8b533d6de627c814c60264b9a85bad6 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 4 Apr 2022 13:55:47 +0900 Subject: [PATCH 355/708] scsi: scsi_debug: Fix sdebug_blk_mq_poll() in_use_bm bitmap use The in_use_bm bitmap of struct sdebug_queue should be accessed under protection of the qc_lock spinlock. Make sure that this lock is taken before calling find_first_bit() at the beginning of the function sdebug_blk_mq_poll(). Link: https://lore.kernel.org/r/20220404045547.579887-1-damien.lemoal@opensource.wdc.com Fixes: 3fd07aecb750 ("scsi: scsi_debug: Fix qc_lock use in sdebug_blk_mq_poll()") Acked-by: Douglas Gilbert Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index c607755cce00..ff78ef702f22 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -7519,12 +7519,13 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num) struct sdebug_defer *sd_dp; sqp = sdebug_q_arr + queue_num; - qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue); - if (qc_idx >= sdebug_max_queue) - return 0; spin_lock_irqsave(&sqp->qc_lock, iflags); + qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue); + if (qc_idx >= sdebug_max_queue) + goto unlock; + for (first = true; first || qc_idx + 1 < sdebug_max_queue; ) { if (first) { first = false; @@ -7589,6 +7590,7 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num) break; } +unlock: spin_unlock_irqrestore(&sqp->qc_lock, iflags); if (num_entries > 0) From 6eaa77144b90582cef7f1fc346f11df51f9f83d5 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 4 Apr 2022 14:00:41 +0900 Subject: [PATCH 356/708] scsi: mpt3sas: Fix mpt3sas_check_same_4gb_region() kdoc comment The start_addres argument of mpt3sas_check_same_4gb_region() was misnamed in the function kdoc comment, resulting in the following warning when compiling with W=1. drivers/scsi/mpt3sas/mpt3sas_base.c:5728: warning: Function parameter or member 'start_address' not described in 'mpt3sas_check_same_4gb_region' drivers/scsi/mpt3sas/mpt3sas_base.c:5728: warning: Excess function parameter 'reply_pool_start_address' description in 'mpt3sas_check_same_4gb_region' Fix the argument name in the function kdoc comment to avoid it. While at it, remove a useless blank line between the kdoc and function code. Link: https://lore.kernel.org/r/20220404050041.594774-1-damien.lemoal@opensource.wdc.com Acked-by: Sreekanth Reddy Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index b57f1803371e..538d2c0cd971 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -5716,13 +5716,12 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) /** * mpt3sas_check_same_4gb_region - checks whether all reply queues in a set are * having same upper 32bits in their base memory address. - * @reply_pool_start_address: Base address of a reply queue set + * @start_address: Base address of a reply queue set * @pool_sz: Size of single Reply Descriptor Post Queues pool size * * Return: 1 if reply queues in a set have a same upper 32bits in their base * memory address, else 0. */ - static int mpt3sas_check_same_4gb_region(dma_addr_t start_address, u32 pool_sz) { From 4049f7acef3eb37c1ea0df45f3ffc29404f4e708 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 4 Apr 2022 08:50:38 +0300 Subject: [PATCH 357/708] scsi: ufs: ufs-pci: Add support for Intel MTL Add PCI ID and callbacks to support Intel Meteor Lake (MTL). Link: https://lore.kernel.org/r/20220404055038.2208051-1-adrian.hunter@intel.com Cc: stable@vger.kernel.org # v5.15+ Reviewed-by: Avri Altman Reviewed-by: Bart Van Assche Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pci.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index f76692053ca1..e892b9feffb1 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -428,6 +428,12 @@ static int ufs_intel_adl_init(struct ufs_hba *hba) return ufs_intel_common_init(hba); } +static int ufs_intel_mtl_init(struct ufs_hba *hba) +{ + hba->caps |= UFSHCD_CAP_CRYPTO | UFSHCD_CAP_WB_EN; + return ufs_intel_common_init(hba); +} + static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = { .name = "intel-pci", .init = ufs_intel_common_init, @@ -465,6 +471,16 @@ static struct ufs_hba_variant_ops ufs_intel_adl_hba_vops = { .device_reset = ufs_intel_device_reset, }; +static struct ufs_hba_variant_ops ufs_intel_mtl_hba_vops = { + .name = "intel-pci", + .init = ufs_intel_mtl_init, + .exit = ufs_intel_common_exit, + .hce_enable_notify = ufs_intel_hce_enable_notify, + .link_startup_notify = ufs_intel_link_startup_notify, + .resume = ufs_intel_resume, + .device_reset = ufs_intel_device_reset, +}; + #ifdef CONFIG_PM_SLEEP static int ufshcd_pci_restore(struct device *dev) { @@ -579,6 +595,7 @@ static const struct pci_device_id ufshcd_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x98FA), (kernel_ulong_t)&ufs_intel_lkf_hba_vops }, { PCI_VDEVICE(INTEL, 0x51FF), (kernel_ulong_t)&ufs_intel_adl_hba_vops }, { PCI_VDEVICE(INTEL, 0x54FF), (kernel_ulong_t)&ufs_intel_adl_hba_vops }, + { PCI_VDEVICE(INTEL, 0x7E47), (kernel_ulong_t)&ufs_intel_mtl_hba_vops }, { } /* terminate list */ }; From 75f5a0c4744c7880f1ceceb8b22e3751bf1d4166 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 6 Apr 2022 19:05:39 +1000 Subject: [PATCH 358/708] scsi: sym53c500_cs: Stop using struct scsi_pointer This driver doesn't use SCp.ptr to save a SCSI command data pointer which means "scsi pointer" is a complete misnomer here. Only a few members of struct scsi_pointer are needed so move those to private command data. Link: https://lore.kernel.org/r/accf71e293ba3aed6d18c8baeb405de8dfe7c935.1649235939.git.fthain@linux-m68k.org Cc: Bart Van Assche Cc: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen --- drivers/scsi/pcmcia/sym53c500_cs.c | 52 ++++++++++++++---------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c index c4a838635893..5d7dfefd6f6c 100644 --- a/drivers/scsi/pcmcia/sym53c500_cs.c +++ b/drivers/scsi/pcmcia/sym53c500_cs.c @@ -192,10 +192,11 @@ struct sym53c500_data { int fast_pio; }; -static struct scsi_pointer *sym53c500_scsi_pointer(struct scsi_cmnd *cmd) -{ - return scsi_cmd_priv(cmd); -} +struct sym53c500_cmd_priv { + int status; + int message; + int phase; +}; enum Phase { idle, @@ -356,7 +357,7 @@ SYM53C500_intr(int irq, void *dev_id) struct sym53c500_data *data = (struct sym53c500_data *)dev->hostdata; struct scsi_cmnd *curSC = data->current_SC; - struct scsi_pointer *scsi_pointer = sym53c500_scsi_pointer(curSC); + struct sym53c500_cmd_priv *scp = scsi_cmd_priv(curSC); int fast_pio = data->fast_pio; spin_lock_irqsave(dev->host_lock, flags); @@ -403,12 +404,11 @@ SYM53C500_intr(int irq, void *dev_id) if (int_reg & 0x20) { /* Disconnect */ DEB(printk("SYM53C500: disconnect intr received\n")); - if (scsi_pointer->phase != message_in) { /* Unexpected disconnect */ + if (scp->phase != message_in) { /* Unexpected disconnect */ curSC->result = DID_NO_CONNECT << 16; } else { /* Command complete, return status and message */ - curSC->result = (scsi_pointer->Status & 0xff) | - ((scsi_pointer->Message & 0xff) << 8) | - (DID_OK << 16); + curSC->result = (scp->status & 0xff) | + ((scp->message & 0xff) << 8) | (DID_OK << 16); } goto idle_out; } @@ -419,7 +419,7 @@ SYM53C500_intr(int irq, void *dev_id) struct scatterlist *sg; int i; - scsi_pointer->phase = data_out; + scp->phase = data_out; VDEB(printk("SYM53C500: Data-Out phase\n")); outb(FLUSH_FIFO, port_base + CMD_REG); LOAD_DMA_COUNT(port_base, scsi_bufflen(curSC)); /* Max transfer size */ @@ -438,7 +438,7 @@ SYM53C500_intr(int irq, void *dev_id) struct scatterlist *sg; int i; - scsi_pointer->phase = data_in; + scp->phase = data_in; VDEB(printk("SYM53C500: Data-In phase\n")); outb(FLUSH_FIFO, port_base + CMD_REG); LOAD_DMA_COUNT(port_base, scsi_bufflen(curSC)); /* Max transfer size */ @@ -453,12 +453,12 @@ SYM53C500_intr(int irq, void *dev_id) break; case 0x02: /* COMMAND */ - scsi_pointer->phase = command_ph; + scp->phase = command_ph; printk("SYM53C500: Warning: Unknown interrupt occurred in command phase!\n"); break; case 0x03: /* STATUS */ - scsi_pointer->phase = status_ph; + scp->phase = status_ph; VDEB(printk("SYM53C500: Status phase\n")); outb(FLUSH_FIFO, port_base + CMD_REG); outb(INIT_CMD_COMPLETE, port_base + CMD_REG); @@ -471,24 +471,22 @@ SYM53C500_intr(int irq, void *dev_id) case 0x06: /* MESSAGE-OUT */ DEB(printk("SYM53C500: Message-Out phase\n")); - scsi_pointer->phase = message_out; + scp->phase = message_out; outb(SET_ATN, port_base + CMD_REG); /* Reject the message */ outb(MSG_ACCEPT, port_base + CMD_REG); break; case 0x07: /* MESSAGE-IN */ VDEB(printk("SYM53C500: Message-In phase\n")); - scsi_pointer->phase = message_in; + scp->phase = message_in; - scsi_pointer->Status = inb(port_base + SCSI_FIFO); - scsi_pointer->Message = inb(port_base + SCSI_FIFO); + scp->status = inb(port_base + SCSI_FIFO); + scp->message = inb(port_base + SCSI_FIFO); VDEB(printk("SCSI FIFO size=%d\n", inb(port_base + FIFO_FLAGS) & 0x1f)); - DEB(printk("Status = %02x Message = %02x\n", - scsi_pointer->Status, scsi_pointer->Message)); + DEB(printk("Status = %02x Message = %02x\n", scp->status, scp->message)); - if (scsi_pointer->Message == SAVE_POINTERS || - scsi_pointer->Message == DISCONNECT) { + if (scp->message == SAVE_POINTERS || scp->message == DISCONNECT) { outb(SET_ATN, port_base + CMD_REG); /* Reject message */ DEB(printk("Discarding SAVE_POINTERS message\n")); } @@ -500,7 +498,7 @@ out: return IRQ_HANDLED; idle_out: - scsi_pointer->phase = idle; + scp->phase = idle; scsi_done(curSC); goto out; } @@ -548,7 +546,7 @@ SYM53C500_info(struct Scsi_Host *SChost) static int SYM53C500_queue_lck(struct scsi_cmnd *SCpnt) { - struct scsi_pointer *scsi_pointer = sym53c500_scsi_pointer(SCpnt); + struct sym53c500_cmd_priv *scp = scsi_cmd_priv(SCpnt); int i; int port_base = SCpnt->device->host->io_port; struct sym53c500_data *data = @@ -565,9 +563,9 @@ static int SYM53C500_queue_lck(struct scsi_cmnd *SCpnt) VDEB(printk("\n")); data->current_SC = SCpnt; - scsi_pointer->phase = command_ph; - scsi_pointer->Status = 0; - scsi_pointer->Message = 0; + scp->phase = command_ph; + scp->status = 0; + scp->message = 0; /* We are locked here already by the mid layer */ REG0(port_base); @@ -682,7 +680,7 @@ static struct scsi_host_template sym53c500_driver_template = { .this_id = 7, .sg_tablesize = 32, .shost_groups = SYM53C500_shost_groups, - .cmd_size = sizeof(struct scsi_pointer), + .cmd_size = sizeof(struct sym53c500_cmd_priv), }; static int SYM53C500_config_check(struct pcmcia_device *p_dev, void *priv_data) From f61eb1216c959f93ffabd3b8781fa5b2b22f8907 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 5 Apr 2022 17:36:37 +0530 Subject: [PATCH 359/708] scsi: mpt3sas: Fail reset operation if config request timed out As part of controller reset operation the driver issues a config request command. If this command gets times out, then fail the controller reset operation instead of retrying it. Link: https://lore.kernel.org/r/20220405120637.20528-1-sreekanth.reddy@broadcom.com Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_config.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c index 0563078227de..a8dd14c91efd 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_config.c +++ b/drivers/scsi/mpt3sas/mpt3sas_config.c @@ -394,10 +394,13 @@ _config_request(struct MPT3SAS_ADAPTER *ioc, Mpi2ConfigRequest_t retry_count++; if (ioc->config_cmds.smid == smid) mpt3sas_base_free_smid(ioc, smid); - if ((ioc->shost_recovery) || (ioc->config_cmds.status & - MPT3_CMD_RESET) || ioc->pci_error_recovery) + if (ioc->config_cmds.status & MPT3_CMD_RESET) goto retry_config; - issue_host_reset = 1; + if (ioc->shost_recovery || ioc->pci_error_recovery) { + issue_host_reset = 0; + r = -EFAULT; + } else + issue_host_reset = 1; goto free_mem; } From 1700714b1ff252b634db21186db4d91e7e006043 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 2 Mar 2022 00:35:57 -0500 Subject: [PATCH 360/708] scsi: sd: sd_read_cpr() requires VPD pages As such it should be called inside the scsi_device_supports_vpd() conditional. Link: https://lore.kernel.org/r/20220302053559.32147-13-martin.petersen@oracle.com Fixes: e815d36548f0 ("scsi: sd: add concurrent positioning ranges support") Cc: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index a390679cf458..cecba3fcbc61 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3216,6 +3216,7 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_block_limits(sdkp); sd_read_block_characteristics(sdkp); sd_zbc_read_zones(sdkp, buffer); + sd_read_cpr(sdkp); } sd_print_capacity(sdkp, old_capacity); @@ -3225,7 +3226,6 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_app_tag_own(sdkp, buffer); sd_read_write_same(sdkp, buffer); sd_read_security(sdkp, buffer); - sd_read_cpr(sdkp); } /* From 5f2bce1e222028dc1c15f130109a17aa654ae6e8 Mon Sep 17 00:00:00 2001 From: Alexey Galakhov Date: Wed, 9 Mar 2022 22:25:35 +0100 Subject: [PATCH 361/708] scsi: mvsas: Add PCI ID of RocketRaid 2640 The HighPoint RocketRaid 2640 is a low-cost SAS controller based on Marvell chip. The chip in question was already supported by the kernel, just the PCI ID of this particular board was missing. Link: https://lore.kernel.org/r/20220309212535.402987-1-agalakhov@gmail.com Signed-off-by: Alexey Galakhov Signed-off-by: Martin K. Petersen --- drivers/scsi/mvsas/mv_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c index 7ac63eb5ccd3..2fde496fff5f 100644 --- a/drivers/scsi/mvsas/mv_init.c +++ b/drivers/scsi/mvsas/mv_init.c @@ -647,6 +647,7 @@ static struct pci_device_id mvs_pci_table[] = { { PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1300), chip_1300 }, { PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1320), chip_1320 }, { PCI_VDEVICE(ADAPTEC2, 0x0450), chip_6440 }, + { PCI_VDEVICE(TTI, 0x2640), chip_6440 }, { PCI_VDEVICE(TTI, 0x2710), chip_9480 }, { PCI_VDEVICE(TTI, 0x2720), chip_9480 }, { PCI_VDEVICE(TTI, 0x2721), chip_9480 }, From 61144d83376a136d8aa7a9e057d916c505bfb75f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 18 Mar 2022 00:39:27 +0000 Subject: [PATCH 362/708] scsi: message: fusion: Remove redundant variable dmp Variable dmp is being assigned a value that is never read, the variable is redundant and can be removed. Cleans up clang scan build warning: drivers/message/fusion/mptbase.c:6667:39: warning: Although the value stored to 'dmp' is used in the enclosing expression, the value is never actually read from 'dmp' [deadcode.DeadStores] Link: https://lore.kernel.org/r/20220318003927.81471-1-colin.i.king@gmail.com Reviewed-by: Nick Desaulniers Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/message/fusion/mptbase.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index e90adfa57950..9b3ba2df71c7 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c @@ -6658,13 +6658,13 @@ static int mpt_summary_proc_show(struct seq_file *m, void *v) static int mpt_version_proc_show(struct seq_file *m, void *v) { u8 cb_idx; - int scsi, fc, sas, lan, ctl, targ, dmp; + int scsi, fc, sas, lan, ctl, targ; char *drvname; seq_printf(m, "%s-%s\n", "mptlinux", MPT_LINUX_VERSION_COMMON); seq_printf(m, " Fusion MPT base driver\n"); - scsi = fc = sas = lan = ctl = targ = dmp = 0; + scsi = fc = sas = lan = ctl = targ = 0; for (cb_idx = MPT_MAX_PROTOCOL_DRIVERS-1; cb_idx; cb_idx--) { drvname = NULL; if (MptCallbacks[cb_idx]) { From 03252259e18e63eb56a0d29c2fefcc30b58b812b Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Thu, 31 Mar 2022 21:10:19 -0400 Subject: [PATCH 363/708] scsi: sd: Clean up gendisk if device_add_disk() failed We forgot to call blk_cleanup_disk() when device_add_disk() failed. This would cause a memory leak of gendisk and sched_tags allocated in elevator_init_mq() Reference:https://syzkaller.appspot.com/x/log.txt?x=13b41dcb700000 Reported-and-tested-by: syzbot+f08c77040fa163a75a46@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20220401011018.1026553-1-haowenchao@huawei.com Signed-off-by: Wenchao Hao Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cecba3fcbc61..dc6e55761fd1 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3475,6 +3475,7 @@ static int sd_probe(struct device *dev) error = device_add_disk(dev, gd, NULL); if (error) { put_device(&sdkp->disk_dev); + blk_cleanup_disk(gd); goto out; } From bfb7789bcbd901caead43861461bc8f334c90d3b Mon Sep 17 00:00:00 2001 From: Xiaomeng Tong Date: Sun, 20 Mar 2022 23:07:33 +0800 Subject: [PATCH 364/708] scsi: ufs: ufshpb: Fix a NULL check on list iterator The list iterator is always non-NULL so the check 'if (!rgn)' is always false and the dev_err() is never called. Move the check outside the loop and determine if 'victim_rgn' is NULL, to fix this bug. Link: https://lore.kernel.org/r/20220320150733.21824-1-xiam0nd.tong@gmail.com Fixes: 4b5f49079c52 ("scsi: ufs: ufshpb: L2P map management for HPB read") Reviewed-by: Daejun Park Signed-off-by: Xiaomeng Tong Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshpb.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c index b2bec19022cd..81099b68bbfb 100644 --- a/drivers/scsi/ufs/ufshpb.c +++ b/drivers/scsi/ufs/ufshpb.c @@ -867,12 +867,6 @@ static struct ufshpb_region *ufshpb_victim_lru_info(struct ufshpb_lu *hpb) struct ufshpb_region *rgn, *victim_rgn = NULL; list_for_each_entry(rgn, &lru_info->lh_lru_rgn, list_lru_rgn) { - if (!rgn) { - dev_err(&hpb->sdev_ufs_lu->sdev_dev, - "%s: no region allocated\n", - __func__); - return NULL; - } if (ufshpb_check_srgns_issue_state(hpb, rgn)) continue; @@ -888,6 +882,11 @@ static struct ufshpb_region *ufshpb_victim_lru_info(struct ufshpb_lu *hpb) break; } + if (!victim_rgn) + dev_err(&hpb->sdev_ufs_lu->sdev_dev, + "%s: no region allocated\n", + __func__); + return victim_rgn; } From 56495f295d8e021f77d065b890fc0100e3f9f6d8 Mon Sep 17 00:00:00 2001 From: Chandrakanth patil Date: Thu, 24 Mar 2022 02:47:11 -0700 Subject: [PATCH 365/708] scsi: megaraid_sas: Target with invalid LUN ID is deleted during scan The megaraid_sas driver supports single LUN for RAID devices. That is LUN 0. All other LUNs are unsupported. When a device scan on a logical target with invalid LUN number is invoked through sysfs, that target ends up getting removed. Add LUN ID validation in the slave destroy function to avoid the target deletion. Link: https://lore.kernel.org/r/20220324094711.48833-1-chandrakanth.patil@broadcom.com Signed-off-by: Chandrakanth patil Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas.h | 3 +++ drivers/scsi/megaraid/megaraid_sas_base.c | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 611871ef15b5..4919ea54b827 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2560,6 +2560,9 @@ struct megasas_instance_template { #define MEGASAS_IS_LOGICAL(sdev) \ ((sdev->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1) +#define MEGASAS_IS_LUN_VALID(sdev) \ + (((sdev)->lun == 0) ? 1 : 0) + #define MEGASAS_DEV_INDEX(scp) \ (((scp->device->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) + \ scp->device->id) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 8bf72dbc33b7..db6793608447 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -2126,6 +2126,9 @@ static int megasas_slave_alloc(struct scsi_device *sdev) goto scan_target; } return -ENXIO; + } else if (!MEGASAS_IS_LUN_VALID(sdev)) { + sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__); + return -ENXIO; } scan_target: @@ -2156,6 +2159,10 @@ static void megasas_slave_destroy(struct scsi_device *sdev) instance = megasas_lookup_instance(sdev->host->host_no); if (MEGASAS_IS_LOGICAL(sdev)) { + if (!MEGASAS_IS_LUN_VALID(sdev)) { + sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__); + return; + } ld_tgt_id = MEGASAS_TARGET_ID(sdev); instance->ld_tgtid_status[ld_tgt_id] = LD_TARGET_ID_DELETED; if (megasas_dbg_lvl & LD_PD_DEBUG) From ec4eb8a86ade4d22633e1da2a7d85a846b7d1798 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Tue, 5 Apr 2022 21:22:06 +0800 Subject: [PATCH 366/708] drivers: net: slip: fix NPD bug in sl_tx_timeout() When a slip driver is detaching, the slip_close() will act to cleanup necessary resources and sl->tty is set to NULL in slip_close(). Meanwhile, the packet we transmit is blocked, sl_tx_timeout() will be called. Although slip_close() and sl_tx_timeout() use sl->lock to synchronize, we don`t judge whether sl->tty equals to NULL in sl_tx_timeout() and the null pointer dereference bug will happen. (Thread 1) | (Thread 2) | slip_close() | spin_lock_bh(&sl->lock) | ... ... | sl->tty = NULL //(1) sl_tx_timeout() | spin_unlock_bh(&sl->lock) spin_lock(&sl->lock); | ... | ... tty_chars_in_buffer(sl->tty)| if (tty->ops->..) //(2) | ... | synchronize_rcu() We set NULL to sl->tty in position (1) and dereference sl->tty in position (2). This patch adds check in sl_tx_timeout(). If sl->tty equals to NULL, sl_tx_timeout() will goto out. Signed-off-by: Duoming Zhou Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20220405132206.55291-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/slip/slip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 88396ff99f03..6865d32270e5 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -469,7 +469,7 @@ static void sl_tx_timeout(struct net_device *dev, unsigned int txqueue) spin_lock(&sl->lock); if (netif_queue_stopped(dev)) { - if (!netif_running(dev)) + if (!netif_running(dev) || !sl->tty) goto out; /* May be we must check transmitter timeout here ? From 02de9331c4d0c6bddac9c5fa66d91f70adf8612b Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 16 Mar 2022 13:51:29 +0100 Subject: [PATCH 367/708] KVM: selftests: get-reg-list: Add KVM_REG_ARM_FW_REG(3) When testing a kernel with commit a5905d6af492 ("KVM: arm64: Allow SMCCC_ARCH_WORKAROUND_3 to be discovered and migrated") get-reg-list output vregs: Number blessed registers: 234 vregs: Number registers: 238 vregs: There are 1 new registers. Consider adding them to the blessed reg list with the following lines: KVM_REG_ARM_FW_REG(3), vregs: PASS ... That output inspired two changes: 1) add the new register to the blessed list and 2) explain why "Number registers" is actually four larger than "Number blessed registers" (on the system used for testing), even though only one register is being stated as new. The reason is that some registers are host dependent and they get filtered out when comparing with the blessed list. The system used for the test apparently had three filtered registers. Signed-off-by: Andrew Jones Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220316125129.392128-1-drjones@redhat.com --- tools/testing/selftests/kvm/aarch64/get-reg-list.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index f12147c43464..0b571f3fe64c 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -503,8 +503,13 @@ static void run_test(struct vcpu_config *c) ++missing_regs; if (new_regs || missing_regs) { + n = 0; + for_each_reg_filtered(i) + ++n; + printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n); - printf("%s: Number registers: %5lld\n", config_name(c), reg_list->n); + printf("%s: Number registers: %5lld (includes %lld filtered registers)\n", + config_name(c), reg_list->n, reg_list->n - n); } if (new_regs) { @@ -683,9 +688,10 @@ static __u64 base_regs[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]), KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr), KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr), - KVM_REG_ARM_FW_REG(0), - KVM_REG_ARM_FW_REG(1), - KVM_REG_ARM_FW_REG(2), + KVM_REG_ARM_FW_REG(0), /* KVM_REG_ARM_PSCI_VERSION */ + KVM_REG_ARM_FW_REG(1), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */ + KVM_REG_ARM_FW_REG(2), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */ + KVM_REG_ARM_FW_REG(3), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */ ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ ARM64_SYS_REG(3, 3, 14, 0, 2), From a44a4cc1c969afec97dbb2aedaf6f38eaa6253bb Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 6 Apr 2022 23:56:13 +0000 Subject: [PATCH 368/708] KVM: Don't create VM debugfs files outside of the VM directory Unfortunately, there is no guarantee that KVM was able to instantiate a debugfs directory for a particular VM. To that end, KVM shouldn't even attempt to create new debugfs files in this case. If the specified parent dentry is NULL, debugfs_create_file() will instantiate files at the root of debugfs. For arm64, it is possible to create the vgic-state file outside of a VM directory, the file is not cleaned up when a VM is destroyed. Nonetheless, the corresponding struct kvm is freed when the VM is destroyed. Nip the problem in the bud for all possible errant debugfs file creations by initializing kvm->debugfs_dentry to -ENOENT. In so doing, debugfs_create_file() will fail instead of creating the file in the root directory. Cc: stable@kernel.org Fixes: 929f45e32499 ("kvm: no need to check return value of debugfs_create functions") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220406235615.1447180-2-oupton@google.com --- virt/kvm/kvm_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 70e05af5ebea..e39a6f56fc47 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -932,7 +932,7 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm) int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc + kvm_vcpu_stats_header.num_desc; - if (!kvm->debugfs_dentry) + if (IS_ERR(kvm->debugfs_dentry)) return; debugfs_remove_recursive(kvm->debugfs_dentry); @@ -955,6 +955,12 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc + kvm_vcpu_stats_header.num_desc; + /* + * Force subsequent debugfs file creations to fail if the VM directory + * is not created. + */ + kvm->debugfs_dentry = ERR_PTR(-ENOENT); + if (!debugfs_initialized()) return 0; @@ -5479,7 +5485,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) } add_uevent_var(env, "PID=%d", kvm->userspace_pid); - if (kvm->debugfs_dentry) { + if (!IS_ERR(kvm->debugfs_dentry)) { char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); if (p) { From 386ba265a8197716076a88853244f4437b92b167 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 6 Apr 2022 23:56:14 +0000 Subject: [PATCH 369/708] selftests: KVM: Don't leak GIC FD across dirty log test iterations dirty_log_perf_test instantiates a VGICv3 for the guest (if supported by hardware) to reduce the overhead of guest exits. However, the test does not actually close the GIC fd when cleaning up the VM between test iterations, meaning that the VM is never actually destroyed in the kernel. While this is generally a bad idea, the bug was detected from the kernel spewing about duplicate debugfs entries as subsequent VMs happen to reuse the same FD even though the debugfs directory is still present. Abstract away the notion of setup/cleanup of the GIC FD from the test by creating arch-specific helpers for test setup/cleanup. Close the GIC FD on VM cleanup and do nothing for the other architectures. Fixes: c340f7899af6 ("KVM: selftests: Add vgic initialization for dirty log perf test for ARM") Reviewed-by: Jing Zhang Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220406235615.1447180-3-oupton@google.com --- .../selftests/kvm/dirty_log_perf_test.c | 34 +++++++++++++++++-- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index c9d9e513ca04..7b47ae4f952e 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -18,11 +18,40 @@ #include "test_util.h" #include "perf_test_util.h" #include "guest_modes.h" + #ifdef __aarch64__ #include "aarch64/vgic.h" #define GICD_BASE_GPA 0x8000000ULL #define GICR_BASE_GPA 0x80A0000ULL + +static int gic_fd; + +static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) +{ + /* + * The test can still run even if hardware does not support GICv3, as it + * is only an optimization to reduce guest exits. + */ + gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); +} + +static void arch_cleanup_vm(struct kvm_vm *vm) +{ + if (gic_fd > 0) + close(gic_fd); +} + +#else /* __aarch64__ */ + +static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) +{ +} + +static void arch_cleanup_vm(struct kvm_vm *vm) +{ +} + #endif /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ @@ -206,9 +235,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm_enable_cap(vm, &cap); } -#ifdef __aarch64__ - vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); -#endif + arch_setup_vm(vm, nr_vcpus); /* Start the iterations */ iteration = 0; @@ -302,6 +329,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) } free_bitmaps(bitmaps, p->slots); + arch_cleanup_vm(vm); perf_test_destroy_vm(vm); } From 21db83846683d3987666505a3ec38f367708199a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 6 Apr 2022 23:56:15 +0000 Subject: [PATCH 370/708] selftests: KVM: Free the GIC FD when cleaning up in arch_timer In order to correctly destroy a VM, all references to the VM must be freed. The arch_timer selftest creates a VGIC for the guest, which itself holds a reference to the VM. Close the GIC FD when cleaning up a VM. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220406235615.1447180-4-oupton@google.com --- tools/testing/selftests/kvm/aarch64/arch_timer.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index b08d30bf71c5..3b940a101bc0 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -362,11 +362,12 @@ static void test_init_timer_irq(struct kvm_vm *vm) pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); } +static int gic_fd; + static struct kvm_vm *test_vm_create(void) { struct kvm_vm *vm; unsigned int i; - int ret; int nr_vcpus = test_args.nr_vcpus; vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL); @@ -383,8 +384,8 @@ static struct kvm_vm *test_vm_create(void) ucall_init(vm, NULL); test_init_timer_irq(vm); - ret = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); - if (ret < 0) { + gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); + if (gic_fd < 0) { print_skip("Failed to create vgic-v3"); exit(KSFT_SKIP); } @@ -395,6 +396,12 @@ static struct kvm_vm *test_vm_create(void) return vm; } +static void test_vm_cleanup(struct kvm_vm *vm) +{ + close(gic_fd); + kvm_vm_free(vm); +} + static void test_print_help(char *name) { pr_info("Usage: %s [-h] [-n nr_vcpus] [-i iterations] [-p timer_period_ms]\n", @@ -478,7 +485,7 @@ int main(int argc, char *argv[]) vm = test_vm_create(); test_run(vm); - kvm_vm_free(vm); + test_vm_cleanup(vm); return 0; } From 9eb6f5c388060d8cef3c8b616cc31b765e022359 Mon Sep 17 00:00:00 2001 From: Tim Crawford Date: Tue, 5 Apr 2022 12:20:29 -0600 Subject: [PATCH 371/708] ALSA: hda/realtek: Add quirk for Clevo PD50PNT Fixes speaker output and headset detection on Clevo PD50PNT. Signed-off-by: Tim Crawford Cc: Link: https://lore.kernel.org/r/20220405182029.27431-1-tcrawford@system76.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index aace474a899d..61df440fdb61 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2619,6 +2619,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65e5, "Clevo PC50D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65f1, "Clevo PC50HS", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65f5, "Clevo PD50PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), From 9dd7c46346ca4390f84a7ea9933005eb1b175c15 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 5 Apr 2022 16:41:18 -0700 Subject: [PATCH 372/708] sound/oss/dmasound: fix build when drivers are mixed =y/=m When CONFIG_DMASOUND_ATARI=m and CONFIG_DMASOUND_Q40=y (or vice versa), dmasound_core.o can be built without dmasound_deinit() being defined, causing a build error: ERROR: modpost: "dmasound_deinit" [sound/oss/dmasound/dmasound_atari.ko] undefined! Modify dmasound_core.c and dmasound.h so that dmasound_deinit() is always available. The mixed modes (=y/=m) also mean that several variables and structs have to be declared in all cases. Suggested-by: Arnd Bergmann Suggested-by: Geert Uytterhoeven Signed-off-by: Randy Dunlap Reported-by: kernel test robot Link: lore.kernel.org/r/202204032138.EFT9qGEd-lkp@intel.com Cc: Geert Uytterhoeven Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: alsa-devel@alsa-project.org Link: https://lore.kernel.org/r/20220405234118.24830-1-rdunlap@infradead.org Signed-off-by: Takashi Iwai --- sound/oss/dmasound/dmasound.h | 6 ------ sound/oss/dmasound/dmasound_core.c | 24 +----------------------- 2 files changed, 1 insertion(+), 29 deletions(-) diff --git a/sound/oss/dmasound/dmasound.h b/sound/oss/dmasound/dmasound.h index c1c52b479da2..ad8ce6a1c25c 100644 --- a/sound/oss/dmasound/dmasound.h +++ b/sound/oss/dmasound/dmasound.h @@ -88,11 +88,7 @@ static inline int ioctl_return(int __user *addr, int value) */ extern int dmasound_init(void); -#ifdef MODULE extern void dmasound_deinit(void); -#else -#define dmasound_deinit() do { } while (0) -#endif /* description of the set-up applies to either hard or soft settings */ @@ -114,9 +110,7 @@ typedef struct { void *(*dma_alloc)(unsigned int, gfp_t); void (*dma_free)(void *, unsigned int); int (*irqinit)(void); -#ifdef MODULE void (*irqcleanup)(void); -#endif void (*init)(void); void (*silence)(void); int (*setFormat)(int); diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c index 0c95828ac0b1..9c48f3a9e3d1 100644 --- a/sound/oss/dmasound/dmasound_core.c +++ b/sound/oss/dmasound/dmasound_core.c @@ -206,12 +206,10 @@ module_param(writeBufSize, int, 0); MODULE_LICENSE("GPL"); -#ifdef MODULE static int sq_unit = -1; static int mixer_unit = -1; static int state_unit = -1; static int irq_installed; -#endif /* MODULE */ /* control over who can modify resources shared between play/record */ static fmode_t shared_resource_owner; @@ -391,9 +389,6 @@ static const struct file_operations mixer_fops = static void mixer_init(void) { -#ifndef MODULE - int mixer_unit; -#endif mixer_unit = register_sound_mixer(&mixer_fops, -1); if (mixer_unit < 0) return; @@ -1171,9 +1166,6 @@ static const struct file_operations sq_fops = static int sq_init(void) { const struct file_operations *fops = &sq_fops; -#ifndef MODULE - int sq_unit; -#endif sq_unit = register_sound_dsp(fops, -1); if (sq_unit < 0) { @@ -1366,9 +1358,6 @@ static const struct file_operations state_fops = { static int state_init(void) { -#ifndef MODULE - int state_unit; -#endif state_unit = register_sound_special(&state_fops, SND_DEV_STATUS); if (state_unit < 0) return state_unit ; @@ -1386,10 +1375,9 @@ static int state_init(void) int dmasound_init(void) { int res ; -#ifdef MODULE + if (irq_installed) return -EBUSY; -#endif /* Set up sound queue, /dev/audio and /dev/dsp. */ @@ -1408,9 +1396,7 @@ int dmasound_init(void) printk(KERN_ERR "DMA sound driver: Interrupt initialization failed\n"); return -ENODEV; } -#ifdef MODULE irq_installed = 1; -#endif printk(KERN_INFO "%s DMA sound driver rev %03d installed\n", dmasound.mach.name, (DMASOUND_CORE_REVISION<<4) + @@ -1424,8 +1410,6 @@ int dmasound_init(void) return 0; } -#ifdef MODULE - void dmasound_deinit(void) { if (irq_installed) { @@ -1444,8 +1428,6 @@ void dmasound_deinit(void) unregister_sound_dsp(sq_unit); } -#else /* !MODULE */ - static int dmasound_setup(char *str) { int ints[6], size; @@ -1489,8 +1471,6 @@ static int dmasound_setup(char *str) __setup("dmasound=", dmasound_setup); -#endif /* !MODULE */ - /* * Conversion tables */ @@ -1577,9 +1557,7 @@ char dmasound_alaw2dma8[] = { EXPORT_SYMBOL(dmasound); EXPORT_SYMBOL(dmasound_init); -#ifdef MODULE EXPORT_SYMBOL(dmasound_deinit); -#endif EXPORT_SYMBOL(dmasound_write_sq); EXPORT_SYMBOL(dmasound_catchRadius); #ifdef HAS_8BIT_TABLES From d52eee988597ac2a2c5d17d842946616d7d41070 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 6 Apr 2022 14:04:18 -0500 Subject: [PATCH 373/708] ALSA: hda: intel-dsp-config: update AlderLake PCI IDs Add missing AlderLake-PS and RaptorLake-S PCI IDs (already in HDaudio and SOF drivers), add comments and regroup by skew. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Kai Vehmanen Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20220406190418.245044-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-dsp-config.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index 70fd8b13938e..8b0a16ba27d3 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -390,22 +390,36 @@ static const struct config_entry config_table[] = { /* Alder Lake */ #if IS_ENABLED(CONFIG_SND_SOC_SOF_ALDERLAKE) + /* Alderlake-S */ { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x7ad0, }, + /* RaptorLake-S */ + { + .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, + .device = 0x7a50, + }, + /* Alderlake-P */ { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x51c8, }, - { - .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, - .device = 0x51cc, - }, { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x51cd, }, + /* Alderlake-PS */ + { + .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, + .device = 0x51c9, + }, + /* Alderlake-M */ + { + .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, + .device = 0x51cc, + }, + /* Alderlake-N */ { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x54c8, From be8a096521ca1a252bf078b347f96ce94582612e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Mar 2022 13:13:41 +0200 Subject: [PATCH 374/708] x86,bpf: Avoid IBT objtool warning Clang can inline emit_indirect_jump() and then folds constants, which results in: | vmlinux.o: warning: objtool: emit_bpf_dispatcher()+0x6a4: relocation to !ENDBR: .text.__x86.indirect_thunk+0x40 | vmlinux.o: warning: objtool: emit_bpf_dispatcher()+0x67d: relocation to !ENDBR: .text.__x86.indirect_thunk+0x40 | vmlinux.o: warning: objtool: emit_bpf_tail_call_indirect()+0x386: relocation to !ENDBR: .text.__x86.indirect_thunk+0x20 | vmlinux.o: warning: objtool: emit_bpf_tail_call_indirect()+0x35d: relocation to !ENDBR: .text.__x86.indirect_thunk+0x20 Suppress the optimization such that it must emit a code reference to the __x86_indirect_thunk_array[] base. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Alexei Starovoitov Link: https://lkml.kernel.org/r/20220405075531.GB30877@worktop.programming.kicks-ass.net --- arch/x86/net/bpf_jit_comp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8fe35ed11fd6..16b6efacf7c6 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -412,6 +412,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { + OPTIMIZER_HIDE_VAR(reg); emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); } else #endif From 334865b2915c33080624e0d06f1c3e917036472c Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 29 Mar 2022 13:21:45 -0700 Subject: [PATCH 375/708] x86/extable: Prefer local labels in .set directives Bernardo reported an error that Nathan bisected down to (x86_64) defconfig+LTO_CLANG_FULL+X86_PMEM_LEGACY. LTO vmlinux.o ld.lld: error: :1:13: redefinition of 'found' .set found, 0 ^ :29:1: while in macro instantiation extable_type_reg reg=%eax, type=(17 | ((0) << 16)) ^ This appears to be another LTO specific issue similar to what was folded into commit 4b5305decc84 ("x86/extable: Extend extable functionality"), where the `.set found, 0` in DEFINE_EXTABLE_TYPE_REG in arch/x86/include/asm/asm.h conflicts with the symbol for the static function `found` in arch/x86/kernel/pmem.c. Assembler .set directive declare symbols with global visibility, so the assembler may not rename such symbols in the event of a conflict. LTO could rename static functions if there was a conflict in C sources, but it cannot see into symbols defined in inline asm. The symbols are also retained in the symbol table, regardless of LTO. Give the symbols .L prefixes making them locally visible, so that they may be renamed for LTO to avoid conflicts, and to drop them from the symbol table regardless of LTO. Fixes: 4b5305decc84 ("x86/extable: Extend extable functionality") Reported-by: Bernardo Meurer Costa Debugged-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/20220329202148.2379697-1-ndesaulniers@google.com --- arch/x86/include/asm/asm.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index c878fed3056f..fbcfec4dc4cc 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -154,24 +154,24 @@ # define DEFINE_EXTABLE_TYPE_REG \ ".macro extable_type_reg type:req reg:req\n" \ - ".set found, 0\n" \ - ".set regnr, 0\n" \ + ".set .Lfound, 0\n" \ + ".set .Lregnr, 0\n" \ ".irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15\n" \ ".ifc \\reg, %%\\rs\n" \ - ".set found, found+1\n" \ - ".long \\type + (regnr << 8)\n" \ + ".set .Lfound, .Lfound+1\n" \ + ".long \\type + (.Lregnr << 8)\n" \ ".endif\n" \ - ".set regnr, regnr+1\n" \ + ".set .Lregnr, .Lregnr+1\n" \ ".endr\n" \ - ".set regnr, 0\n" \ + ".set .Lregnr, 0\n" \ ".irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d,r13d,r14d,r15d\n" \ ".ifc \\reg, %%\\rs\n" \ - ".set found, found+1\n" \ - ".long \\type + (regnr << 8)\n" \ + ".set .Lfound, .Lfound+1\n" \ + ".long \\type + (.Lregnr << 8)\n" \ ".endif\n" \ - ".set regnr, regnr+1\n" \ + ".set .Lregnr, .Lregnr+1\n" \ ".endr\n" \ - ".if (found != 1)\n" \ + ".if (.Lfound != 1)\n" \ ".error \"extable_type_reg: bad register argument\"\n" \ ".endif\n" \ ".endm\n" From 5063b7a80eba25960464d2a366b66544810d9694 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Apr 2022 14:02:06 +0100 Subject: [PATCH 376/708] ARM: vexpress/spc: Fix kernel-doc build warning for ve_spc_cpu_in_wfi Kbuild bot reported the following kernel-doc build warning: | arch/arm/mach-versatile/spc.c:231: warning: This comment starts with | '/**', but isn't a kernel-doc comment. | Refer Documentation/doc-guide/kernel-doc.rst | * ve_spc_cpu_in_wfi(u32 cpu, u32 cluster) Fix the issue by dropping the parameters specified in the kernel doc. Link: https://lore.kernel.org/linux-doc/202204031026.4ogKxt89-lkp@intel.com Link: https://lore.kernel.org/r/20220404130207.1162445-1-sudeep.holla@arm.com Cc: Liviu Dudau Cc: Lorenzo Pieralisi Reported-by: kernel test robot Signed-off-by: Sudeep Holla --- arch/arm/mach-vexpress/spc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index 1c6500c4e6a1..8f99d47d4b89 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -228,7 +228,7 @@ static u32 standbywfi_cpu_mask(u32 cpu, u32 cluster) } /** - * ve_spc_cpu_in_wfi(u32 cpu, u32 cluster) + * ve_spc_cpu_in_wfi() * * @cpu: mpidr[7:0] bitfield describing CPU affinity level within cluster * @cluster: mpidr[15:8] bitfield describing cluster affinity level From 42a997f0bde1da0cce14a4768bb190b5030513eb Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Apr 2022 14:02:07 +0100 Subject: [PATCH 377/708] ARM: vexpress/spc: Fix all the kernel-doc build warnings There are more kernel-doc build warnings as below than the ones reported by kernel test robot recently for this file. | arch/arm/mach-vexpress/spc.c:125: warning: missing initial short description on line: | * ve_spc_global_wakeup_irq() | arch/arm/mach-vexpress/spc.c:131: warning: contents before sections | arch/arm/mach-vexpress/spc.c:148: warning: missing initial short description on line: | * ve_spc_cpu_wakeup_irq() | arch/arm/mach-vexpress/spc.c:154: warning: contents before sections | arch/arm/mach-vexpress/spc.c:203: warning: missing initial short description on line: | * ve_spc_powerdown() | arch/arm/mach-vexpress/spc.c:209: warning: contents before sections | arch/arm/mach-vexpress/spc.c:231: warning: missing initial short description on line: | * ve_spc_cpu_in_wfi() | 7 warnings Fix all these warnings. Link: https://lore.kernel.org/r/20220404130207.1162445-2-sudeep.holla@arm.com Cc: Liviu Dudau Cc: Lorenzo Pieralisi Signed-off-by: Sudeep Holla --- arch/arm/mach-vexpress/spc.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index 8f99d47d4b89..6e6985e756af 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -122,13 +122,13 @@ static inline bool cluster_is_a15(u32 cluster) } /** - * ve_spc_global_wakeup_irq() + * ve_spc_global_wakeup_irq() - sets/clears global wakeup IRQs + * + * @set: if true, global wake-up IRQs are set, if false they are cleared * * Function to set/clear global wakeup IRQs. Not protected by locking since * it might be used in code paths where normal cacheable locks are not * working. Locking must be provided by the caller to ensure atomicity. - * - * @set: if true, global wake-up IRQs are set, if false they are cleared */ void ve_spc_global_wakeup_irq(bool set) { @@ -145,15 +145,15 @@ void ve_spc_global_wakeup_irq(bool set) } /** - * ve_spc_cpu_wakeup_irq() - * - * Function to set/clear per-CPU wake-up IRQs. Not protected by locking since - * it might be used in code paths where normal cacheable locks are not - * working. Locking must be provided by the caller to ensure atomicity. + * ve_spc_cpu_wakeup_irq() - sets/clears per-CPU wake-up IRQs * * @cluster: mpidr[15:8] bitfield describing cluster affinity level * @cpu: mpidr[7:0] bitfield describing cpu affinity level * @set: if true, wake-up IRQs are set, if false they are cleared + * + * Function to set/clear per-CPU wake-up IRQs. Not protected by locking since + * it might be used in code paths where normal cacheable locks are not + * working. Locking must be provided by the caller to ensure atomicity. */ void ve_spc_cpu_wakeup_irq(u32 cluster, u32 cpu, bool set) { @@ -200,14 +200,14 @@ void ve_spc_set_resume_addr(u32 cluster, u32 cpu, u32 addr) } /** - * ve_spc_powerdown() + * ve_spc_powerdown() - enables/disables cluster powerdown + * + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * @enable: if true enables powerdown, if false disables it * * Function to enable/disable cluster powerdown. Not protected by locking * since it might be used in code paths where normal cacheable locks are not * working. Locking must be provided by the caller to ensure atomicity. - * - * @cluster: mpidr[15:8] bitfield describing cluster affinity level - * @enable: if true enables powerdown, if false disables it */ void ve_spc_powerdown(u32 cluster, bool enable) { @@ -228,7 +228,7 @@ static u32 standbywfi_cpu_mask(u32 cpu, u32 cluster) } /** - * ve_spc_cpu_in_wfi() + * ve_spc_cpu_in_wfi() - Checks if the specified CPU is in WFI or not * * @cpu: mpidr[7:0] bitfield describing CPU affinity level within cluster * @cluster: mpidr[15:8] bitfield describing cluster affinity level From 711136bb6620b4e84498aa87d4a2ceb7b70c8176 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 6 Apr 2022 14:06:27 +0200 Subject: [PATCH 378/708] s390/kexec: silence -Warray-bounds warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just use absolute_pointer() like e.g. in commit 545c272232ca ("alpha: Silence -Warray-bounds warnings") to get rid of this warning: arch/s390/kernel/machine_kexec.c:59:9: warning: ‘memcpy’ offset [0, 511] is out of the bounds [0, 0] [-Warray-bounds] Signed-off-by: Heiko Carstens --- arch/s390/kernel/machine_kexec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index b2ef014a9287..6ebf02e15c85 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -54,7 +54,7 @@ static void __do_machine_kdump(void *image) * This need to be done *after* s390_reset_system set the * prefix register of this CPU to zero */ - memcpy((void *) __LC_FPREGS_SAVE_AREA, + memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA), (void *)(prefix + __LC_FPREGS_SAVE_AREA), 512); __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA); From 03e59b1e2f56245163b14c69e0a830c24b1a3a47 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 4 Apr 2022 13:49:02 +0200 Subject: [PATCH 379/708] mmc: renesas_sdhi: don't overwrite TAP settings when HS400 tuning is complete When HS400 tuning is complete and HS400 is going to be activated, we have to keep the current number of TAPs and should not overwrite them with a hardcoded value. This was probably a copy&paste mistake when upporting HS400 support from the BSP. Fixes: 26eb2607fa28 ("mmc: renesas_sdhi: add eMMC HS400 mode support") Reported-by: Yoshihiro Shimoda Signed-off-by: Wolfram Sang Reviewed-by: Yoshihiro Shimoda Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220404114902.12175-1-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 2a4d314aa027..ddb5ca2f559e 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -396,10 +396,10 @@ static void renesas_sdhi_hs400_complete(struct mmc_host *mmc) SH_MOBILE_SDHI_SCC_TMPPORT2_HS400OSEL) | sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_TMPPORT2)); - /* Set the sampling clock selection range of HS400 mode */ sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_DTCNTL, SH_MOBILE_SDHI_SCC_DTCNTL_TAPEN | - 0x4 << SH_MOBILE_SDHI_SCC_DTCNTL_TAPNUM_SHIFT); + sd_scc_read32(host, priv, + SH_MOBILE_SDHI_SCC_DTCNTL)); /* Avoid bad TAP */ if (bad_taps & BIT(priv->tap_set)) { From 3b68b08885217abd9c57ff9b3bb3eb173eee02a9 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Sun, 30 Jan 2022 16:25:02 +0100 Subject: [PATCH 380/708] ep93xx: clock: Fix UAF in ep93xx_clk_register_gate() arch/arm/mach-ep93xx/clock.c:154:2: warning: Use of memory after it is freed [clang-analyzer-unix.Malloc] arch/arm/mach-ep93xx/clock.c:151:2: note: Taking true branch if (IS_ERR(clk)) ^ arch/arm/mach-ep93xx/clock.c:152:3: note: Memory is released kfree(psc); ^~~~~~~~~~ arch/arm/mach-ep93xx/clock.c:154:2: note: Use of memory after it is freed return &psc->hw; ^ ~~~~~~~~ Fixes: 9645ccc7bd7a ("ep93xx: clock: convert in-place to COMMON_CLK") Reported-by: kernel test robot Signed-off-by: Alexander Sverdlin Cc: stable@vger.kernel.org Link: https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org/thread/B5YCO2NJEXINCYE26Y255LCVMO55BGWW/ Signed-off-by: Arnd Bergmann --- arch/arm/mach-ep93xx/clock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c index cc75087134d3..28e0ae6e890e 100644 --- a/arch/arm/mach-ep93xx/clock.c +++ b/arch/arm/mach-ep93xx/clock.c @@ -148,8 +148,10 @@ static struct clk_hw *ep93xx_clk_register_gate(const char *name, psc->lock = &clk_lock; clk = clk_register(NULL, &psc->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { kfree(psc); + return ERR_CAST(clk); + } return &psc->hw; } From caee01050bd483f1b6f6abc686a3516e48e2ad9e Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Thu, 20 Jan 2022 14:37:39 +0100 Subject: [PATCH 381/708] ep93xx: clock: Don't use plain integer as NULL pointer Fix sparse warning: arch/arm/mach-ep93xx/clock.c:210:35: sparse: sparse: Using plain integer as NULL pointer Reported-by: kernel test robot Signed-off-by: Alexander Sverdlin Link: https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org/thread/TLFJ6D7WGMDJSQ6XK7UZE4XR2PLRZJSV/ Signed-off-by: Arnd Bergmann --- arch/arm/mach-ep93xx/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c index 28e0ae6e890e..4fa6ea5461b7 100644 --- a/arch/arm/mach-ep93xx/clock.c +++ b/arch/arm/mach-ep93xx/clock.c @@ -209,7 +209,7 @@ static int ep93xx_mux_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { unsigned long rate = req->rate; - struct clk *best_parent = 0; + struct clk *best_parent = NULL; unsigned long __parent_rate; unsigned long best_rate = 0, actual_rate, mclk_rate; unsigned long best_parent_rate; From 1f5fb1dc7497776a7dd12420ae87382e61718bf5 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 5 Apr 2022 14:52:52 +0100 Subject: [PATCH 382/708] arm: configs: imote2: Drop defconfig as board support dropped. Missed the defconfig when removing the board files causing failures in builds using this defconfig. Fixes: 28f74201e37c ("ARM: pxa: remove Intel Imote2 and Stargate 2 boards") Reported-by: Sudip Mukherjee Signed-off-by: Jonathan Cameron Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20220405135252.10283-1-Jonathan.Cameron@huawei.com' Signed-off-by: Arnd Bergmann --- arch/arm/configs/imote2_defconfig | 365 ------------------------------ 1 file changed, 365 deletions(-) delete mode 100644 arch/arm/configs/imote2_defconfig diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig deleted file mode 100644 index 015b7ef237de..000000000000 --- a/arch/arm/configs/imote2_defconfig +++ /dev/null @@ -1,365 +0,0 @@ -# CONFIG_LOCALVERSION_AUTO is not set -CONFIG_SYSVIPC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_EXPERT=y -# CONFIG_COMPAT_BRK is not set -CONFIG_SLAB=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_ARCH_PXA=y -CONFIG_MACH_INTELMOTE2=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_PREEMPT=y -CONFIG_AEABI=y -CONFIG_ZBOOT_ROM_TEXT=0x0 -CONFIG_ZBOOT_ROM_BSS=0x0 -CONFIG_CMDLINE="root=/dev/mtdblock2 rootfstype=jffs2 console=ttyS2,115200 mem=32M" -CONFIG_KEXEC=y -CONFIG_FPE_NWFPE=y -CONFIG_BINFMT_AOUT=m -CONFIG_BINFMT_MISC=m -CONFIG_PM=y -CONFIG_APM_EMULATION=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_IP_PNP_RARP=y -CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_DIAG is not set -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_MIP6=m -CONFIG_IPV6_TUNNEL=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_NETFILTER=y -CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CT_PROTO_SCTP=y -CONFIG_NF_CT_PROTO_UDPLITE=y -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_NETBIOS_NS=m -CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_LED=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_ADDRTYPE=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_BRIDGE=m -# CONFIG_BRIDGE_IGMP_SNOOPING is not set -CONFIG_IEEE802154=y -# CONFIG_WIRELESS is not set -CONFIG_DEVTMPFS=y -CONFIG_DEVTMPFS_MOUNT=y -CONFIG_FW_LOADER=m -CONFIG_CONNECTOR=m -CONFIG_MTD=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_AFS_PARTS=y -CONFIG_MTD_AR7_PARTS=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_ADV_OPTIONS=y -CONFIG_MTD_CFI_GEOMETRY=y -# CONFIG_MTD_MAP_BANK_WIDTH_1 is not set -# CONFIG_MTD_MAP_BANK_WIDTH_4 is not set -# CONFIG_MTD_CFI_I2 is not set -CONFIG_MTD_OTP=y -CONFIG_MTD_CFI_INTELEXT=y -CONFIG_MTD_PXA2XX=y -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_NETDEVICES=y -CONFIG_DUMMY=y -# CONFIG_WLAN is not set -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_BSDCOMP=m -# CONFIG_INPUT_MOUSEDEV is not set -CONFIG_INPUT_EVDEV=y -# CONFIG_KEYBOARD_ATKBD is not set -CONFIG_KEYBOARD_GPIO=y -CONFIG_KEYBOARD_PXA27x=y -# CONFIG_INPUT_MOUSE is not set -CONFIG_INPUT_TOUCHSCREEN=y -CONFIG_INPUT_MISC=y -CONFIG_INPUT_UINPUT=y -# CONFIG_SERIO is not set -CONFIG_SERIAL_PXA=y -CONFIG_SERIAL_PXA_CONSOLE=y -CONFIG_LEGACY_PTY_COUNT=8 -# CONFIG_HW_RANDOM is not set -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=y -CONFIG_I2C_PXA=y -CONFIG_SPI=y -CONFIG_SPI_PXA2XX=y -CONFIG_GPIO_SYSFS=y -CONFIG_POWER_SUPPLY=y -# CONFIG_HWMON is not set -CONFIG_PMIC_DA903X=y -CONFIG_REGULATOR=y -CONFIG_REGULATOR_DEBUG=y -CONFIG_REGULATOR_DA903X=y -CONFIG_MEDIA_SUPPORT=y -CONFIG_VIDEO_DEV=y -CONFIG_MEDIA_TUNER_CUSTOMISE=y -# CONFIG_MEDIA_TUNER_SIMPLE is not set -# CONFIG_MEDIA_TUNER_TDA8290 is not set -# CONFIG_MEDIA_TUNER_TDA827X is not set -# CONFIG_MEDIA_TUNER_TDA18271 is not set -# CONFIG_MEDIA_TUNER_TDA9887 is not set -# CONFIG_MEDIA_TUNER_TEA5761 is not set -# CONFIG_MEDIA_TUNER_TEA5767 is not set -# CONFIG_MEDIA_TUNER_MT20XX is not set -# CONFIG_MEDIA_TUNER_MT2060 is not set -# CONFIG_MEDIA_TUNER_MT2266 is not set -# CONFIG_MEDIA_TUNER_MT2131 is not set -# CONFIG_MEDIA_TUNER_QT1010 is not set -# CONFIG_MEDIA_TUNER_XC2028 is not set -# CONFIG_MEDIA_TUNER_XC5000 is not set -# CONFIG_MEDIA_TUNER_MXL5005S is not set -# CONFIG_MEDIA_TUNER_MXL5007T is not set -# CONFIG_MEDIA_TUNER_MC44S803 is not set -# CONFIG_VIDEO_HELPER_CHIPS_AUTO is not set -CONFIG_VIDEO_PXA27x=y -# CONFIG_V4L_USB_DRIVERS is not set -# CONFIG_RADIO_ADAPTERS is not set -CONFIG_FB=y -CONFIG_FB_PXA=y -CONFIG_FB_PXA_OVERLAY=y -CONFIG_FB_PXA_PARAMETERS=y -# CONFIG_LCD_CLASS_DEVICE is not set -CONFIG_BACKLIGHT_CLASS_DEVICE=y -# CONFIG_VGA_CONSOLE is not set -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FONTS=y -CONFIG_FONT_MINI_4x6=y -CONFIG_SOUND=y -CONFIG_SND=y -CONFIG_SND_MIXER_OSS=y -CONFIG_SND_PCM_OSS=y -# CONFIG_SND_DRIVERS is not set -# CONFIG_SND_ARM is not set -# CONFIG_SND_SPI is not set -# CONFIG_SND_USB is not set -CONFIG_SND_SOC=y -CONFIG_SND_PXA2XX_SOC=y -# CONFIG_USB_HID is not set -CONFIG_USB=y -CONFIG_USB_OHCI_HCD=y -CONFIG_USB_GADGET=y -CONFIG_USB_PXA27X=y -CONFIG_USB_ETH=m -# CONFIG_USB_ETH_RNDIS is not set -CONFIG_MMC=y -CONFIG_SDIO_UART=m -CONFIG_MMC_PXA=y -CONFIG_MMC_SPI=y -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_LP3944=y -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=y -CONFIG_LEDS_TRIGGER_BACKLIGHT=y -CONFIG_LEDS_TRIGGER_GPIO=y -CONFIG_LEDS_TRIGGER_DEFAULT_ON=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_PXA=y -CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=m -CONFIG_AUTOFS4_FS=y -CONFIG_FUSE_FS=m -CONFIG_CUSE=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_TMPFS=y -CONFIG_JFFS2_FS=y -CONFIG_JFFS2_FS_WBUF_VERIFY=y -CONFIG_JFFS2_SUMMARY=y -CONFIG_JFFS2_FS_XATTR=y -CONFIG_JFFS2_COMPRESSION_OPTIONS=y -CONFIG_JFFS2_LZO=y -CONFIG_JFFS2_RUBIN=y -CONFIG_CRAMFS=m -CONFIG_SQUASHFS=m -CONFIG_ROMFS_FS=m -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFS_V3_ACL=y -CONFIG_NFSD=m -CONFIG_NFSD_V3_ACL=y -CONFIG_SMB_FS=m -CONFIG_CIFS=m -CONFIG_CIFS_STATS=y -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m -CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -# CONFIG_SCHED_DEBUG is not set -CONFIG_DEBUG_RT_MUTEXES=y -CONFIG_PROVE_LOCKING=y -# CONFIG_FTRACE is not set -CONFIG_DEBUG_USER=y -CONFIG_CRYPTO_NULL=m -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_GHASH=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -# CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC16=y From 0dc23d1a8e17839f1c071302b5f3e04a34692d44 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 6 Apr 2022 14:09:47 -0500 Subject: [PATCH 383/708] arm: dts: at91: Fix boolean properties with values Boolean properties in DT are present or not present and don't take a value. A property such as 'foo = <0>;' evaluated to true. IOW, the value doesn't matter. It may have been intended that 0 values are false, but there is no change in behavior with this patch. Signed-off-by: Rob Herring Reviewed-by: Claudiu Beznea Link: https://lore.kernel.org/r/Yk3leykDEKGBN8rk@robh.at.kernel.org' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/at91-kizbox3-hs.dts | 2 +- arch/arm/boot/dts/at91-kizbox3_common.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/at91-kizbox3-hs.dts b/arch/arm/boot/dts/at91-kizbox3-hs.dts index 2799b2a1f4d2..f7d90cf1bb77 100644 --- a/arch/arm/boot/dts/at91-kizbox3-hs.dts +++ b/arch/arm/boot/dts/at91-kizbox3-hs.dts @@ -225,7 +225,7 @@ pinctrl_pio_io_reset: gpio_io_reset { pinmux = ; bias-disable; - drive-open-drain = <1>; + drive-open-drain; output-low; }; pinctrl_pio_input: gpio_input { diff --git a/arch/arm/boot/dts/at91-kizbox3_common.dtsi b/arch/arm/boot/dts/at91-kizbox3_common.dtsi index abe27adfa4d6..465664628419 100644 --- a/arch/arm/boot/dts/at91-kizbox3_common.dtsi +++ b/arch/arm/boot/dts/at91-kizbox3_common.dtsi @@ -211,7 +211,7 @@ pinmux = , //DATA ; //CLK bias-disable; - drive-open-drain = <1>; + drive-open-drain; }; pinctrl_pwm0 { From 1a67653de0ddc67d274ce2762265ae18d58cc09a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 6 Apr 2022 14:17:30 -0500 Subject: [PATCH 384/708] arm64: dts: tegra: Fix boolean properties with values Boolean properties in DT are present or not present and don't take a value. A property such as 'foo = <0>;' evaluated to true. IOW, the value doesn't matter. It may have been intended that 0 values are false, but there is no change in behavior with this patch. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/Yk3nShkFzNJaI3/Z@robh.at.kernel.org' Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi | 8 ++++---- .../boot/dts/nvidia/tegra186-p3509-0000+p3636-0001.dts | 8 ++++---- arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi | 6 +++--- arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi | 6 +++--- arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi | 6 +++--- arch/arm64/boot/dts/nvidia/tegra210-p2894.dtsi | 8 ++++---- arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts | 8 ++++---- arch/arm64/boot/dts/nvidia/tegra210-smaug.dts | 4 ++-- 8 files changed, 27 insertions(+), 27 deletions(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi index aff857df25cf..1df84335925b 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi @@ -262,25 +262,25 @@ gpio4 { pins = "gpio4"; function = "32k-out1"; - drive-push-pull = <1>; + drive-push-pull; }; gpio5 { pins = "gpio5"; function = "gpio"; - drive-push-pull = <0>; + drive-push-pull; }; gpio6 { pins = "gpio6"; function = "gpio"; - drive-push-pull = <1>; + drive-push-pull; }; gpio7 { pins = "gpio7"; function = "gpio"; - drive-push-pull = <0>; + drive-push-pull; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p3509-0000+p3636-0001.dts b/arch/arm64/boot/dts/nvidia/tegra186-p3509-0000+p3636-0001.dts index 4631504c3c7a..1ab132c152bb 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186-p3509-0000+p3636-0001.dts +++ b/arch/arm64/boot/dts/nvidia/tegra186-p3509-0000+p3636-0001.dts @@ -462,25 +462,25 @@ gpio4 { pins = "gpio4"; function = "32k-out1"; - drive-push-pull = <1>; + drive-push-pull; }; gpio5 { pins = "gpio5"; function = "gpio"; - drive-push-pull = <0>; + drive-push-pull; }; gpio6 { pins = "gpio6"; function = "gpio"; - drive-push-pull = <1>; + drive-push-pull; }; gpio7 { pins = "gpio7"; function = "gpio"; - drive-push-pull = <1>; + drive-push-pull; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi index a7d7cfd66379..634d0f493c2e 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi @@ -174,19 +174,19 @@ gpio4 { pins = "gpio4"; function = "32k-out1"; - drive-push-pull = <1>; + drive-push-pull; }; gpio6 { pins = "gpio6"; function = "gpio"; - drive-push-pull = <1>; + drive-push-pull; }; gpio7 { pins = "gpio7"; function = "gpio"; - drive-push-pull = <0>; + drive-push-pull; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi index 0bd66f9c620b..0b219e72765e 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi @@ -148,19 +148,19 @@ gpio4 { pins = "gpio4"; function = "32k-out1"; - drive-push-pull = <1>; + drive-push-pull; }; gpio6 { pins = "gpio6"; function = "gpio"; - drive-push-pull = <1>; + drive-push-pull; }; gpio7 { pins = "gpio7"; function = "gpio"; - drive-push-pull = <0>; + drive-push-pull; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi index 75eb743a7242..0fe772b04bd0 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi @@ -59,7 +59,7 @@ gpio1 { pins = "gpio1"; function = "fps-out"; - drive-push-pull = <1>; + drive-push-pull; maxim,active-fps-source = ; maxim,active-fps-power-up-slot = <7>; maxim,active-fps-power-down-slot = <0>; @@ -68,7 +68,7 @@ gpio2_3 { pins = "gpio2", "gpio3"; function = "fps-out"; - drive-open-drain = <1>; + drive-open-drain; maxim,active-fps-source = ; }; @@ -80,7 +80,7 @@ gpio5_6_7 { pins = "gpio5", "gpio6", "gpio7"; function = "gpio"; - drive-push-pull = <1>; + drive-push-pull; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2894.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2894.dtsi index 10347b6e6e84..936a309e288c 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2894.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2894.dtsi @@ -1351,7 +1351,7 @@ gpio1 { pins = "gpio1"; function = "fps-out"; - drive-push-pull = <1>; + drive-push-pull; maxim,active-fps-source = ; maxim,active-fps-power-up-slot = <7>; maxim,active-fps-power-down-slot = <0>; @@ -1360,14 +1360,14 @@ gpio2 { pins = "gpio2"; function = "fps-out"; - drive-open-drain = <1>; + drive-open-drain; maxim,active-fps-source = ; }; gpio3 { pins = "gpio3"; function = "fps-out"; - drive-open-drain = <1>; + drive-open-drain; maxim,active-fps-source = ; }; @@ -1379,7 +1379,7 @@ gpio5_6_7 { pins = "gpio5", "gpio6", "gpio7"; function = "gpio"; - drive-push-pull = <1>; + drive-push-pull; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts b/arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts index 72c2dc3c14ea..f6446120c267 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts +++ b/arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts @@ -195,7 +195,7 @@ gpio1 { pins = "gpio1"; function = "fps-out"; - drive-push-pull = <1>; + drive-push-pull; maxim,active-fps-source = ; maxim,active-fps-power-up-slot = <0>; maxim,active-fps-power-down-slot = <7>; @@ -204,7 +204,7 @@ gpio2 { pins = "gpio2"; function = "fps-out"; - drive-open-drain = <1>; + drive-open-drain; maxim,active-fps-source = ; maxim,active-fps-power-up-slot = <0>; maxim,active-fps-power-down-slot = <7>; @@ -213,7 +213,7 @@ gpio3 { pins = "gpio3"; function = "fps-out"; - drive-open-drain = <1>; + drive-open-drain; maxim,active-fps-source = ; maxim,active-fps-power-up-slot = <4>; maxim,active-fps-power-down-slot = <3>; @@ -227,7 +227,7 @@ gpio5_6_7 { pins = "gpio5", "gpio6", "gpio7"; function = "gpio"; - drive-push-pull = <1>; + drive-push-pull; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra210-smaug.dts b/arch/arm64/boot/dts/nvidia/tegra210-smaug.dts index a263d51882ee..e42384f097d6 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-smaug.dts +++ b/arch/arm64/boot/dts/nvidia/tegra210-smaug.dts @@ -1386,7 +1386,7 @@ gpio3 { pins = "gpio3"; function = "fps-out"; - drive-open-drain = <1>; + drive-open-drain; maxim,active-fps-source = ; maxim,active-fps-power-up-slot = <4>; maxim,active-fps-power-down-slot = <2>; @@ -1395,7 +1395,7 @@ gpio5_6 { pins = "gpio5", "gpio6"; function = "gpio"; - drive-push-pull = <1>; + drive-push-pull; }; gpio4 { From 3b881035e959cf39f046484e340e48a3e46a99db Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 6 Apr 2022 14:13:11 -0500 Subject: [PATCH 385/708] arm: dts: imx: Fix boolean properties with values Boolean properties in DT are present or not present and don't take a value. A property such as 'foo = <0>;' evaluated to true. IOW, the value doesn't matter. It may have been intended that 0 values are false, but there is no change in behavior with this patch. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/Yk3mR5yae3gCkKhp@robh.at.kernel.org' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi | 32 +++++++++---------- .../dts/imx6ul-phytec-segin-peb-av-02.dtsi | 4 +-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi b/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi index 563bf9d44fe0..0b90c3f59f89 100644 --- a/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi @@ -154,112 +154,112 @@ regulators { bcore1 { regulator-name = "bcore1"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; bcore2 { regulator-name = "bcore2"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; bpro { regulator-name = "bpro"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; bperi { regulator-name = "bperi"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; bmem { regulator-name = "bmem"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo2 { regulator-name = "ldo2"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <1800000>; }; ldo3 { regulator-name = "ldo3"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo4 { regulator-name = "ldo4"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo5 { regulator-name = "ldo5"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo6 { regulator-name = "ldo6"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo7 { regulator-name = "ldo7"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo8 { regulator-name = "ldo8"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo9 { regulator-name = "ldo9"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo10 { regulator-name = "ldo10"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; ldo11 { regulator-name = "ldo11"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <300000>; regulator-max-microvolt = <3300000>; }; bio { regulator-name = "bio"; - regulator-always-on = <1>; + regulator-always-on; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; }; diff --git a/arch/arm/boot/dts/imx6ul-phytec-segin-peb-av-02.dtsi b/arch/arm/boot/dts/imx6ul-phytec-segin-peb-av-02.dtsi index 7cda6944501d..205e4d462702 100644 --- a/arch/arm/boot/dts/imx6ul-phytec-segin-peb-av-02.dtsi +++ b/arch/arm/boot/dts/imx6ul-phytec-segin-peb-av-02.dtsi @@ -72,8 +72,8 @@ st,settling = <2>; st,fraction-z = <7>; st,i-drive = <1>; - touchscreen-inverted-x = <1>; - touchscreen-inverted-y = <1>; + touchscreen-inverted-x; + touchscreen-inverted-y; }; }; }; From 1bc12d301594eafde0a8529d28d459af81053b3a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 6 Apr 2022 14:14:41 -0500 Subject: [PATCH 386/708] arm64: dts: imx: Fix imx8*-var-som touchscreen property sizes The common touchscreen properties are all 32-bit, not 16-bit. These properties must not be too important as they are all ignored in case of an error reading them. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/Yk3moe6Hz8ELM0iS@robh.at.kernel.org' Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi | 8 ++++---- arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi index 1dc9d187601c..a0bd540f27d3 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi @@ -89,12 +89,12 @@ pendown-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <125>; - touchscreen-size-x = /bits/ 16 <4008>; + touchscreen-size-x = <4008>; ti,y-min = /bits/ 16 <282>; - touchscreen-size-y = /bits/ 16 <3864>; + touchscreen-size-y = <3864>; ti,x-plate-ohms = /bits/ 16 <180>; - touchscreen-max-pressure = /bits/ 16 <255>; - touchscreen-average-samples = /bits/ 16 <10>; + touchscreen-max-pressure = <255>; + touchscreen-average-samples = <10>; ti,debounce-tol = /bits/ 16 <3>; ti,debounce-rep = /bits/ 16 <1>; ti,settle-delay-usec = /bits/ 16 <150>; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi index b16c7caf34c1..87b5e23c766f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi @@ -70,12 +70,12 @@ pendown-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <125>; - touchscreen-size-x = /bits/ 16 <4008>; + touchscreen-size-x = <4008>; ti,y-min = /bits/ 16 <282>; - touchscreen-size-y = /bits/ 16 <3864>; + touchscreen-size-y = <3864>; ti,x-plate-ohms = /bits/ 16 <180>; - touchscreen-max-pressure = /bits/ 16 <255>; - touchscreen-average-samples = /bits/ 16 <10>; + touchscreen-max-pressure = <255>; + touchscreen-average-samples = <10>; ti,debounce-tol = /bits/ 16 <3>; ti,debounce-rep = /bits/ 16 <1>; ti,settle-delay-usec = /bits/ 16 <150>; From bc2fb47db586dc807be96a6d79045616771b53d4 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 6 Apr 2022 14:16:07 -0500 Subject: [PATCH 387/708] arm/arm64: dts: qcom: Fix boolean properties with values Boolean properties in DT are present or not present and don't take a value. A property such as 'foo = <0>;' evaluated to true. IOW, the value doesn't matter. It may have been intended that 0 values are false, but there is no change in behavior with this patch. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/Yk3m92Sj26/v1mLG@robh.at.kernel.org' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/qcom-apq8064-pins.dtsi | 12 ++++++------ arch/arm64/boot/dts/qcom/msm8996.dtsi | 4 ++-- arch/arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi | 2 +- arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm/boot/dts/qcom-apq8064-pins.dtsi b/arch/arm/boot/dts/qcom-apq8064-pins.dtsi index cbe42c4153a0..b4d286a6fab1 100644 --- a/arch/arm/boot/dts/qcom-apq8064-pins.dtsi +++ b/arch/arm/boot/dts/qcom-apq8064-pins.dtsi @@ -76,7 +76,7 @@ pinconf { pins = "gpio20", "gpio21"; drive-strength = <2>; - bias-disable = <0>; + bias-disable; }; }; @@ -116,7 +116,7 @@ pinconf { pins = "gpio24", "gpio25"; drive-strength = <2>; - bias-disable = <0>; + bias-disable; }; }; @@ -141,7 +141,7 @@ pinconf { pins = "gpio8", "gpio9"; drive-strength = <2>; - bias-disable = <0>; + bias-disable; }; }; @@ -166,7 +166,7 @@ pinconf { pins = "gpio12", "gpio13"; drive-strength = <2>; - bias-disable = <0>; + bias-disable; }; }; @@ -229,7 +229,7 @@ pinconf { pins = "gpio16", "gpio17"; drive-strength = <2>; - bias-disable = <0>; + bias-disable; }; }; @@ -282,7 +282,7 @@ pinconf { pins = "gpio84", "gpio85"; drive-strength = <2>; - bias-disable = <0>; + bias-disable; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index f0f81c23c16f..b9a48cfd760f 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -1249,14 +1249,14 @@ pins = "gpio47", "gpio48"; function = "blsp_i2c3"; drive-strength = <16>; - bias-disable = <0>; + bias-disable; }; blsp1_i2c3_sleep: blsp1-i2c2-sleep { pins = "gpio47", "gpio48"; function = "gpio"; drive-strength = <2>; - bias-disable = <0>; + bias-disable; }; blsp2_uart3_4pins_default: blsp2-uart2-4pins { diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi index e90f99ef5323..e47c74e513af 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi @@ -33,7 +33,7 @@ ap_h1_spi: &spi0 {}; }; &alc5682 { - realtek,dmic-clk-driving-high = "true"; + realtek,dmic-clk-driving-high; }; &cpu6_alert0 { diff --git a/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi b/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi index 1084d5ce9ac7..07b729f9fec5 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi @@ -630,7 +630,7 @@ pins = "gpio6", "gpio25", "gpio26"; function = "gpio"; drive-strength = <8>; - bias-disable = <0>; + bias-disable; }; }; From 59b18a1e65b7a2134814106d0860010e10babe18 Mon Sep 17 00:00:00 2001 From: Reto Buerki Date: Thu, 7 Apr 2022 13:06:47 +0200 Subject: [PATCH 388/708] x86/msi: Fix msi message data shadow struct The x86 MSI message data is 32 bits in total and is either in compatibility or remappable format, see Intel Virtualization Technology for Directed I/O, section 5.1.2. Fixes: 6285aa50736 ("x86/msi: Provide msi message shadow structs") Co-developed-by: Adrian-Ken Rueegsegger Signed-off-by: Adrian-Ken Rueegsegger Signed-off-by: Reto Buerki Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220407110647.67372-1-reet@codelabs.ch --- arch/x86/include/asm/msi.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h index b85147d75626..d71c7e8b738d 100644 --- a/arch/x86/include/asm/msi.h +++ b/arch/x86/include/asm/msi.h @@ -12,14 +12,17 @@ int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, /* Structs and defines for the X86 specific MSI message format */ typedef struct x86_msi_data { - u32 vector : 8, - delivery_mode : 3, - dest_mode_logical : 1, - reserved : 2, - active_low : 1, - is_level : 1; - - u32 dmar_subhandle; + union { + struct { + u32 vector : 8, + delivery_mode : 3, + dest_mode_logical : 1, + reserved : 2, + active_low : 1, + is_level : 1; + }; + u32 dmar_subhandle; + }; } __attribute__ ((packed)) arch_msi_msg_data_t; #define arch_msi_msg_data x86_msi_data From 83a8441f8d8e2e47e9bf2aead3aca625ab95d5ad Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 6 Apr 2022 08:41:39 -0400 Subject: [PATCH 389/708] mm/huge_memory: Avoid calling pmd_page() on a non-leaf PMD Calling try_to_unmap() with TTU_SPLIT_HUGE_PMD and a folio that's not mapped by a PMD causes oopses on arm64 because we now call page_folio() on an invalid page. pmd_page() returns a valid page for non-leaf PMDs on some architectures, so this bug escaped testing before now. Fix this bug by delaying the call to pmd_page() until after we know the PMD is a leaf. Link: https://bugzilla.kernel.org/show_bug.cgi?id=215804 Fixes: af28a988b313 ("mm/huge_memory: Convert __split_huge_pmd() to take a folio") Reported-by: Zorro Lang Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Zorro Lang --- mm/huge_memory.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2fe38212e07c..c468fee595ff 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2145,15 +2145,14 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, * pmd against. Otherwise we can end up replacing wrong folio. */ VM_BUG_ON(freeze && !folio); - if (folio) { - VM_WARN_ON_ONCE(!folio_test_locked(folio)); - if (folio != page_folio(pmd_page(*pmd))) - goto out; - } + VM_WARN_ON_ONCE(folio && !folio_test_locked(folio)); if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || - is_pmd_migration_entry(*pmd)) + is_pmd_migration_entry(*pmd)) { + if (folio && folio != page_folio(pmd_page(*pmd))) + goto out; __split_huge_pmd_locked(vma, pmd, range.start, freeze); + } out: spin_unlock(ptl); From ffe06786b54039edcecb51a54061ee8d81036a19 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Apr 2022 14:35:04 -0400 Subject: [PATCH 390/708] mm/migrate: Use a folio in alloc_migration_target() This removes an assumption that a large folio is HPAGE_PMD_ORDER as well as letting us remove the call to prep_transhuge_page() and a few hidden calls to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: William Kucharski --- mm/migrate.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index de175e2fdba5..9894e90db006 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1520,10 +1520,11 @@ out: struct page *alloc_migration_target(struct page *page, unsigned long private) { + struct folio *folio = page_folio(page); struct migration_target_control *mtc; gfp_t gfp_mask; unsigned int order = 0; - struct page *new_page = NULL; + struct folio *new_folio = NULL; int nid; int zidx; @@ -1531,34 +1532,31 @@ struct page *alloc_migration_target(struct page *page, unsigned long private) gfp_mask = mtc->gfp_mask; nid = mtc->nid; if (nid == NUMA_NO_NODE) - nid = page_to_nid(page); + nid = folio_nid(folio); - if (PageHuge(page)) { - struct hstate *h = page_hstate(compound_head(page)); + if (folio_test_hugetlb(folio)) { + struct hstate *h = page_hstate(&folio->page); gfp_mask = htlb_modify_alloc_mask(h, gfp_mask); return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask); } - if (PageTransHuge(page)) { + if (folio_test_large(folio)) { /* * clear __GFP_RECLAIM to make the migration callback * consistent with regular THP allocations. */ gfp_mask &= ~__GFP_RECLAIM; gfp_mask |= GFP_TRANSHUGE; - order = HPAGE_PMD_ORDER; + order = folio_order(folio); } - zidx = zone_idx(page_zone(page)); + zidx = zone_idx(folio_zone(folio)); if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE) gfp_mask |= __GFP_HIGHMEM; - new_page = __alloc_pages(gfp_mask, order, nid, mtc->nmask); + new_folio = __folio_alloc(gfp_mask, order, nid, mtc->nmask); - if (new_page && PageTransHuge(new_page)) - prep_transhuge_page(new_page); - - return new_page; + return &new_folio->page; } #ifdef CONFIG_NUMA From c185e494ae0ceb126d89b8e3413ed0a1132e05d3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 6 Jul 2021 10:50:39 -0400 Subject: [PATCH 391/708] mm/migrate: Use a folio in migrate_misplaced_transhuge_page() Unify alloc_misplaced_dst_page() and alloc_misplaced_dst_page_thp(). Removes an assumption that compound pages are HPAGE_PMD_ORDER. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: William Kucharski --- mm/migrate.c | 56 ++++++++++++++-------------------------------------- 1 file changed, 15 insertions(+), 41 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 9894e90db006..6c31ee1e1c9b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1997,32 +1997,20 @@ static struct page *alloc_misplaced_dst_page(struct page *page, unsigned long data) { int nid = (int) data; - struct page *newpage; + int order = compound_order(page); + gfp_t gfp = __GFP_THISNODE; + struct folio *new; - newpage = __alloc_pages_node(nid, - (GFP_HIGHUSER_MOVABLE | - __GFP_THISNODE | __GFP_NOMEMALLOC | - __GFP_NORETRY | __GFP_NOWARN) & - ~__GFP_RECLAIM, 0); + if (order > 0) + gfp |= GFP_TRANSHUGE_LIGHT; + else { + gfp |= GFP_HIGHUSER_MOVABLE | __GFP_NOMEMALLOC | __GFP_NORETRY | + __GFP_NOWARN; + gfp &= ~__GFP_RECLAIM; + } + new = __folio_alloc_node(gfp, order, nid); - return newpage; -} - -static struct page *alloc_misplaced_dst_page_thp(struct page *page, - unsigned long data) -{ - int nid = (int) data; - struct page *newpage; - - newpage = alloc_pages_node(nid, (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE), - HPAGE_PMD_ORDER); - if (!newpage) - goto out; - - prep_transhuge_page(newpage); - -out: - return newpage; + return &new->page; } static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) @@ -2080,22 +2068,8 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, int nr_remaining; unsigned int nr_succeeded; LIST_HEAD(migratepages); - new_page_t *new; - bool compound; int nr_pages = thp_nr_pages(page); - /* - * PTE mapped THP or HugeTLB page can't reach here so the page could - * be either base page or THP. And it must be head page if it is - * THP. - */ - compound = PageTransHuge(page); - - if (compound) - new = alloc_misplaced_dst_page_thp; - else - new = alloc_misplaced_dst_page; - /* * Don't migrate file pages that are mapped in multiple processes * with execute permissions as they are probably shared libraries. @@ -2116,9 +2090,9 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, goto out; list_add(&page->lru, &migratepages); - nr_remaining = migrate_pages(&migratepages, *new, NULL, node, - MIGRATE_ASYNC, MR_NUMA_MISPLACED, - &nr_succeeded); + nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page, + NULL, node, MIGRATE_ASYNC, + MR_NUMA_MISPLACED, &nr_succeeded); if (nr_remaining) { if (!list_empty(&migratepages)) { list_del(&page->lru); From f584b68005ac782097d63a691740cb0dfed072ed Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Apr 2022 15:11:04 -0400 Subject: [PATCH 392/708] mm: Add vma_alloc_folio() This wrapper around alloc_pages_vma() calls prep_transhuge_page(), removing the obligation from the caller. This is in the same spirit as __folio_alloc(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: William Kucharski --- include/linux/gfp.h | 8 ++++++-- mm/mempolicy.c | 13 +++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 761f8f1885c7..3e3d36fc2109 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -613,9 +613,11 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, #ifdef CONFIG_NUMA struct page *alloc_pages(gfp_t gfp, unsigned int order); struct folio *folio_alloc(gfp_t gfp, unsigned order); -extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, +struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, bool hugepage); +struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma, + unsigned long addr, bool hugepage); #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages_vma(gfp_mask, order, vma, addr, true) #else @@ -627,8 +629,10 @@ static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order) { return __folio_alloc_node(gfp, order, numa_node_id()); } -#define alloc_pages_vma(gfp_mask, order, vma, addr, false)\ +#define alloc_pages_vma(gfp_mask, order, vma, addr, hugepage) \ alloc_pages(gfp_mask, order) +#define vma_alloc_folio(gfp, order, vma, addr, hugepage) \ + folio_alloc(gfp, order) #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages(gfp_mask, order) #endif diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a2516d31db6c..ec15f4f4b714 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2227,6 +2227,19 @@ out: } EXPORT_SYMBOL(alloc_pages_vma); +struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma, + unsigned long addr, bool hugepage) +{ + struct folio *folio; + + folio = (struct folio *)alloc_pages_vma(gfp, order, vma, addr, + hugepage); + if (folio && order > 1) + prep_transhuge_page(&folio->page); + + return folio; +} + /** * alloc_pages - Allocate pages. * @gfp: GFP flags. From ec4858e07ed62eceb60bac2ded3c0d6e2471c66b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Apr 2022 15:23:39 -0400 Subject: [PATCH 393/708] mm/mempolicy: Use vma_alloc_folio() in new_page() Simplify new_page() by unifying the THP and base page cases, and handle orders other than 0 and HPAGE_PMD_ORDER correctly. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: William Kucharski --- mm/mempolicy.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index ec15f4f4b714..649bd3be8682 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1191,8 +1191,10 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, */ static struct page *new_page(struct page *page, unsigned long start) { + struct folio *dst, *src = page_folio(page); struct vm_area_struct *vma; unsigned long address; + gfp_t gfp = GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL; vma = find_vma(current->mm, start); while (vma) { @@ -1202,24 +1204,19 @@ static struct page *new_page(struct page *page, unsigned long start) vma = vma->vm_next; } - if (PageHuge(page)) { - return alloc_huge_page_vma(page_hstate(compound_head(page)), + if (folio_test_hugetlb(src)) + return alloc_huge_page_vma(page_hstate(&src->page), vma, address); - } else if (PageTransHuge(page)) { - struct page *thp; - thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address, - HPAGE_PMD_ORDER); - if (!thp) - return NULL; - prep_transhuge_page(thp); - return thp; - } + if (folio_test_large(src)) + gfp = GFP_TRANSHUGE; + /* - * if !vma, alloc_page_vma() will use task or system default policy + * if !vma, vma_alloc_folio() will use task or system default policy */ - return alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL, - vma, address); + dst = vma_alloc_folio(gfp, folio_order(src), vma, address, + folio_test_large(src)); + return &dst->page; } #else From 98ea02597b9967c0817d29fee2f96d21b9e59ca5 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Thu, 7 Apr 2022 14:40:08 +0800 Subject: [PATCH 394/708] mm/rmap: Fix handling of hugetlbfs pages in page_vma_mapped_walk page_mapped_in_vma() sets nr_pages to 1, which is usually correct as we only want to know about the precise page and not about other pages in the folio. However, hugetlbfs does want to know about the entire hpage, and using nr_pages to get the size of the hpage is wrong. We could change page_mapped_in_vma() to special-case hugetlbfs pages, but it's better to ignore nr_pages in page_vma_mapped_walk() and get the size from the VMA instead. Fixes: 2aff7a4755bed ("mm: Convert page_vma_mapped_walk to work on PFNs") Signed-off-by: zhenwei pi Reviewed-by: Muchun Song Signed-off-by: Matthew Wilcox (Oracle) [edit commit message, use hstate directly] --- mm/page_vma_mapped.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 1187f9c1ec5b..14a5cda73dee 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -163,7 +163,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) return not_found(pvmw); if (unlikely(is_vm_hugetlb_page(vma))) { - unsigned long size = pvmw->nr_pages * PAGE_SIZE; + struct hstate *hstate = hstate_vma(vma); + unsigned long size = huge_page_size(hstate); /* The only possible mapping was handled on last iteration */ if (pvmw->pte) return not_found(pvmw); @@ -173,8 +174,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) if (!pvmw->pte) return false; - pvmw->ptl = huge_pte_lockptr(size_to_hstate(size), mm, - pvmw->pte); + pvmw->ptl = huge_pte_lockptr(hstate, mm, pvmw->pte); spin_lock(pvmw->ptl); if (!check_pte(pvmw)) return not_found(pvmw); From 4d5004451ab2218eab94a30e1841462c9316ba19 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 6 Apr 2022 13:51:32 -0400 Subject: [PATCH 395/708] SUNRPC: Fix the svc_deferred_event trace class Fix a NULL deref crash that occurs when an svc_rqst is deferred while the sunrpc tracing subsystem is enabled. svc_revisit() sets dr->xprt to NULL, so it can't be relied upon in the tracepoint to provide the remote's address. Unfortunately we can't revert the "svc_deferred_class" hunk in commit ece200ddd54b ("sunrpc: Save remote presentation address in svc_xprt for trace events") because there is now a specific check of event format specifiers for unsafe dereferences. The warning that check emits is: event svc_defer_recv has unsafe dereference of argument 1 A "%pISpc" format specifier with a "struct sockaddr *" is indeed flagged by this check. Instead, take the brute-force approach used by the svcrdma_qp_error tracepoint. Convert the dr::addr field into a presentation address in the TP_fast_assign() arm of the trace event, and store that as a string. This fix can be backported to -stable kernels. In the meantime, commit c6ced22997ad ("tracing: Update print fmt check to handle new __get_sockaddr() macro") is now in v5.18, so this wonky fix can be replaced with __sockaddr() and friends properly during the v5.19 merge window. Fixes: ece200ddd54b ("sunrpc: Save remote presentation address in svc_xprt for trace events") Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ab8ae1f6ba84..4eb706fa5825 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -2017,17 +2017,18 @@ DECLARE_EVENT_CLASS(svc_deferred_event, TP_STRUCT__entry( __field(const void *, dr) __field(u32, xid) - __string(addr, dr->xprt->xpt_remotebuf) + __array(__u8, addr, INET6_ADDRSTRLEN + 10) ), TP_fast_assign( __entry->dr = dr; __entry->xid = be32_to_cpu(*(__be32 *)(dr->args + (dr->xprt_hlen>>2))); - __assign_str(addr, dr->xprt->xpt_remotebuf); + snprintf(__entry->addr, sizeof(__entry->addr) - 1, + "%pISpc", (struct sockaddr *)&dr->addr); ), - TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr, + TP_printk("addr=%s dr=%p xid=0x%08x", __entry->addr, __entry->dr, __entry->xid) ); From 5106dd6e74ab6c94daac1c357094f11e6934b36f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 4 Apr 2022 17:18:43 -0600 Subject: [PATCH 396/708] io_uring: propagate issue_flags state down to file assignment We'll need this in a future patch, when we could be assigning the file after the prep stage. While at it, get rid of the io_file_get() helper, it just makes the code harder to read. Signed-off-by: Jens Axboe --- fs/io_uring.c | 82 +++++++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 35 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 969f65de9972..398128db9728 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1183,8 +1183,9 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, struct io_uring_rsrc_update2 *up, unsigned nr_args); static void io_clean_op(struct io_kiocb *req); -static struct file *io_file_get(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd, bool fixed); +static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, + unsigned issue_flags); +static inline struct file *io_file_get_normal(struct io_kiocb *req, int fd); static void __io_queue_sqe(struct io_kiocb *req); static void io_rsrc_put_work(struct work_struct *work); @@ -1314,13 +1315,20 @@ static void io_rsrc_refs_refill(struct io_ring_ctx *ctx) } static inline void io_req_set_rsrc_node(struct io_kiocb *req, - struct io_ring_ctx *ctx) + struct io_ring_ctx *ctx, + unsigned int issue_flags) { if (!req->fixed_rsrc_refs) { req->fixed_rsrc_refs = &ctx->rsrc_node->refs; - ctx->rsrc_cached_refs--; - if (unlikely(ctx->rsrc_cached_refs < 0)) - io_rsrc_refs_refill(ctx); + + if (!(issue_flags & IO_URING_F_UNLOCKED)) { + lockdep_assert_held(&ctx->uring_lock); + ctx->rsrc_cached_refs--; + if (unlikely(ctx->rsrc_cached_refs < 0)) + io_rsrc_refs_refill(ctx); + } else { + percpu_ref_get(req->fixed_rsrc_refs); + } } } @@ -3330,7 +3338,8 @@ static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter return 0; } -static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter) +static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter, + unsigned int issue_flags) { struct io_mapped_ubuf *imu = req->imu; u16 index, buf_index = req->buf_index; @@ -3340,7 +3349,7 @@ static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter) if (unlikely(buf_index >= ctx->nr_user_bufs)) return -EFAULT; - io_req_set_rsrc_node(req, ctx); + io_req_set_rsrc_node(req, ctx, issue_flags); index = array_index_nospec(buf_index, ctx->nr_user_bufs); imu = READ_ONCE(ctx->user_bufs[index]); req->imu = imu; @@ -3502,7 +3511,7 @@ static struct iovec *__io_import_iovec(int rw, struct io_kiocb *req, ssize_t ret; if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { - ret = io_import_fixed(req, rw, iter); + ret = io_import_fixed(req, rw, iter, issue_flags); if (ret) return ERR_PTR(ret); return NULL; @@ -4394,8 +4403,10 @@ static int io_tee(struct io_kiocb *req, unsigned int issue_flags) if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; - in = io_file_get(req->ctx, req, sp->splice_fd_in, - (sp->flags & SPLICE_F_FD_IN_FIXED)); + if (sp->flags & SPLICE_F_FD_IN_FIXED) + in = io_file_get_fixed(req, sp->splice_fd_in, IO_URING_F_UNLOCKED); + else + in = io_file_get_normal(req, sp->splice_fd_in); if (!in) { ret = -EBADF; goto done; @@ -4434,8 +4445,10 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags) if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; - in = io_file_get(req->ctx, req, sp->splice_fd_in, - (sp->flags & SPLICE_F_FD_IN_FIXED)); + if (sp->flags & SPLICE_F_FD_IN_FIXED) + in = io_file_get_fixed(req, sp->splice_fd_in, IO_URING_F_UNLOCKED); + else + in = io_file_get_normal(req, sp->splice_fd_in); if (!in) { ret = -EBADF; goto done; @@ -5973,7 +5986,7 @@ static void io_poll_remove_entries(struct io_kiocb *req) * either spurious wakeup or multishot CQE is served. 0 when it's done with * the request, then the mask is stored in req->result. */ -static int io_poll_check_events(struct io_kiocb *req) +static int io_poll_check_events(struct io_kiocb *req, bool locked) { struct io_ring_ctx *ctx = req->ctx; struct io_poll_iocb *poll = io_poll_get_single(req); @@ -6030,7 +6043,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) struct io_ring_ctx *ctx = req->ctx; int ret; - ret = io_poll_check_events(req); + ret = io_poll_check_events(req, *locked); if (ret > 0) return; @@ -6055,7 +6068,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked) struct io_ring_ctx *ctx = req->ctx; int ret; - ret = io_poll_check_events(req); + ret = io_poll_check_events(req, *locked); if (ret > 0) return; @@ -7460,30 +7473,36 @@ static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file file_slot->file_ptr = file_ptr; } -static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd) +static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, + unsigned int issue_flags) { - struct file *file; + struct io_ring_ctx *ctx = req->ctx; + struct file *file = NULL; unsigned long file_ptr; + if (issue_flags & IO_URING_F_UNLOCKED) + mutex_lock(&ctx->uring_lock); + if (unlikely((unsigned int)fd >= ctx->nr_user_files)) - return NULL; + goto out; fd = array_index_nospec(fd, ctx->nr_user_files); file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr; file = (struct file *) (file_ptr & FFS_MASK); file_ptr &= ~FFS_MASK; /* mask in overlapping REQ_F and FFS bits */ req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT); - io_req_set_rsrc_node(req, ctx); + io_req_set_rsrc_node(req, ctx, 0); +out: + if (issue_flags & IO_URING_F_UNLOCKED) + mutex_unlock(&ctx->uring_lock); return file; } -static struct file *io_file_get_normal(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd) +static struct file *io_file_get_normal(struct io_kiocb *req, int fd) { struct file *file = fget(fd); - trace_io_uring_file_get(ctx, req, req->user_data, fd); + trace_io_uring_file_get(req->ctx, req, req->user_data, fd); /* we don't allow fixed io_uring files */ if (file && unlikely(file->f_op == &io_uring_fops)) @@ -7491,15 +7510,6 @@ static struct file *io_file_get_normal(struct io_ring_ctx *ctx, return file; } -static inline struct file *io_file_get(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd, bool fixed) -{ - if (fixed) - return io_file_get_fixed(ctx, req, fd); - else - return io_file_get_normal(ctx, req, fd); -} - static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) { struct io_kiocb *prev = req->timeout.prev; @@ -7749,8 +7759,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, blk_start_plug_nr_ios(&state->plug, state->submit_nr); } - req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd), - (sqe_flags & IOSQE_FIXED_FILE)); + if (req->flags & REQ_F_FIXED_FILE) + req->file = io_file_get_fixed(req, READ_ONCE(sqe->fd), 0); + else + req->file = io_file_get_normal(req, READ_ONCE(sqe->fd)); if (unlikely(!req->file)) return -EBADF; } From 6bf9c47a398911e0ab920e362115153596c80432 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 29 Mar 2022 10:10:08 -0600 Subject: [PATCH 397/708] io_uring: defer file assignment If an application uses direct open or accept, it knows in advance what direct descriptor value it will get as it picks it itself. This allows combined requests such as: sqe = io_uring_get_sqe(ring); io_uring_prep_openat_direct(sqe, ..., file_slot); sqe->flags |= IOSQE_IO_LINK | IOSQE_CQE_SKIP_SUCCESS; sqe = io_uring_get_sqe(ring); io_uring_prep_read(sqe,file_slot, buf, buf_size, 0); sqe->flags |= IOSQE_FIXED_FILE; io_uring_submit(ring); where we prepare both a file open and read, and only get a completion event for the read when both have completed successfully. Currently links are fully prepared before the head is issued, but that fails if the dependent link needs a file assigned that isn't valid until the head has completed. Conversely, if the same chain is performed but the fixed file slot is already valid, then we would be unexpectedly returning data from the old file slot rather than the newly opened one. Make sure we're consistent here. Allow deferral of file setup, which makes this documented case work. Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Jens Axboe --- fs/io-wq.h | 1 + fs/io_uring.c | 39 +++++++++++++++++++++++++++++---------- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/fs/io-wq.h b/fs/io-wq.h index dbecd27656c7..04d374e65e54 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -155,6 +155,7 @@ struct io_wq_work_node *wq_stack_extract(struct io_wq_work_node *stack) struct io_wq_work { struct io_wq_work_node list; unsigned flags; + int fd; }; static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) diff --git a/fs/io_uring.c b/fs/io_uring.c index 398128db9728..bdc090fec29c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7240,6 +7240,23 @@ static void io_clean_op(struct io_kiocb *req) req->flags &= ~IO_REQ_CLEAN_FLAGS; } +static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags) +{ + if (req->file || !io_op_defs[req->opcode].needs_file) + return true; + + if (req->flags & REQ_F_FIXED_FILE) + req->file = io_file_get_fixed(req, req->work.fd, issue_flags); + else + req->file = io_file_get_normal(req, req->work.fd); + if (req->file) + return true; + + req_set_fail(req); + req->result = -EBADF; + return false; +} + static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) { const struct cred *creds = NULL; @@ -7250,6 +7267,8 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) if (!io_op_defs[req->opcode].audit_skip) audit_uring_entry(req->opcode); + if (unlikely(!io_assign_file(req, issue_flags))) + return -EBADF; switch (req->opcode) { case IORING_OP_NOP: @@ -7394,10 +7413,11 @@ static struct io_wq_work *io_wq_free_work(struct io_wq_work *work) static void io_wq_submit_work(struct io_wq_work *work) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); + const struct io_op_def *def = &io_op_defs[req->opcode]; unsigned int issue_flags = IO_URING_F_UNLOCKED; bool needs_poll = false; struct io_kiocb *timeout; - int ret = 0; + int ret = 0, err = -ECANCELED; /* one will be dropped by ->io_free_work() after returning to io-wq */ if (!(req->flags & REQ_F_REFCOUNT)) @@ -7409,14 +7429,18 @@ static void io_wq_submit_work(struct io_wq_work *work) if (timeout) io_queue_linked_timeout(timeout); + if (!io_assign_file(req, issue_flags)) { + err = -EBADF; + work->flags |= IO_WQ_WORK_CANCEL; + } + /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ if (work->flags & IO_WQ_WORK_CANCEL) { - io_req_task_queue_fail(req, -ECANCELED); + io_req_task_queue_fail(req, err); return; } if (req->flags & REQ_F_FORCE_ASYNC) { - const struct io_op_def *def = &io_op_defs[req->opcode]; bool opcode_poll = def->pollin || def->pollout; if (opcode_poll && file_can_poll(req->file)) { @@ -7749,6 +7773,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (io_op_defs[opcode].needs_file) { struct io_submit_state *state = &ctx->submit_state; + req->work.fd = READ_ONCE(sqe->fd); + /* * Plug now if we have more than 2 IO left after this, and the * target is potentially a read/write to block based storage. @@ -7758,13 +7784,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, state->need_plug = false; blk_start_plug_nr_ios(&state->plug, state->submit_nr); } - - if (req->flags & REQ_F_FIXED_FILE) - req->file = io_file_get_fixed(req, READ_ONCE(sqe->fd), 0); - else - req->file = io_file_get_normal(req, READ_ONCE(sqe->fd)); - if (unlikely(!req->file)) - return -EBADF; } personality = READ_ONCE(sqe->personality); From d5361233e9ab920e135819f73dd8466355f1fddd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 31 Mar 2022 12:38:46 -0600 Subject: [PATCH 398/708] io_uring: drop the old style inflight file tracking io_uring tracks requests that are referencing an io_uring descriptor to be able to cancel without worrying about loops in the references. Since we now assign the file at execution time, the easier approach is to drop a potentially problematic reference before we punt the request. This eliminates the need to special case these types of files beyond just marking them as such, and simplifies cancelation quite a bit. This also fixes a recent issue where an async punted tee operation would with the io_uring descriptor as the output file would crash when attempting to get a reference to the file from the io-wq worker. We could have worked around that, but this is the much cleaner fix. Fixes: 6bf9c47a3989 ("io_uring: defer file assignment") Reported-by: syzbot+c4b9303500a21750b250@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 85 ++++++++++++++++----------------------------------- 1 file changed, 27 insertions(+), 58 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index bdc090fec29c..ad0d99ffbf0a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -112,8 +112,7 @@ IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS) #define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \ - REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \ - REQ_F_ASYNC_DATA) + REQ_F_POLLED | REQ_F_CREDS | REQ_F_ASYNC_DATA) #define IO_TCTX_REFS_CACHE_NR (1U << 10) @@ -500,7 +499,6 @@ struct io_uring_task { const struct io_ring_ctx *last; struct io_wq *io_wq; struct percpu_counter inflight; - atomic_t inflight_tracked; atomic_t in_idle; spinlock_t task_lock; @@ -1186,6 +1184,8 @@ static void io_clean_op(struct io_kiocb *req); static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, unsigned issue_flags); static inline struct file *io_file_get_normal(struct io_kiocb *req, int fd); +static void io_drop_inflight_file(struct io_kiocb *req); +static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags); static void __io_queue_sqe(struct io_kiocb *req); static void io_rsrc_put_work(struct work_struct *work); @@ -1433,29 +1433,9 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task, bool cancel_all) __must_hold(&req->ctx->timeout_lock) { - struct io_kiocb *req; - if (task && head->task != task) return false; - if (cancel_all) - return true; - - io_for_each_link(req, head) { - if (req->flags & REQ_F_INFLIGHT) - return true; - } - return false; -} - -static bool io_match_linked(struct io_kiocb *head) -{ - struct io_kiocb *req; - - io_for_each_link(req, head) { - if (req->flags & REQ_F_INFLIGHT) - return true; - } - return false; + return cancel_all; } /* @@ -1465,24 +1445,9 @@ static bool io_match_linked(struct io_kiocb *head) static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, bool cancel_all) { - bool matched; - if (task && head->task != task) return false; - if (cancel_all) - return true; - - if (head->flags & REQ_F_LINK_TIMEOUT) { - struct io_ring_ctx *ctx = head->ctx; - - /* protect against races with linked timeouts */ - spin_lock_irq(&ctx->timeout_lock); - matched = io_match_linked(head); - spin_unlock_irq(&ctx->timeout_lock); - } else { - matched = io_match_linked(head); - } - return matched; + return cancel_all; } static inline bool req_has_async_data(struct io_kiocb *req) @@ -1645,14 +1610,6 @@ static inline bool io_req_ffs_set(struct io_kiocb *req) return req->flags & REQ_F_FIXED_FILE; } -static inline void io_req_track_inflight(struct io_kiocb *req) -{ - if (!(req->flags & REQ_F_INFLIGHT)) { - req->flags |= REQ_F_INFLIGHT; - atomic_inc(¤t->io_uring->inflight_tracked); - } -} - static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) { if (WARN_ON_ONCE(!req->link)) @@ -2563,6 +2520,8 @@ static void io_req_task_work_add(struct io_kiocb *req, bool priority) WARN_ON_ONCE(!tctx); + io_drop_inflight_file(req); + spin_lock_irqsave(&tctx->task_lock, flags); if (priority) wq_list_add_tail(&req->io_task_work.node, &tctx->prior_task_list); @@ -6008,7 +5967,10 @@ static int io_poll_check_events(struct io_kiocb *req, bool locked) if (!req->result) { struct poll_table_struct pt = { ._key = req->cflags }; - req->result = vfs_poll(req->file, &pt) & req->cflags; + if (unlikely(!io_assign_file(req, IO_URING_F_UNLOCKED))) + req->result = -EBADF; + else + req->result = vfs_poll(req->file, &pt) & req->cflags; } /* multishot, just fill an CQE and proceed */ @@ -7226,11 +7188,6 @@ static void io_clean_op(struct io_kiocb *req) kfree(req->apoll); req->apoll = NULL; } - if (req->flags & REQ_F_INFLIGHT) { - struct io_uring_task *tctx = req->task->io_uring; - - atomic_dec(&tctx->inflight_tracked); - } if (req->flags & REQ_F_CREDS) put_cred(req->creds); if (req->flags & REQ_F_ASYNC_DATA) { @@ -7522,6 +7479,19 @@ out: return file; } +/* + * Drop the file for requeue operations. Only used of req->file is the + * io_uring descriptor itself. + */ +static void io_drop_inflight_file(struct io_kiocb *req) +{ + if (unlikely(req->flags & REQ_F_INFLIGHT)) { + fput(req->file); + req->file = NULL; + req->flags &= ~REQ_F_INFLIGHT; + } +} + static struct file *io_file_get_normal(struct io_kiocb *req, int fd) { struct file *file = fget(fd); @@ -7529,8 +7499,8 @@ static struct file *io_file_get_normal(struct io_kiocb *req, int fd) trace_io_uring_file_get(req->ctx, req, req->user_data, fd); /* we don't allow fixed io_uring files */ - if (file && unlikely(file->f_op == &io_uring_fops)) - io_req_track_inflight(req); + if (file && file->f_op == &io_uring_fops) + req->flags |= REQ_F_INFLIGHT; return file; } @@ -9437,7 +9407,6 @@ static __cold int io_uring_alloc_task_context(struct task_struct *task, xa_init(&tctx->xa); init_waitqueue_head(&tctx->wait); atomic_set(&tctx->in_idle, 0); - atomic_set(&tctx->inflight_tracked, 0); task->io_uring = tctx; spin_lock_init(&tctx->task_lock); INIT_WQ_LIST(&tctx->task_list); @@ -10630,7 +10599,7 @@ static __cold void io_uring_clean_tctx(struct io_uring_task *tctx) static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) { if (tracked) - return atomic_read(&tctx->inflight_tracked); + return 0; return percpu_counter_sum(&tctx->inflight); } From cb318216732579da80202fe3e622a504e55b3a0f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 5 Apr 2022 10:31:43 -0600 Subject: [PATCH 399/708] Revert "io_uring: Add support for napi_busy_poll" This reverts commit adc8682ec69012b68d5ab7123e246d2ad9a6f94b. There's some discussion on the API not being as good as it can be. Rather than ship something and be stuck with it forever, let's revert the NAPI support for now and work on getting something sorted out for the next kernel release instead. Link: https://lore.kernel.org/io-uring/b7bbc124-8502-0ee9-d4c8-7c41b4487264@kernel.dk/ Signed-off-by: Jens Axboe --- fs/io_uring.c | 232 +------------------------------------------------- 1 file changed, 1 insertion(+), 231 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ad0d99ffbf0a..60d6ac21519d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -63,7 +63,6 @@ #include #include #include -#include #include #include #include @@ -411,11 +410,6 @@ struct io_ring_ctx { struct list_head sqd_list; unsigned long check_cq_overflow; -#ifdef CONFIG_NET_RX_BUSY_POLL - /* used to track busy poll napi_id */ - struct list_head napi_list; - spinlock_t napi_lock; /* napi_list lock */ -#endif struct { unsigned cached_cq_tail; @@ -1569,10 +1563,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_WQ_LIST(&ctx->locked_free_list); INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); INIT_WQ_LIST(&ctx->submit_state.compl_reqs); -#ifdef CONFIG_NET_RX_BUSY_POLL - INIT_LIST_HEAD(&ctx->napi_list); - spin_lock_init(&ctx->napi_lock); -#endif return ctx; err: kfree(ctx->dummy_ubuf); @@ -5730,108 +5720,6 @@ IO_NETOP_FN(send); IO_NETOP_FN(recv); #endif /* CONFIG_NET */ -#ifdef CONFIG_NET_RX_BUSY_POLL - -#define NAPI_TIMEOUT (60 * SEC_CONVERSION) - -struct napi_entry { - struct list_head list; - unsigned int napi_id; - unsigned long timeout; -}; - -/* - * Add busy poll NAPI ID from sk. - */ -static void io_add_napi(struct file *file, struct io_ring_ctx *ctx) -{ - unsigned int napi_id; - struct socket *sock; - struct sock *sk; - struct napi_entry *ne; - - if (!net_busy_loop_on()) - return; - - sock = sock_from_file(file); - if (!sock) - return; - - sk = sock->sk; - if (!sk) - return; - - napi_id = READ_ONCE(sk->sk_napi_id); - - /* Non-NAPI IDs can be rejected */ - if (napi_id < MIN_NAPI_ID) - return; - - spin_lock(&ctx->napi_lock); - list_for_each_entry(ne, &ctx->napi_list, list) { - if (ne->napi_id == napi_id) { - ne->timeout = jiffies + NAPI_TIMEOUT; - goto out; - } - } - - ne = kmalloc(sizeof(*ne), GFP_NOWAIT); - if (!ne) - goto out; - - ne->napi_id = napi_id; - ne->timeout = jiffies + NAPI_TIMEOUT; - list_add_tail(&ne->list, &ctx->napi_list); -out: - spin_unlock(&ctx->napi_lock); -} - -static inline void io_check_napi_entry_timeout(struct napi_entry *ne) -{ - if (time_after(jiffies, ne->timeout)) { - list_del(&ne->list); - kfree(ne); - } -} - -/* - * Busy poll if globally on and supporting sockets found - */ -static bool io_napi_busy_loop(struct list_head *napi_list) -{ - struct napi_entry *ne, *n; - - list_for_each_entry_safe(ne, n, napi_list, list) { - napi_busy_loop(ne->napi_id, NULL, NULL, true, - BUSY_POLL_BUDGET); - io_check_napi_entry_timeout(ne); - } - return !list_empty(napi_list); -} - -static void io_free_napi_list(struct io_ring_ctx *ctx) -{ - spin_lock(&ctx->napi_lock); - while (!list_empty(&ctx->napi_list)) { - struct napi_entry *ne = - list_first_entry(&ctx->napi_list, struct napi_entry, - list); - - list_del(&ne->list); - kfree(ne); - } - spin_unlock(&ctx->napi_lock); -} -#else -static inline void io_add_napi(struct file *file, struct io_ring_ctx *ctx) -{ -} - -static inline void io_free_napi_list(struct io_ring_ctx *ctx) -{ -} -#endif /* CONFIG_NET_RX_BUSY_POLL */ - struct io_poll_table { struct poll_table_struct pt; struct io_kiocb *req; @@ -5986,7 +5874,6 @@ static int io_poll_check_events(struct io_kiocb *req, bool locked) if (unlikely(!filled)) return -ECANCELED; io_cqring_ev_posted(ctx); - io_add_napi(req->file, ctx); } else if (req->result) { return 0; } @@ -6237,7 +6124,6 @@ static int __io_arm_poll_handler(struct io_kiocb *req, __io_poll_execute(req, mask, poll->events); return 0; } - io_add_napi(req->file, req->ctx); /* * Release ownership. If someone tried to queue a tw while it was @@ -8028,13 +7914,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) !(ctx->flags & IORING_SETUP_R_DISABLED)) ret = io_submit_sqes(ctx, to_submit); mutex_unlock(&ctx->uring_lock); -#ifdef CONFIG_NET_RX_BUSY_POLL - spin_lock(&ctx->napi_lock); - if (!list_empty(&ctx->napi_list) && - io_napi_busy_loop(&ctx->napi_list)) - ++ret; - spin_unlock(&ctx->napi_lock); -#endif + if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait)) wake_up(&ctx->sqo_sq_wait); if (creds) @@ -8172,9 +8052,6 @@ struct io_wait_queue { struct io_ring_ctx *ctx; unsigned cq_tail; unsigned nr_timeouts; -#ifdef CONFIG_NET_RX_BUSY_POLL - unsigned busy_poll_to; -#endif }; static inline bool io_should_wake(struct io_wait_queue *iowq) @@ -8236,87 +8113,6 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, return 1; } -#ifdef CONFIG_NET_RX_BUSY_POLL -static void io_adjust_busy_loop_timeout(struct timespec64 *ts, - struct io_wait_queue *iowq) -{ - unsigned busy_poll_to = READ_ONCE(sysctl_net_busy_poll); - struct timespec64 pollto = ns_to_timespec64(1000 * (s64)busy_poll_to); - - if (timespec64_compare(ts, &pollto) > 0) { - *ts = timespec64_sub(*ts, pollto); - iowq->busy_poll_to = busy_poll_to; - } else { - u64 to = timespec64_to_ns(ts); - - do_div(to, 1000); - iowq->busy_poll_to = to; - ts->tv_sec = 0; - ts->tv_nsec = 0; - } -} - -static inline bool io_busy_loop_timeout(unsigned long start_time, - unsigned long bp_usec) -{ - if (bp_usec) { - unsigned long end_time = start_time + bp_usec; - unsigned long now = busy_loop_current_time(); - - return time_after(now, end_time); - } - return true; -} - -static bool io_busy_loop_end(void *p, unsigned long start_time) -{ - struct io_wait_queue *iowq = p; - - return signal_pending(current) || - io_should_wake(iowq) || - io_busy_loop_timeout(start_time, iowq->busy_poll_to); -} - -static void io_blocking_napi_busy_loop(struct list_head *napi_list, - struct io_wait_queue *iowq) -{ - unsigned long start_time = - list_is_singular(napi_list) ? 0 : - busy_loop_current_time(); - - do { - if (list_is_singular(napi_list)) { - struct napi_entry *ne = - list_first_entry(napi_list, - struct napi_entry, list); - - napi_busy_loop(ne->napi_id, io_busy_loop_end, iowq, - true, BUSY_POLL_BUDGET); - io_check_napi_entry_timeout(ne); - break; - } - } while (io_napi_busy_loop(napi_list) && - !io_busy_loop_end(iowq, start_time)); -} - -static void io_putback_napi_list(struct io_ring_ctx *ctx, - struct list_head *napi_list) -{ - struct napi_entry *cne, *lne; - - spin_lock(&ctx->napi_lock); - list_for_each_entry(cne, &ctx->napi_list, list) - list_for_each_entry(lne, napi_list, list) - if (cne->napi_id == lne->napi_id) { - list_del(&lne->list); - kfree(lne); - break; - } - list_splice(napi_list, &ctx->napi_list); - spin_unlock(&ctx->napi_lock); -} -#endif /* CONFIG_NET_RX_BUSY_POLL */ - /* * Wait until events become available, if we don't already have some. The * application must reap them itself, as they reside on the shared cq ring. @@ -8329,9 +8125,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, struct io_rings *rings = ctx->rings; ktime_t timeout = KTIME_MAX; int ret; -#ifdef CONFIG_NET_RX_BUSY_POLL - LIST_HEAD(local_napi_list); -#endif do { io_cqring_overflow_flush(ctx); @@ -8354,29 +8147,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return ret; } -#ifdef CONFIG_NET_RX_BUSY_POLL - iowq.busy_poll_to = 0; - if (!(ctx->flags & IORING_SETUP_SQPOLL)) { - spin_lock(&ctx->napi_lock); - list_splice_init(&ctx->napi_list, &local_napi_list); - spin_unlock(&ctx->napi_lock); - } -#endif if (uts) { struct timespec64 ts; if (get_timespec64(&ts, uts)) return -EFAULT; -#ifdef CONFIG_NET_RX_BUSY_POLL - if (!list_empty(&local_napi_list)) - io_adjust_busy_loop_timeout(&ts, &iowq); -#endif timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); } -#ifdef CONFIG_NET_RX_BUSY_POLL - else if (!list_empty(&local_napi_list)) - iowq.busy_poll_to = READ_ONCE(sysctl_net_busy_poll); -#endif init_waitqueue_func_entry(&iowq.wq, io_wake_function); iowq.wq.private = current; @@ -8386,12 +8163,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events; trace_io_uring_cqring_wait(ctx, min_events); -#ifdef CONFIG_NET_RX_BUSY_POLL - if (iowq.busy_poll_to) - io_blocking_napi_busy_loop(&local_napi_list, &iowq); - if (!list_empty(&local_napi_list)) - io_putback_napi_list(ctx, &local_napi_list); -#endif do { /* if we can't even flush overflow, don't wait for more */ if (!io_cqring_overflow_flush(ctx)) { @@ -10176,7 +9947,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) io_req_caches_free(ctx); if (ctx->hash_map) io_wq_put_hash(ctx->hash_map); - io_free_napi_list(ctx); kfree(ctx->cancel_hash); kfree(ctx->dummy_ubuf); kfree(ctx->io_buffers); From 0f5e4b83b37a96e3643951588ed7176b9b187c0a Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Wed, 6 Apr 2022 13:55:33 +0200 Subject: [PATCH 400/708] io_uring: implement compat handling for IORING_REGISTER_IOWQ_AFF Similarly to the way it is done im mbind syscall. Cc: stable@vger.kernel.org # 5.14 Fixes: fe76421d1da1dcdb ("io_uring: allow user configurable IO thread CPU affinity") Signed-off-by: Eugene Syromiatnikov Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 60d6ac21519d..a88eca3e0902 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -11472,7 +11472,15 @@ static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx, if (len > cpumask_size()) len = cpumask_size(); - if (copy_from_user(new_mask, arg, len)) { + if (in_compat_syscall()) { + ret = compat_get_bitmap(cpumask_bits(new_mask), + (const compat_ulong_t __user *)arg, + len * 8 /* CHAR_BIT */); + } else { + ret = copy_from_user(new_mask, arg, len); + } + + if (ret) { free_cpumask_var(new_mask); return -EFAULT; } From 34bb77184123ae401100a4d156584f12fa630e5c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 6 Apr 2022 12:43:57 +0100 Subject: [PATCH 401/708] io_uring: nospec index for tags on files update Don't forget to array_index_nospec() for indexes before updating rsrc tags in __io_sqe_files_update(), just use already safe and precalculated index @i. Fixes: c3bdad0271834 ("io_uring: add generic rsrc update with tags") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index a88eca3e0902..b517fd9c3f60 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9094,7 +9094,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, err = -EBADF; break; } - *io_get_tag_slot(data, up->offset + done) = tag; + *io_get_tag_slot(data, i) = tag; io_fixed_file_set(file_slot, file); err = io_sqe_file_register(ctx, file, i); if (err) { From a07211e3001435fe8591b992464cd8d5e3c98c5a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 6 Apr 2022 12:43:58 +0100 Subject: [PATCH 402/708] io_uring: don't touch scm_fp_list after queueing skb It's safer to not touch scm_fp_list after we queued an skb to which it was assigned, there might be races lurking if we screw subtle sync guarantees on the io_uring side. Fixes: 6b06314c47e14 ("io_uring: add file set registration") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b517fd9c3f60..7e672464dcb3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8631,8 +8631,12 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) refcount_add(skb->truesize, &sk->sk_wmem_alloc); skb_queue_head(&sk->sk_receive_queue, skb); - for (i = 0; i < nr_files; i++) - fput(fpl->fp[i]); + for (i = 0; i < nr; i++) { + struct file *file = io_file_from_index(ctx, i + offset); + + if (file) + fput(file); + } } else { kfree_skb(skb); free_uid(fpl->user); From 8f0a24801bb44aa58496945aabb904c729176772 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 7 Apr 2022 14:05:04 +0100 Subject: [PATCH 403/708] io_uring: zero tag on rsrc removal Automatically default rsrc tag in io_queue_rsrc_removal(), it's safer than leaving it there and relying on the rest of the code to behave and not use it. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/1cf262a50df17478ea25b22494dcc19f3a80301f.1649336342.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7e672464dcb3..d62079e9096c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8927,13 +8927,15 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file, static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, struct io_rsrc_node *node, void *rsrc) { + u64 *tag_slot = io_get_tag_slot(data, idx); struct io_rsrc_put *prsrc; prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL); if (!prsrc) return -ENOMEM; - prsrc->tag = *io_get_tag_slot(data, idx); + prsrc->tag = *tag_slot; + *tag_slot = 0; prsrc->rsrc = rsrc; list_add(&prsrc->list, &node->rsrc_list); return 0; From 4cdd158be9d09223737df83136a1fb65269d809a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 7 Apr 2022 14:05:05 +0100 Subject: [PATCH 404/708] io_uring: use nospec annotation for more indexes There are still several places that using pre array_index_nospec() indexes, fix them up. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/b01ef5ee83f72ed35ad525912370b729f5d145f4.1649336342.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d62079e9096c..fafd1ca4780b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9004,7 +9004,7 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) bool needs_lock = issue_flags & IO_URING_F_UNLOCKED; struct io_fixed_file *file_slot; struct file *file; - int ret, i; + int ret; io_ring_submit_lock(ctx, needs_lock); ret = -ENXIO; @@ -9017,8 +9017,8 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) if (ret) goto out; - i = array_index_nospec(offset, ctx->nr_user_files); - file_slot = io_fixed_file_slot(&ctx->file_table, i); + offset = array_index_nospec(offset, ctx->nr_user_files); + file_slot = io_fixed_file_slot(&ctx->file_table, offset); ret = -EBADF; if (!file_slot->file_ptr) goto out; @@ -9074,8 +9074,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, if (file_slot->file_ptr) { file = (struct file *)(file_slot->file_ptr & FFS_MASK); - err = io_queue_rsrc_removal(data, up->offset + done, - ctx->rsrc_node, file); + err = io_queue_rsrc_removal(data, i, ctx->rsrc_node, file); if (err) break; file_slot->file_ptr = 0; @@ -9758,7 +9757,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, i = array_index_nospec(offset, ctx->nr_user_bufs); if (ctx->user_bufs[i] != ctx->dummy_ubuf) { - err = io_queue_rsrc_removal(ctx->buf_data, offset, + err = io_queue_rsrc_removal(ctx->buf_data, i, ctx->rsrc_node, ctx->user_bufs[i]); if (unlikely(err)) { io_buffer_unmap(ctx, &imu); From 262fc47ac17461c8cdc71c70aff6c3ea45acb0b9 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 7 Apr 2022 11:38:57 +0200 Subject: [PATCH 405/708] xen/balloon: don't use PV mode extra memory for zone device allocations When running as a Xen PV guest use the extra memory (memory which isn't allocated for the guest at boot time) only for ballooning purposes and not for zone device allocations. This will remove some code without any lack of functionality. While at it move some code to get rid of another #ifdef. Remove a comment which is stale since some time now. Signed-off-by: Juergen Gross Link: https://lore.kernel.org/r/20220407093857.1485-1-jgross@suse.com Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/balloon.c | 54 ++++++++++++++------------------- drivers/xen/unpopulated-alloc.c | 33 -------------------- 2 files changed, 23 insertions(+), 64 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index dfe26fa17e95..617a7f4f07a8 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -689,29 +689,34 @@ void xen_free_ballooned_pages(unsigned int nr_pages, struct page **pages) } EXPORT_SYMBOL(xen_free_ballooned_pages); -#if defined(CONFIG_XEN_PV) && !defined(CONFIG_XEN_UNPOPULATED_ALLOC) -static void __init balloon_add_region(unsigned long start_pfn, - unsigned long pages) +static void __init balloon_add_regions(void) { +#if defined(CONFIG_XEN_PV) + unsigned long start_pfn, pages; unsigned long pfn, extra_pfn_end; + unsigned int i; - /* - * If the amount of usable memory has been limited (e.g., with - * the 'mem' command line parameter), don't add pages beyond - * this limit. - */ - extra_pfn_end = min(max_pfn, start_pfn + pages); + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { + pages = xen_extra_mem[i].n_pfns; + if (!pages) + continue; - for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) { - /* totalram_pages and totalhigh_pages do not - include the boot-time balloon extension, so - don't subtract from it. */ - balloon_append(pfn_to_page(pfn)); + start_pfn = xen_extra_mem[i].start_pfn; + + /* + * If the amount of usable memory has been limited (e.g., with + * the 'mem' command line parameter), don't add pages beyond + * this limit. + */ + extra_pfn_end = min(max_pfn, start_pfn + pages); + + for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) + balloon_append(pfn_to_page(pfn)); + + balloon_stats.total_pages += extra_pfn_end - start_pfn; } - - balloon_stats.total_pages += extra_pfn_end - start_pfn; -} #endif +} static int __init balloon_init(void) { @@ -745,20 +750,7 @@ static int __init balloon_init(void) register_sysctl_table(xen_root); #endif -#if defined(CONFIG_XEN_PV) && !defined(CONFIG_XEN_UNPOPULATED_ALLOC) - { - int i; - - /* - * Initialize the balloon with pages from the extra memory - * regions (see arch/x86/xen/setup.c). - */ - for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) - if (xen_extra_mem[i].n_pfns) - balloon_add_region(xen_extra_mem[i].start_pfn, - xen_extra_mem[i].n_pfns); - } -#endif + balloon_add_regions(); task = kthread_run(balloon_thread, NULL, "xen-balloon"); if (IS_ERR(task)) { diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c index a8b41057c382..a39f2d36dd9c 100644 --- a/drivers/xen/unpopulated-alloc.c +++ b/drivers/xen/unpopulated-alloc.c @@ -230,39 +230,6 @@ void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages) } EXPORT_SYMBOL(xen_free_unpopulated_pages); -#ifdef CONFIG_XEN_PV -static int __init init(void) -{ - unsigned int i; - - if (!xen_domain()) - return -ENODEV; - - if (!xen_pv_domain()) - return 0; - - /* - * Initialize with pages from the extra memory regions (see - * arch/x86/xen/setup.c). - */ - for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { - unsigned int j; - - for (j = 0; j < xen_extra_mem[i].n_pfns; j++) { - struct page *pg = - pfn_to_page(xen_extra_mem[i].start_pfn + j); - - pg->zone_device_data = page_list; - page_list = pg; - list_count++; - } - } - - return 0; -} -subsys_initcall(init); -#endif - static int __init unpopulated_init(void) { int ret; From 830f1111d90e8770fcfad8bd5628e8ae6fecec06 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Mar 2022 20:00:07 -0400 Subject: [PATCH 406/708] NFS: Replace readdir's use of xxhash() with hash_64() Both xxhash() and hash_64() appear to give similarly low collision rates with a standard linearly increasing readdir offset. They both give similarly higher collision rates when applied to ext4's offsets. So switch to using the standard hash_64(). Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 4 ---- fs/nfs/dir.c | 9 +++------ 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 47a53b3362b6..14a72224b657 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -4,10 +4,6 @@ config NFS_FS depends on INET && FILE_LOCKING && MULTIUSER select LOCKD select SUNRPC - select CRYPTO - select CRYPTO_HASH - select XXHASH - select CRYPTO_XXHASH select NFS_ACL_SUPPORT if NFS_V3_ACL help Choose Y here if you want to access files residing on other diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0365063b85a2..c6b263b5faf1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include "delegation.h" #include "iostat.h" @@ -350,10 +350,7 @@ out: * of directory cookies. Content is addressed by the value of the * cookie index of the first readdir entry in a page. * - * The xxhash algorithm is chosen because it is fast, and is supposed - * to result in a decent flat distribution of hashes. - * - * We then select only the first 18 bits to avoid issues with excessive + * We select only the first 18 bits to avoid issues with excessive * memory use for the page cache XArray. 18 bits should allow the caching * of 262144 pages of sequences of readdir entries. Since each page holds * 127 readdir entries for a typical 64-bit system, that works out to a @@ -363,7 +360,7 @@ static pgoff_t nfs_readdir_page_cookie_hash(u64 cookie) { if (cookie == 0) return 0; - return xxhash(&cookie, sizeof(cookie), 0) & NFS_READDIR_COOKIE_MASK; + return hash_64(cookie, 18); } static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie, From f00432063db1a0db484e85193eccc6845435b80e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 3 Apr 2022 15:58:11 -0400 Subject: [PATCH 407/708] SUNRPC: Ensure we flush any closed sockets before xs_xprt_free() We must ensure that all sockets are closed before we call xprt_free() and release the reference to the net namespace. The problem is that calling fput() will defer closing the socket until delayed_fput() gets called. Let's fix the situation by allowing rpciod and the transport teardown code (which runs on the system wq) to call __fput_sync(), and directly close the socket. Reported-by: Felix Fu Acked-by: Al Viro Fixes: a73881c96d73 ("SUNRPC: Fix an Oops in udp_poll()") Cc: stable@vger.kernel.org # 5.1.x: 3be232f11a3c: SUNRPC: Prevent immediate close+reconnect Cc: stable@vger.kernel.org # 5.1.x: 89f42494f92f: SUNRPC: Don't call connect() more than once on a TCP socket Cc: stable@vger.kernel.org # 5.1.x Signed-off-by: Trond Myklebust --- fs/file_table.c | 1 + include/trace/events/sunrpc.h | 1 - net/sunrpc/xprt.c | 7 +------ net/sunrpc/xprtsock.c | 16 +++++++++++++--- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/file_table.c b/fs/file_table.c index 7d2e692b66a9..ada8fe814db9 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -412,6 +412,7 @@ void __fput_sync(struct file *file) } EXPORT_SYMBOL(fput); +EXPORT_SYMBOL(__fput_sync); void __init files_init(void) { diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ac33892da411..a4848c7bab80 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1004,7 +1004,6 @@ DEFINE_RPC_XPRT_LIFETIME_EVENT(connect); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_auto); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_done); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_force); -DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_cleanup); DEFINE_RPC_XPRT_LIFETIME_EVENT(destroy); DECLARE_EVENT_CLASS(rpc_xprt_event, diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 73344ffb2692..ad62eba540a4 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -930,12 +930,7 @@ void xprt_connect(struct rpc_task *task) if (!xprt_lock_write(xprt, task)) return; - if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) { - trace_xprt_disconnect_cleanup(xprt); - xprt->ops->close(xprt); - } - - if (!xprt_connected(xprt)) { + if (!xprt_connected(xprt) && !test_bit(XPRT_CLOSE_WAIT, &xprt->state)) { task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie; rpc_sleep_on_timeout(&xprt->pending, task, NULL, xprt_request_timeout(task->tk_rqstp)); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9b75891b3cc0..c6a13893e308 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -879,7 +879,7 @@ static int xs_local_send_request(struct rpc_rqst *req) /* Close the stream if the previous transmission was incomplete */ if (xs_send_request_was_aborted(transport, req)) { - xs_close(xprt); + xprt_force_disconnect(xprt); return -ENOTCONN; } @@ -915,7 +915,7 @@ static int xs_local_send_request(struct rpc_rqst *req) -status); fallthrough; case -EPIPE: - xs_close(xprt); + xprt_force_disconnect(xprt); status = -ENOTCONN; } @@ -1185,6 +1185,16 @@ static void xs_reset_transport(struct sock_xprt *transport) if (sk == NULL) return; + /* + * Make sure we're calling this in a context from which it is safe + * to call __fput_sync(). In practice that means rpciod and the + * system workqueue. + */ + if (!(current->flags & PF_WQ_WORKER)) { + WARN_ON_ONCE(1); + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + return; + } if (atomic_read(&transport->xprt.swapper)) sk_clear_memalloc(sk); @@ -1208,7 +1218,7 @@ static void xs_reset_transport(struct sock_xprt *transport) mutex_unlock(&transport->recv_mutex); trace_rpc_socket_close(xprt, sock); - fput(filp); + __fput_sync(filp); xprt_disconnect_done(xprt); } From dcc7977c7fdd0b59809cf7420ae1d5f5b5bd16ad Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 1 Apr 2022 10:59:05 +0800 Subject: [PATCH 408/708] NFSv4.2: Fix missing removal of SLAB_ACCOUNT on kmem_cache allocation The commit 5c60e89e71f8 ("NFSv4.2: Fix up an invalid combination of memory allocation flags") has stripped GFP_KERNEL_ACCOUNT down to GFP_KERNEL, however, it forgot to remove SLAB_ACCOUNT from kmem_cache allocation. It means that memory is still limited by kmemcg. This patch also fix a NULL pointer reference issue [1] reported by NeilBrown. Link: https://lore.kernel.org/all/164870069595.25542.17292003658915487357@noble.neil.brown.name/ [1] Fixes: 5c60e89e71f8 ("NFSv4.2: Fix up an invalid combination of memory allocation flags") Fixes: 5abc1e37afa0 ("mm: list_lru: allocate list_lru_one only when needed") Reported-by: NeilBrown Signed-off-by: Muchun Song Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index ad3405c64b9e..e7b34f7e0614 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -997,7 +997,7 @@ int __init nfs4_xattr_cache_init(void) nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache", sizeof(struct nfs4_xattr_cache), 0, - (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT), + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), nfs4_xattr_cache_init_once); if (nfs4_xattr_cache_cachep == NULL) return -ENOMEM; From d3c15033b240767d0287f1c4a529cbbe2d5ded8a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Apr 2022 23:18:57 -0400 Subject: [PATCH 409/708] SUNRPC: Handle ENOMEM in call_transmit_status() Both call_transmit() and call_bc_transmit() can now return ENOMEM, so let's make sure that we handle the errors gracefully. Fixes: 0472e4766049 ("SUNRPC: Convert socket page send code to use iov_iter()") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 3c7407104d54..07328f1d3885 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2200,6 +2200,7 @@ call_transmit_status(struct rpc_task *task) * socket just returned a connection error, * then hold onto the transport lock. */ + case -ENOMEM: case -ENOBUFS: rpc_delay(task, HZ>>2); fallthrough; @@ -2283,6 +2284,7 @@ call_bc_transmit_status(struct rpc_task *task) case -ENOTCONN: case -EPIPE: break; + case -ENOMEM: case -ENOBUFS: rpc_delay(task, HZ>>2); fallthrough; From 9d82819d5b065348ce623f196bf601028e22ed00 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Apr 2022 09:50:19 -0400 Subject: [PATCH 410/708] SUNRPC: Handle low memory situations in call_status() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to handle ENFILE, ENOBUFS, and ENOMEM, because xprt_wake_pending_tasks() can be called with any one of these due to socket creation failures. Fixes: b61d59fffd3e ("SUNRPC: xs_tcp_connect_worker{4,6}: merge common code") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 07328f1d3885..6757b0fa5367 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2367,6 +2367,11 @@ call_status(struct rpc_task *task) case -EPIPE: case -EAGAIN: break; + case -ENFILE: + case -ENOBUFS: + case -ENOMEM: + rpc_delay(task, HZ>>2); + break; case -EIO: /* shutdown or soft timeout */ goto out_exit; From 68b78dcdf93a845d68e34918d17c125924240584 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Apr 2022 22:33:19 -0400 Subject: [PATCH 411/708] NFSv4/pnfs: Handle RPC allocation errors in nfs4_proc_layoutget If rpc_run_task() fails due to an allocation error, then bail out early. Fixes: 910ad38697d9 ("NFS: Fix memory allocation in rpc_alloc_task()") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e3f5b380cefe..16106f805ffa 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -9615,6 +9615,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return ERR_CAST(task); status = rpc_wait_for_completion_task(task); if (status != 0) From 88dee0cc93adcd83db9d089c1163dc88edafd1c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Apr 2022 22:34:35 -0400 Subject: [PATCH 412/708] NFS: Ensure rpc_run_task() cannot fail in nfs_async_rename() Ensure the call to rpc_run_task() cannot fail by preallocating the rpc_task. Fixes: 910ad38697d9 ("NFS: Fix memory allocation in rpc_alloc_task()") Signed-off-by: Trond Myklebust --- fs/nfs/unlink.c | 1 + include/linux/nfs_xdr.h | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 5fa11e1aca4c..6f325e10056c 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -347,6 +347,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, data = kzalloc(sizeof(*data), GFP_KERNEL); if (data == NULL) return ERR_PTR(-ENOMEM); + task_setup_data.task = &data->task; task_setup_data.callback_data = data; data->cred = get_current_cred(); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 49ba486aea5f..2863e5a69c6a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1694,6 +1694,7 @@ struct nfs_unlinkdata { struct nfs_renamedata { struct nfs_renameargs args; struct nfs_renameres res; + struct rpc_task task; const struct cred *cred; struct inode *old_dir; struct dentry *old_dentry; From 25cf32ad5dba79385f6e7de9008dcb75556c42d2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Apr 2022 22:36:19 -0400 Subject: [PATCH 413/708] SUNRPC: Handle allocation failure in rpc_new_task() If the call to rpc_alloc_task() fails, then ensure that the calldata is released, and that rpc_run_task() and rpc_run_bc_task() bail out early. Reported-by: NeilBrown Fixes: 910ad38697d9 ("NFS: Fix memory allocation in rpc_alloc_task()") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 7 +++++++ net/sunrpc/sched.c | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 6757b0fa5367..af0174d7ce5a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1127,6 +1127,8 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data) struct rpc_task *task; task = rpc_new_task(task_setup_data); + if (IS_ERR(task)) + return task; if (!RPC_IS_ASYNC(task)) task->tk_flags |= RPC_TASK_CRED_NOREF; @@ -1227,6 +1229,11 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) * Create an rpc_task to send the data */ task = rpc_new_task(&task_setup_data); + if (IS_ERR(task)) { + xprt_free_bc_request(req); + return task; + } + xprt_init_bc_request(req, task); task->tk_action = call_bc_encode; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b258b87a3ec2..7f70c1e608b7 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1128,6 +1128,11 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) if (task == NULL) { task = rpc_alloc_task(); + if (task == NULL) { + rpc_release_calldata(setup_data->callback_ops, + setup_data->callback_data); + return ERR_PTR(-ENOMEM); + } flags = RPC_TASK_DYNAMIC; } From b056fa070814897be32d83b079dbc311375588e7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Apr 2022 14:10:23 -0400 Subject: [PATCH 414/708] SUNRPC: svc_tcp_sendmsg() should handle errors from xdr_alloc_bvec() The allocation is done with GFP_KERNEL, but it could still fail in a low memory situation. Fixes: 4a85a6a3320b ("SUNRPC: Handle TCP socket sends with kernel_sendpage() again") Signed-off-by: Trond Myklebust --- net/sunrpc/svcsock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 478f857cdaed..6ea3d87e1147 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1096,7 +1096,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr, int ret; *sentp = 0; - xdr_alloc_bvec(xdr, GFP_KERNEL); + ret = xdr_alloc_bvec(xdr, GFP_KERNEL); + if (ret < 0) + return ret; ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len); if (ret < 0) From ff053dbbaffec45c85e5bfe43306d26694a6433f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Apr 2022 22:51:58 -0400 Subject: [PATCH 415/708] SUNRPC: Move the call to xprt_send_pagedata() out of xprt_sock_sendmsg() The client and server have different requirements for their memory allocation, so move the allocation of the send buffer out of the socket send code that is common to both. Reported-by: NeilBrown Fixes: b2648015d452 ("SUNRPC: Make the rpciod and xprtiod slab allocation modes consistent") Signed-off-by: Trond Myklebust --- net/sunrpc/socklib.c | 6 ------ net/sunrpc/svcsock.c | 9 ++++++--- net/sunrpc/xprtsock.c | 15 +++++++++++++-- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 05b38bf68316..71ba4cf513bc 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -221,12 +221,6 @@ static int xprt_send_kvec(struct socket *sock, struct msghdr *msg, static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base) { - int err; - - err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask()); - if (err < 0) - return err; - iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec, xdr_buf_pagecount(xdr), xdr->page_len + xdr->page_base); return xprt_sendmsg(sock, msg, base + xdr->page_base); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 6ea3d87e1147..cc35ec433400 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -579,15 +579,18 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) if (svc_xprt_is_dead(xprt)) goto out_notconn; + err = xdr_alloc_bvec(xdr, GFP_KERNEL); + if (err < 0) + goto out_unlock; + err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); - xdr_free_bvec(xdr); if (err == -ECONNREFUSED) { /* ICMP error on earlier request. */ err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); - xdr_free_bvec(xdr); } + xdr_free_bvec(xdr); trace_svcsock_udp_send(xprt, err); - +out_unlock: mutex_unlock(&xprt->xpt_mutex); if (err < 0) return err; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c6a13893e308..8ab64ea46870 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -825,9 +825,14 @@ static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait) static int xs_stream_prepare_request(struct rpc_rqst *req) { + gfp_t gfp = rpc_task_gfp_mask(); + int ret; + + ret = xdr_alloc_bvec(&req->rq_snd_buf, gfp); + if (ret < 0) + return ret; xdr_free_bvec(&req->rq_rcv_buf); - return xdr_alloc_bvec( - &req->rq_rcv_buf, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + return xdr_alloc_bvec(&req->rq_rcv_buf, gfp); } /* @@ -956,6 +961,9 @@ static int xs_udp_send_request(struct rpc_rqst *req) if (!xprt_request_get_cong(xprt, req)) return -EBADSLT; + status = xdr_alloc_bvec(xdr, rpc_task_gfp_mask()); + if (status < 0) + return status; req->rq_xtime = ktime_get(); status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, 0, &sent); @@ -2554,6 +2562,9 @@ static int bc_sendto(struct rpc_rqst *req) int err; req->rq_xtime = ktime_get(); + err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask()); + if (err < 0) + return err; err = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, marker, &sent); xdr_free_bvec(xdr); if (err < 0 || sent != (xdr->len + sizeof(marker))) From b71597edfaade119157ded98991bac7160be80c2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 8 Apr 2022 10:00:42 +0200 Subject: [PATCH 416/708] mmc: core: improve API to make clear mmc_hw_reset is for cards To make it unambiguous that mmc_hw_reset() is for cards and not for controllers, we make the function argument mmc_card instead of mmc_host. Also, all users are converted. Suggested-by: Ulf Hansson Signed-off-by: Wolfram Sang Acked-by: Kalle Valo Link: https://lore.kernel.org/r/20220408080045.6497-2-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 2 +- drivers/mmc/core/core.c | 5 +++-- drivers/mmc/core/mmc_test.c | 3 +-- drivers/net/wireless/ath/ath10k/sdio.c | 2 +- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +- drivers/net/wireless/marvell/mwifiex/sdio.c | 2 +- drivers/net/wireless/ti/wlcore/sdio.c | 2 +- include/linux/mmc/core.h | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index db99882c95d8..506dc900f5c7 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -993,7 +993,7 @@ static int mmc_blk_reset(struct mmc_blk_data *md, struct mmc_host *host, return -EEXIST; md->reset_done |= type; - err = mmc_hw_reset(host); + err = mmc_hw_reset(host->card); /* Ensure we switch back to the correct partition */ if (err) { struct mmc_blk_data *main_md = diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 368f10405e13..c6ae16d40766 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1995,7 +1995,7 @@ static void mmc_hw_reset_for_init(struct mmc_host *host) /** * mmc_hw_reset - reset the card in hardware - * @host: MMC host to which the card is attached + * @card: card to be reset * * Hard reset the card. This function is only for upper layers, like the * block layer or card drivers. You cannot use it in host drivers (struct @@ -2003,8 +2003,9 @@ static void mmc_hw_reset_for_init(struct mmc_host *host) * * Return: 0 on success, -errno on failure */ -int mmc_hw_reset(struct mmc_host *host) +int mmc_hw_reset(struct mmc_card *card) { + struct mmc_host *host = card->host; int ret; ret = host->bus_ops->hw_reset(host); diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index e6a2fd2c6d5c..8d9bceeff986 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -2325,10 +2325,9 @@ static int mmc_test_profile_sglen_r_nonblock_perf(struct mmc_test_card *test) static int mmc_test_reset(struct mmc_test_card *test) { struct mmc_card *card = test->card; - struct mmc_host *host = card->host; int err; - err = mmc_hw_reset(host); + err = mmc_hw_reset(card); if (!err) { /* * Reset will re-enable the card's command queue, but tests diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c index 63e1c2d783c5..73693c66cef1 100644 --- a/drivers/net/wireless/ath/ath10k/sdio.c +++ b/drivers/net/wireless/ath/ath10k/sdio.c @@ -1633,7 +1633,7 @@ static void ath10k_sdio_hif_power_down(struct ath10k *ar) return; } - ret = mmc_hw_reset(ar_sdio->func->card->host); + ret = mmc_hw_reset(ar_sdio->func->card); if (ret) ath10k_warn(ar, "unable to reset sdio: %d\n", ret); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index ba3c159111d3..55285cad527f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -4165,7 +4165,7 @@ static int brcmf_sdio_bus_reset(struct device *dev) /* reset the adapter */ sdio_claim_host(sdiodev->func1); - mmc_hw_reset(sdiodev->func1->card->host); + mmc_hw_reset(sdiodev->func1->card); sdio_release_host(sdiodev->func1); brcmf_bus_change_state(sdiodev->bus_if, BRCMF_BUS_DOWN); diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index bde9e4bbfffe..4f3238d2a171 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -2639,7 +2639,7 @@ static void mwifiex_sdio_card_reset_work(struct mwifiex_adapter *adapter) /* Run a HW reset of the SDIO interface. */ sdio_claim_host(func); - ret = mmc_hw_reset(func->card->host); + ret = mmc_hw_reset(func->card); sdio_release_host(func); switch (ret) { diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index 72fc41ac83c0..9140b0163474 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -146,7 +146,7 @@ static int wl12xx_sdio_power_on(struct wl12xx_sdio_glue *glue) * To guarantee that the SDIO card is power cycled, as required to make * the FW programming to succeed, let's do a brute force HW reset. */ - mmc_hw_reset(card->host); + mmc_hw_reset(card); sdio_enable_func(func); sdio_release_host(func); diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 71101d1ec825..de5c64bbdb72 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -175,7 +175,7 @@ void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq); int mmc_wait_for_cmd(struct mmc_host *host, struct mmc_command *cmd, int retries); -int mmc_hw_reset(struct mmc_host *host); +int mmc_hw_reset(struct mmc_card *card); int mmc_sw_reset(struct mmc_host *host); void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card); From 71ff461c3f41f6465434b9e980c01782763e7ad8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 31 Mar 2022 09:23:01 +0300 Subject: [PATCH 417/708] iommu/omap: Fix regression in probe for NULL pointer dereference Commit 3f6634d997db ("iommu: Use right way to retrieve iommu_ops") started triggering a NULL pointer dereference for some omap variants: __iommu_probe_device from probe_iommu_group+0x2c/0x38 probe_iommu_group from bus_for_each_dev+0x74/0xbc bus_for_each_dev from bus_iommu_probe+0x34/0x2e8 bus_iommu_probe from bus_set_iommu+0x80/0xc8 bus_set_iommu from omap_iommu_init+0x88/0xcc omap_iommu_init from do_one_initcall+0x44/0x24 This is caused by omap iommu probe returning 0 instead of ERR_PTR(-ENODEV) as noted by Jason Gunthorpe . Looks like the regression already happened with an earlier commit 6785eb9105e3 ("iommu/omap: Convert to probe/release_device() call-backs") that changed the function return type and missed converting one place. Cc: Drew Fustini Cc: Lu Baolu Cc: Suman Anna Suggested-by: Jason Gunthorpe Fixes: 6785eb9105e3 ("iommu/omap: Convert to probe/release_device() call-backs") Fixes: 3f6634d997db ("iommu: Use right way to retrieve iommu_ops") Signed-off-by: Tony Lindgren Tested-by: Drew Fustini Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220331062301.24269-1-tony@atomide.com Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 4aab631ef517..d9cf2820c02e 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1661,7 +1661,7 @@ static struct iommu_device *omap_iommu_probe_device(struct device *dev) num_iommus = of_property_count_elems_of_size(dev->of_node, "iommus", sizeof(phandle)); if (num_iommus < 0) - return 0; + return ERR_PTR(-ENODEV); arch_data = kcalloc(num_iommus + 1, sizeof(*arch_data), GFP_KERNEL); if (!arch_data) From 31a099dbd91e69fcab55eef4be15ed7a8c984918 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Thu, 7 Apr 2022 15:33:20 +0800 Subject: [PATCH 418/708] arm64: patch_text: Fixup last cpu should be master These patch_text implementations are using stop_machine_cpuslocked infrastructure with atomic cpu_count. The original idea: When the master CPU patch_text, the others should wait for it. But current implementation is using the first CPU as master, which couldn't guarantee the remaining CPUs are waiting. This patch changes the last CPU as the master to solve the potential risk. Fixes: ae16480785de ("arm64: introduce interfaces to hotpatch kernel and module code") Signed-off-by: Guo Ren Signed-off-by: Guo Ren Reviewed-by: Catalin Marinas Reviewed-by: Masami Hiramatsu Cc: Link: https://lore.kernel.org/r/20220407073323.743224-2-guoren@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/patching.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c index 771f543464e0..33e0fabc0b79 100644 --- a/arch/arm64/kernel/patching.c +++ b/arch/arm64/kernel/patching.c @@ -117,8 +117,8 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg) int i, ret = 0; struct aarch64_insn_patch *pp = arg; - /* The first CPU becomes master */ - if (atomic_inc_return(&pp->cpu_count) == 1) { + /* The last CPU becomes master */ + if (atomic_inc_return(&pp->cpu_count) == num_online_cpus()) { for (i = 0; ret == 0 && i < pp->insn_cnt; i++) ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], pp->new_insns[i]); From 83bea32ac7ed37bbda58733de61fc9369513f9f9 Mon Sep 17 00:00:00 2001 From: Chanho Park Date: Thu, 7 Apr 2022 18:11:28 +0900 Subject: [PATCH 419/708] arm64: Add part number for Arm Cortex-A78AE Add the MIDR part number info for the Arm Cortex-A78AE[1] and add it to spectre-BHB affected list[2]. [1]: https://developer.arm.com/Processors/Cortex-A78AE [2]: https://developer.arm.com/Arm%20Security%20Center/Spectre-BHB Cc: Catalin Marinas Cc: Mark Rutland Cc: Will Deacon Cc: James Morse Signed-off-by: Chanho Park Link: https://lore.kernel.org/r/20220407091128.8700-1-chanho61.park@samsung.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/proton-pack.c | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 232b439cbaf3..ff8f4511df71 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -75,6 +75,7 @@ #define ARM_CPU_PART_CORTEX_A77 0xD0D #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 #define ARM_CPU_PART_CORTEX_A710 0xD47 @@ -130,6 +131,7 @@ #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 5777929d35bf..40be3a7c2c53 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -853,6 +853,7 @@ u8 spectre_bhb_loop_affected(int scope) if (scope == SCOPE_LOCAL_CPU) { static const struct midr_range spectre_bhb_k32_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), From 2610bd72efe4b376febd477425769f647152a3a8 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Tue, 5 Apr 2022 16:53:00 +0900 Subject: [PATCH 420/708] dt-bindings: net: ave: Clean up clocks, resets, and their names using compatible string Instead of "oneOf:" choices, use "allOf:" and "if:" to define clocks, resets, and their names that can be taken by the compatible string. The order of clock-names and reset-names doesn't change here. Signed-off-by: Kunihiko Hayashi Reviewed-by: Rob Herring Signed-off-by: David S. Miller --- .../bindings/net/socionext,uniphier-ave4.yaml | 55 +++++++++++++------ 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml index e602761f7b14..f257520b9a7e 100644 --- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml +++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml @@ -13,9 +13,6 @@ description: | This describes the devicetree bindings for AVE ethernet controller implemented on Socionext UniPhier SoCs. -allOf: - - $ref: ethernet-controller.yaml# - properties: compatible: enum: @@ -44,25 +41,13 @@ properties: minItems: 1 maxItems: 4 - clock-names: - oneOf: - - items: # for Pro4 - - const: gio - - const: ether - - const: ether-gb - - const: ether-phy - - const: ether # for others + clock-names: true resets: minItems: 1 maxItems: 2 - reset-names: - oneOf: - - items: # for Pro4 - - const: gio - - const: ether - - const: ether # for others + reset-names: true socionext,syscon-phy-mode: $ref: /schemas/types.yaml#/definitions/phandle-array @@ -78,6 +63,42 @@ properties: $ref: mdio.yaml# unevaluatedProperties: false +allOf: + - $ref: ethernet-controller.yaml# + - if: + properties: + compatible: + contains: + const: socionext,uniphier-pro4-ave4 + then: + properties: + clocks: + minItems: 4 + maxItems: 4 + clock-names: + items: + - const: gio + - const: ether + - const: ether-gb + - const: ether-phy + resets: + minItems: 2 + maxItems: 2 + reset-names: + items: + - const: gio + - const: ether + else: + properties: + clocks: + maxItems: 1 + clock-names: + const: ether + resets: + maxItems: 1 + reset-names: + const: ether + required: - compatible - reg From 5a80059d88046b0a87e957565363ba3ee57600bb Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Tue, 5 Apr 2022 16:53:01 +0900 Subject: [PATCH 421/708] dt-bindings: net: ave: Use unevaluatedProperties This refers common bindings, so this is preferred for unevaluatedProperties instead of additionalProperties. Signed-off-by: Kunihiko Hayashi Acked-by: Rob Herring Signed-off-by: David S. Miller --- .../devicetree/bindings/net/socionext,uniphier-ave4.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml index f257520b9a7e..b0ebcef6801c 100644 --- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml +++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml @@ -111,7 +111,7 @@ required: - reset-names - mdio -additionalProperties: false +unevaluatedProperties: false examples: - | From 2105f700b53c24aa48b65c15652acc386044d26a Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 6 Apr 2022 14:22:41 +0300 Subject: [PATCH 422/708] net/sched: flower: fix parsing of ethertype following VLAN header A tc flower filter matching TCA_FLOWER_KEY_VLAN_ETH_TYPE is expected to match the L2 ethertype following the first VLAN header, as confirmed by linked discussion with the maintainer. However, such rule also matches packets that have additional second VLAN header, even though filter has both eth_type and vlan_ethtype set to "ipv4". Looking at the code this seems to be mostly an artifact of the way flower uses flow dissector. First, even though looking at the uAPI eth_type and vlan_ethtype appear like a distinct fields, in flower they are all mapped to the same key->basic.n_proto. Second, flow dissector skips following VLAN header as no keys for FLOW_DISSECTOR_KEY_CVLAN are set and eventually assigns the value of n_proto to last parsed header. With these, such filters ignore any headers present between first VLAN header and first "non magic" header (ipv4 in this case) that doesn't result FLOW_DISSECT_RET_PROTO_AGAIN. Fix the issue by extending flow dissector VLAN key structure with new 'vlan_eth_type' field that matches first ethertype following previously parsed VLAN header. Modify flower classifier to set the new flow_dissector_key_vlan->vlan_eth_type with value obtained from TCA_FLOWER_KEY_VLAN_ETH_TYPE/TCA_FLOWER_KEY_CVLAN_ETH_TYPE uAPIs. Link: https://lore.kernel.org/all/Yjhgi48BpTGh6dig@nanopsycho/ Fixes: 9399ae9a6cb2 ("net_sched: flower: Add vlan support") Fixes: d64efd0926ba ("net/sched: flower: Add supprt for matching on QinQ vlan headers") Signed-off-by: Vlad Buslov Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 2 ++ net/core/flow_dissector.c | 1 + net/sched/cls_flower.c | 18 +++++++++++++----- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index aa33e1092e2c..9f65f1bfbd24 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -59,6 +59,8 @@ struct flow_dissector_key_vlan { __be16 vlan_tci; }; __be16 vlan_tpid; + __be16 vlan_eth_type; + u16 padding; }; struct flow_dissector_mpls_lse { diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 03b6e649c428..9bd887610c18 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1183,6 +1183,7 @@ proto_again: VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } key_vlan->vlan_tpid = saved_vlan_tpid; + key_vlan->vlan_eth_type = proto; } fdret = FLOW_DISSECT_RET_PROTO_AGAIN; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index c80fc49c0da1..ed5e6f08e74a 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1013,6 +1013,7 @@ static int fl_set_key_mpls(struct nlattr **tb, static void fl_set_key_vlan(struct nlattr **tb, __be16 ethertype, int vlan_id_key, int vlan_prio_key, + int vlan_next_eth_type_key, struct flow_dissector_key_vlan *key_val, struct flow_dissector_key_vlan *key_mask) { @@ -1031,6 +1032,11 @@ static void fl_set_key_vlan(struct nlattr **tb, } key_val->vlan_tpid = ethertype; key_mask->vlan_tpid = cpu_to_be16(~0); + if (tb[vlan_next_eth_type_key]) { + key_val->vlan_eth_type = + nla_get_be16(tb[vlan_next_eth_type_key]); + key_mask->vlan_eth_type = cpu_to_be16(~0); + } } static void fl_set_key_flag(u32 flower_key, u32 flower_mask, @@ -1602,8 +1608,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb, if (eth_type_vlan(ethertype)) { fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID, - TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan, - &mask->vlan); + TCA_FLOWER_KEY_VLAN_PRIO, + TCA_FLOWER_KEY_VLAN_ETH_TYPE, + &key->vlan, &mask->vlan); if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) { ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]); @@ -1611,6 +1618,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_CVLAN_ID, TCA_FLOWER_KEY_CVLAN_PRIO, + TCA_FLOWER_KEY_CVLAN_ETH_TYPE, &key->cvlan, &mask->cvlan); fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_CVLAN_ETH_TYPE, @@ -3002,13 +3010,13 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, goto nla_put_failure; if (mask->basic.n_proto) { - if (mask->cvlan.vlan_tpid) { + if (mask->cvlan.vlan_eth_type) { if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE, key->basic.n_proto)) goto nla_put_failure; - } else if (mask->vlan.vlan_tpid) { + } else if (mask->vlan.vlan_eth_type) { if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE, - key->basic.n_proto)) + key->vlan.vlan_eth_type)) goto nla_put_failure; } } From 908b768f9a8ffca2ef69f3145e23a6a259f99ac3 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 8 Apr 2022 11:32:00 +0300 Subject: [PATCH 423/708] MAINTAINERS: Fix reviewer info for a few ROHM ICs The email backend used by ROHM keeps labeling patches as spam. Additionally, there have been reports of some emails been completely dropped. Finally also the email list (or shared inbox) linux-power@fi.rohmeurope.com inadvertly stopped working and has not been reviwed during the past few weeks. Remove no longer working list 'linux-power' list-entry and switch my email to use the personal gmail account instead of the company account. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/Yk/zAHusOdf4+h06@dc73szyh141qn5ck3nwqy-3.rev.dnainternet.fi Signed-off-by: Mark Brown --- MAINTAINERS | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index fd768d43e048..3af36d852c38 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5715,7 +5715,7 @@ W: http://lanana.org/docs/device-list/index.html DEVICE RESOURCE MANAGEMENT HELPERS M: Hans de Goede -R: Matti Vaittinen +R: Matti Vaittinen S: Maintained F: include/linux/devm-helpers.h @@ -11208,7 +11208,7 @@ F: scripts/spdxcheck.py LINEAR RANGES HELPERS M: Mark Brown -R: Matti Vaittinen +R: Matti Vaittinen F: lib/linear_ranges.c F: lib/test_linear_ranges.c F: include/linux/linear_range.h @@ -17011,8 +17011,7 @@ S: Odd Fixes F: drivers/tty/serial/rp2.* ROHM BD99954 CHARGER IC -R: Matti Vaittinen -L: linux-power@fi.rohmeurope.com +R: Matti Vaittinen S: Supported F: drivers/power/supply/bd99954-charger.c F: drivers/power/supply/bd99954-charger.h @@ -17035,8 +17034,7 @@ F: drivers/regulator/bd9571mwv-regulator.c F: include/linux/mfd/bd9571mwv.h ROHM POWER MANAGEMENT IC DEVICE DRIVERS -R: Matti Vaittinen -L: linux-power@fi.rohmeurope.com +R: Matti Vaittinen S: Supported F: drivers/clk/clk-bd718x7.c F: drivers/gpio/gpio-bd71815.c @@ -21118,7 +21116,7 @@ F: include/linux/regulator/ K: regulator_get_optional VOLTAGE AND CURRENT REGULATOR IRQ HELPERS -R: Matti Vaittinen +R: Matti Vaittinen F: drivers/regulator/irq_helpers.c VRF From 2cd1881b9821be68d1eb748c96311258b16af225 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 6 Apr 2022 16:54:20 +0300 Subject: [PATCH 424/708] bonding: Update layer2 and layer2+3 hash formula documentation When using layer2 or layer2+3 hash, only the 5th byte of the MAC addresses is used. Signed-off-by: Gal Pressman Signed-off-by: David S. Miller --- Documentation/networking/bonding.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index 525e6842dd33..43be3782e5df 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -894,7 +894,7 @@ xmit_hash_policy Uses XOR of hardware MAC addresses and packet type ID field to generate the hash. The formula is - hash = source MAC XOR destination MAC XOR packet type ID + hash = source MAC[5] XOR destination MAC[5] XOR packet type ID slave number = hash modulo slave count This algorithm will place all traffic to a particular @@ -910,7 +910,7 @@ xmit_hash_policy Uses XOR of hardware MAC addresses and IP addresses to generate the hash. The formula is - hash = source MAC XOR destination MAC XOR packet type ID + hash = source MAC[5] XOR destination MAC[5] XOR packet type ID hash = hash XOR source IP XOR destination IP hash = hash XOR (hash RSHIFT 16) hash = hash XOR (hash RSHIFT 8) From 726e2c5929de841fdcef4e2bf995680688ae1b87 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 6 Apr 2022 16:18:54 +0200 Subject: [PATCH 425/708] veth: Ensure eth header is in skb's linear part After feeding a decapsulated packet to a veth device with act_mirred, skb_headlen() may be 0. But veth_xmit() calls __dev_forward_skb(), which expects at least ETH_HLEN byte of linear data (as __dev_forward_skb2() calls eth_type_trans(), which pulls ETH_HLEN bytes unconditionally). Use pskb_may_pull() to ensure veth_xmit() respects this constraint. kernel BUG at include/linux/skbuff.h:2328! RIP: 0010:eth_type_trans+0xcf/0x140 Call Trace: __dev_forward_skb2+0xe3/0x160 veth_xmit+0x6e/0x250 [veth] dev_hard_start_xmit+0xc7/0x200 __dev_queue_xmit+0x47f/0x520 ? skb_ensure_writable+0x85/0xa0 ? skb_mpls_pop+0x98/0x1c0 tcf_mirred_act+0x442/0x47e [act_mirred] tcf_action_exec+0x86/0x140 fl_classify+0x1d8/0x1e0 [cls_flower] ? dma_pte_clear_level+0x129/0x1a0 ? dma_pte_clear_level+0x129/0x1a0 ? prb_fill_curr_block+0x2f/0xc0 ? skb_copy_bits+0x11a/0x220 __tcf_classify+0x58/0x110 tcf_classify_ingress+0x6b/0x140 __netif_receive_skb_core.constprop.0+0x47d/0xfd0 ? __iommu_dma_unmap_swiotlb+0x44/0x90 __netif_receive_skb_one_core+0x3d/0xa0 netif_receive_skb+0x116/0x170 be_process_rx+0x22f/0x330 [be2net] be_poll+0x13c/0x370 [be2net] __napi_poll+0x2a/0x170 net_rx_action+0x22f/0x2f0 __do_softirq+0xca/0x2a8 __irq_exit_rcu+0xc1/0xe0 common_interrupt+0x83/0xa0 Fixes: e314dbdc1c0d ("[NET]: Virtual ethernet device driver.") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/veth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 1b5714926d81..eb0121a64d6d 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -320,7 +320,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); rcv = rcu_dereference(priv->peer); - if (unlikely(!rcv)) { + if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) { kfree_skb(skb); goto drop; } From 1b808993e19447731e823b1313ee4e8da7fd92a0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Apr 2022 14:15:21 -0700 Subject: [PATCH 426/708] flow_dissector: fix false-positive __read_overflow2_field() warning Bounds checking is unhappy that we try to copy both Ethernet addresses but pass pointer to the first one. Luckily destination address is the first field so pass the pointer to the entire header, whatever. Signed-off-by: Jakub Kicinski Reviewed-by: Kees Cook Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 9bd887610c18..6f7ec72016dc 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1032,7 +1032,7 @@ bool __skb_flow_dissect(const struct net *net, key_eth_addrs = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS, target_container); - memcpy(key_eth_addrs, ð->h_dest, sizeof(*key_eth_addrs)); + memcpy(key_eth_addrs, eth, sizeof(*key_eth_addrs)); } proto_again: From 7cea5560bf656b84f9ed01c0cc829d4eecd0640b Mon Sep 17 00:00:00 2001 From: Hongbin Wang Date: Wed, 6 Apr 2022 22:46:22 -0400 Subject: [PATCH 427/708] vxlan: fix error return code in vxlan_fdb_append When kmalloc and dst_cache_init failed, should return ENOMEM rather than ENOBUFS. Signed-off-by: Hongbin Wang Signed-off-by: David S. Miller --- drivers/net/vxlan/vxlan_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index de97ff98d36e..8a5e3a6d32d7 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -651,11 +651,11 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, rd = kmalloc(sizeof(*rd), GFP_ATOMIC); if (rd == NULL) - return -ENOBUFS; + return -ENOMEM; if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) { kfree(rd); - return -ENOBUFS; + return -ENOMEM; } rd->remote_ip = *ip; From 213d266ebfb1621aab79cfe63388facc520a1381 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 19 Mar 2022 16:21:09 -0700 Subject: [PATCH 428/708] gpiolib: acpi: use correct format characters When compiling with -Wformat, clang emits the following warning: gpiolib-acpi.c:393:4: warning: format specifies type 'unsigned char' but the argument has type 'int' [-Wformat] pin); ^~~ So warning that '%hhX' is paired with an 'int' is all just completely mindless and wrong. Sadly, I can see a different bogus warning reason why people would want to use '%02hhX'. Again, the *sane* thing from a human perspective is to use '%02X. But if the compiler doesn't do any range analysis at all, it could decide that "Oh, that print format could need up to 8 bytes of space in the result". Using '%02hhX' would cut that down to two. And since we use char ev_name[5]; and currently use "_%c%02hhX" as the format string, even a compiler that doesn't notice that "pin <= 255" test that guards this all will go "OK, that's at most 4 bytes and the final NUL termination, so it's fine". While a compiler - like gcc - that only sees that the original source of the 'pin' value is a 'unsigned short' array, and then doesn't take the "pin <= 255" into account, will warn like this: gpiolib-acpi.c: In function 'acpi_gpiochip_request_interrupt': gpiolib-acpi.c:206:24: warning: '%02X' directive writing between 2 and 4 bytes into a region of size 3 [-Wformat-overflow=] sprintf(ev_name, "_%c%02X", ^~~~ gpiolib-acpi.c:206:20: note: directive argument in the range [0, 65535] because gcc isn't being very good at that argument range analysis either. In other words, the original use of 'hhx' was bogus to begin with, and due to *another* compiler warning being bad, and we had that bad code being written back in 2016 to work around _that_ compiler warning (commit e40a3ae1f794: "gpio: acpi: work around false-positive -Wstring-overflow warning"). Sadly, two different bad compiler warnings together does not make for one good one. It just makes for even more pain. End result: I think the simplest and cleanest option is simply the proposed change which undoes that '%hhX' change for gcc, and replaces it with just using a slightly bigger stack allocation. It's not like a 5-byte allocation is in any way likely to have saved any actual stack, since all the other variables in that function are 'int' or bigger. False-positive compiler warnings really do make people write worse code, and that's a problem. But on a scale of bad code, I feel that extending the buffer trivially is better than adding a pointless cast that literally makes no sense. At least in this case the end result isn't unreadable or buggy. We've had several cases of bad compiler warnings that caused changes that were actually horrendously wrong. Fixes: e40a3ae1f794 ("gpio: acpi: work around false-positive -Wstring-overflow warning") Signed-off-by: Linus Torvalds Signed-off-by: Andy Shevchenko --- drivers/gpio/gpiolib-acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index a5495ad31c9c..b7c2f2af1dee 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -387,8 +387,8 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, pin = agpio->pin_table[0]; if (pin <= 255) { - char ev_name[5]; - sprintf(ev_name, "_%c%02hhX", + char ev_name[8]; + sprintf(ev_name, "_%c%02X", agpio->triggering == ACPI_EDGE_SENSITIVE ? 'E' : 'L', pin); if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle))) From 0c2cae09a765b1c1d842eb9328982976ec735926 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 17 Mar 2022 11:33:11 +0200 Subject: [PATCH 429/708] gpiolib: acpi: Convert type for pin to be unsigned A pin that comes from ACPI tables is of unsigned type. This also applies to the internal APIs which use unsigned int to store the pin. Convert type for pin to be unsigned in the places where it's not yet true. While at it, add a stub for acpi_get_and_request_gpiod() for the sake of consistency in the APIs. Signed-off-by: Andy Shevchenko --- drivers/gpio/gpiolib-acpi.c | 18 ++++++++++-------- include/linux/gpio/consumer.h | 8 +++++++- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index b7c2f2af1dee..c2523ac26fac 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -108,7 +108,7 @@ static int acpi_gpiochip_find(struct gpio_chip *gc, void *data) * controller does not have GPIO chip registered at the moment. This is to * support probe deferral. */ -static struct gpio_desc *acpi_get_gpiod(char *path, int pin) +static struct gpio_desc *acpi_get_gpiod(char *path, unsigned int pin) { struct gpio_chip *chip; acpi_handle handle; @@ -136,7 +136,7 @@ static struct gpio_desc *acpi_get_gpiod(char *path, int pin) * as it is intended for use outside of the GPIO layer (in a similar fashion to * gpiod_get_index() for example) it also holds a reference to the GPIO device. */ -struct gpio_desc *acpi_get_and_request_gpiod(char *path, int pin, char *label) +struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, char *label) { struct gpio_desc *gpio; int ret; @@ -317,11 +317,12 @@ static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip, return desc; } -static bool acpi_gpio_in_ignore_list(const char *controller_in, int pin_in) +static bool acpi_gpio_in_ignore_list(const char *controller_in, unsigned int pin_in) { const char *controller, *pin_str; - int len, pin; + unsigned int pin; char *endp; + int len; controller = ignore_wake; while (controller) { @@ -354,13 +355,13 @@ err: static bool acpi_gpio_irq_is_wake(struct device *parent, struct acpi_resource_gpio *agpio) { - int pin = agpio->pin_table[0]; + unsigned int pin = agpio->pin_table[0]; if (agpio->wake_capable != ACPI_WAKE_CAPABLE) return false; if (acpi_gpio_in_ignore_list(dev_name(parent), pin)) { - dev_info(parent, "Ignoring wakeup on pin %d\n", pin); + dev_info(parent, "Ignoring wakeup on pin %u\n", pin); return false; } @@ -378,7 +379,8 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, struct acpi_gpio_event *event; irq_handler_t handler = NULL; struct gpio_desc *desc; - int ret, pin, irq; + unsigned int pin; + int ret, irq; if (!acpi_gpio_get_irq_resource(ares, &agpio)) return AE_OK; @@ -1098,7 +1100,7 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address, length = min_t(u16, agpio->pin_table_length, pin_index + bits); for (i = pin_index; i < length; ++i) { - int pin = agpio->pin_table[i]; + unsigned int pin = agpio->pin_table[i]; struct acpi_gpio_connection *conn; struct gpio_desc *desc; bool found; diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index c3aa8b330e1c..e71f6e1bfafe 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -688,7 +688,7 @@ void acpi_dev_remove_driver_gpios(struct acpi_device *adev); int devm_acpi_dev_add_driver_gpios(struct device *dev, const struct acpi_gpio_mapping *gpios); -struct gpio_desc *acpi_get_and_request_gpiod(char *path, int pin, char *label); +struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, char *label); #else /* CONFIG_GPIOLIB && CONFIG_ACPI */ @@ -705,6 +705,12 @@ static inline int devm_acpi_dev_add_driver_gpios(struct device *dev, return -ENXIO; } +static inline struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, + char *label) +{ + return ERR_PTR(-ENOSYS); +} + #endif /* CONFIG_GPIOLIB && CONFIG_ACPI */ From 98c27add5d96485db731a92dac31567b0486cae8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 7 Apr 2022 23:16:57 +0200 Subject: [PATCH 430/708] ALSA: usb-audio: Cap upper limits of buffer/period bytes for implicit fb In the implicit feedback mode, some parameters are tied between both playback and capture streams. One of the tied parameters is the period size, and this can be a problem if the device has different number of channels to both streams. Assume that an application opens a playback stream that has an implicit feedback from a capture stream, and it allocates up to the max period and buffer size as much as possible. When the capture device supports only more channels than the playback, the minimum period and buffer sizes become larger than the sizes the playback stream took. That is, the minimum size will be over the max size the driver limits, and PCM core sees as if no available configuration is found, returning -EINVAL mercilessly. For avoiding this problem, we have to look through the counter part of audioformat list for each sync ep, and checks the channels. If more channels are found there, we reduce the max period and buffer sizes accordingly. You may wonder that the patch adds only the evaluation of channels between streams, and what about other parameters? Both the format and the rate are tied in the implicit fb mode, hence they are always identical. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215792 Fixes: 5a6c3e11c9c9 ("ALSA: usb-audio: Add hw constraint for implicit fb sync") Cc: Link: https://lore.kernel.org/r/20220407211657.15087-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index cec6e91afea2..6a460225f2e3 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -659,6 +659,9 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream) #define hwc_debug(fmt, args...) do { } while(0) #endif +#define MAX_BUFFER_BYTES (1024 * 1024) +#define MAX_PERIOD_BYTES (512 * 1024) + static const struct snd_pcm_hardware snd_usb_hardware = { .info = SNDRV_PCM_INFO_MMAP | @@ -669,9 +672,9 @@ static const struct snd_pcm_hardware snd_usb_hardware = SNDRV_PCM_INFO_PAUSE, .channels_min = 1, .channels_max = 256, - .buffer_bytes_max = 1024 * 1024, + .buffer_bytes_max = MAX_BUFFER_BYTES, .period_bytes_min = 64, - .period_bytes_max = 512 * 1024, + .period_bytes_max = MAX_PERIOD_BYTES, .periods_min = 2, .periods_max = 1024, }; @@ -971,6 +974,78 @@ static int hw_rule_periods_implicit_fb(struct snd_pcm_hw_params *params, ep->cur_buffer_periods); } +/* get the adjusted max buffer (or period) bytes that can fit with the + * paired format for implicit fb + */ +static unsigned int +get_adjusted_max_bytes(struct snd_usb_substream *subs, + struct snd_usb_substream *pair, + struct snd_pcm_hw_params *params, + unsigned int max_bytes, + bool reverse_map) +{ + const struct audioformat *fp, *pp; + unsigned int rmax = 0, r; + + list_for_each_entry(fp, &subs->fmt_list, list) { + if (!fp->implicit_fb) + continue; + if (!reverse_map && + !hw_check_valid_format(subs, params, fp)) + continue; + list_for_each_entry(pp, &pair->fmt_list, list) { + if (pp->iface != fp->sync_iface || + pp->altsetting != fp->sync_altsetting || + pp->ep_idx != fp->sync_ep_idx) + continue; + if (reverse_map && + !hw_check_valid_format(pair, params, pp)) + break; + if (!reverse_map && pp->channels > fp->channels) + r = max_bytes * fp->channels / pp->channels; + else if (reverse_map && pp->channels < fp->channels) + r = max_bytes * pp->channels / fp->channels; + else + r = max_bytes; + rmax = max(rmax, r); + break; + } + } + return rmax; +} + +/* Reduce the period or buffer bytes depending on the paired substream; + * when a paired configuration for implicit fb has a higher number of channels, + * we need to reduce the max size accordingly, otherwise it may become unusable + */ +static int hw_rule_bytes_implicit_fb(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + struct snd_usb_substream *subs = rule->private; + struct snd_usb_substream *pair; + struct snd_interval *it; + unsigned int max_bytes; + unsigned int rmax; + + pair = &subs->stream->substream[!subs->direction]; + if (!pair->ep_num) + return 0; + + if (rule->var == SNDRV_PCM_HW_PARAM_PERIOD_BYTES) + max_bytes = MAX_PERIOD_BYTES; + else + max_bytes = MAX_BUFFER_BYTES; + + rmax = get_adjusted_max_bytes(subs, pair, params, max_bytes, false); + if (!rmax) + rmax = get_adjusted_max_bytes(pair, subs, params, max_bytes, true); + if (!rmax) + return 0; + + it = hw_param_interval(params, rule->var); + return apply_hw_params_minmax(it, 0, rmax); +} + /* * set up the runtime hardware information. */ @@ -1085,6 +1160,16 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre SNDRV_PCM_HW_PARAM_PERIODS, -1); if (err < 0) return err; + err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, + hw_rule_bytes_implicit_fb, subs, + SNDRV_PCM_HW_PARAM_BUFFER_BYTES, -1); + if (err < 0) + return err; + err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, + hw_rule_bytes_implicit_fb, subs, + SNDRV_PCM_HW_PARAM_PERIOD_BYTES, -1); + if (err < 0) + return err; list_for_each_entry(fp, &subs->fmt_list, list) { if (fp->implicit_fb) { From fee2ec8cceb33b8886bc5894fb07e0b2e34148af Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 7 Apr 2022 23:27:40 +0200 Subject: [PATCH 431/708] ALSA: usb-audio: Increase max buffer size The current limit of max buffer size 1MB seems too small for modern devices with lots of channels and high sample rates. Let's make bigger, 4MB. Reviewed-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20220407212740.17920-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 6a460225f2e3..37ee6df8b15a 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -659,7 +659,7 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream) #define hwc_debug(fmt, args...) do { } while(0) #endif -#define MAX_BUFFER_BYTES (1024 * 1024) +#define MAX_BUFFER_BYTES (4 * 1024 * 1024) #define MAX_PERIOD_BYTES (512 * 1024) static const struct snd_pcm_hardware snd_usb_hardware = From d02b4dd84e1a90f7f1444d027c0289bf355b0d5a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2022 17:15:15 +0200 Subject: [PATCH 432/708] perf/imx_ddr: Fix undefined behavior due to shift overflowing the constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: In file included from :0:0: In function ‘ddr_perf_counter_enable’, inlined from ‘ddr_perf_irq_handler’ at drivers/perf/fsl_imx8_ddr_perf.c:651:2: ././include/linux/compiler_types.h:352:38: error: call to ‘__compiletime_assert_729’ \ declared with attribute error: FIELD_PREP: mask is not constant _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ... See https://lore.kernel.org/r/YkwQ6%2BtIH8GQpuct@zn.tnic for the gory details as to why it triggers with older gccs only. Signed-off-by: Borislav Petkov Cc: Frank Li Cc: Will Deacon Cc: Mark Rutland Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Cc: linux-arm-kernel@lists.infradead.org Acked-by: Will Deacon Link: https://lore.kernel.org/r/20220405151517.29753-10-bp@alien8.de Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 94ebc1ecace7..b1b2a55de77f 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -29,7 +29,7 @@ #define CNTL_OVER_MASK 0xFFFFFFFE #define CNTL_CSV_SHIFT 24 -#define CNTL_CSV_MASK (0xFF << CNTL_CSV_SHIFT) +#define CNTL_CSV_MASK (0xFFU << CNTL_CSV_SHIFT) #define EVENT_CYCLES_ID 0 #define EVENT_CYCLES_COUNTER 0 From 994fd530a512597ffcd713b0f6d5bc916c5698f0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 7 Apr 2022 00:03:14 +0100 Subject: [PATCH 433/708] cifs: Check the IOCB_DIRECT flag, not O_DIRECT Use the IOCB_DIRECT indicator flag on the I/O context rather than checking to see if the file was opened O_DIRECT. Signed-off-by: David Howells cc: Steve French cc: Shyam Prasad N cc: Rohith Surabattula cc: linux-cifs@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index a47fa44b6d52..fb60b5410789 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -944,7 +944,7 @@ cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter) ssize_t rc; struct inode *inode = file_inode(iocb->ki_filp); - if (iocb->ki_filp->f_flags & O_DIRECT) + if (iocb->ki_flags & IOCB_DIRECT) return cifs_user_readv(iocb, iter); rc = cifs_revalidate_mapping(inode); From d788e51636462e61c6883f7d96b07b06bc291650 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 1 Apr 2022 06:25:17 +0000 Subject: [PATCH 434/708] cifs: release cached dentries only if mount is complete During cifs_kill_sb, we first dput all the dentries that we have cached. However this function can also get called for mount failures. So dput the cached dentries only if the filesystem mount is complete. i.e. cifs_sb->root is populated. Fixes: 5e9c89d43fa6 ("cifs: Grab a reference for the dentry of the cached directory during the lifetime of the cache") Signed-off-by: Shyam Prasad N Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index fb60b5410789..aba0783a8f09 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -266,22 +266,24 @@ static void cifs_kill_sb(struct super_block *sb) * before we kill the sb. */ if (cifs_sb->root) { + node = rb_first(root); + while (node != NULL) { + tlink = rb_entry(node, struct tcon_link, tl_rbnode); + tcon = tlink_tcon(tlink); + cfid = &tcon->crfid; + mutex_lock(&cfid->fid_mutex); + if (cfid->dentry) { + dput(cfid->dentry); + cfid->dentry = NULL; + } + mutex_unlock(&cfid->fid_mutex); + node = rb_next(node); + } + + /* finally release root dentry */ dput(cifs_sb->root); cifs_sb->root = NULL; } - node = rb_first(root); - while (node != NULL) { - tlink = rb_entry(node, struct tcon_link, tl_rbnode); - tcon = tlink_tcon(tlink); - cfid = &tcon->crfid; - mutex_lock(&cfid->fid_mutex); - if (cfid->dentry) { - dput(cfid->dentry); - cfid->dentry = NULL; - } - mutex_unlock(&cfid->fid_mutex); - node = rb_next(node); - } kill_anon_super(sb); cifs_umount(cifs_sb); From d7442f512b71fc63a99c8a801422dde4fbbf9f93 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 4 Apr 2022 18:15:09 +0200 Subject: [PATCH 435/708] ice: arfs: fix use-after-free when freeing @rx_cpu_rmap The CI testing bots triggered the following splat: [ 718.203054] BUG: KASAN: use-after-free in free_irq_cpu_rmap+0x53/0x80 [ 718.206349] Read of size 4 at addr ffff8881bd127e00 by task sh/20834 [ 718.212852] CPU: 28 PID: 20834 Comm: sh Kdump: loaded Tainted: G S W IOE 5.17.0-rc8_nextqueue-devqueue-02643-g23f3121aca93 #1 [ 718.219695] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0012.070720200218 07/07/2020 [ 718.223418] Call Trace: [ 718.227139] [ 718.230783] dump_stack_lvl+0x33/0x42 [ 718.234431] print_address_description.constprop.9+0x21/0x170 [ 718.238177] ? free_irq_cpu_rmap+0x53/0x80 [ 718.241885] ? free_irq_cpu_rmap+0x53/0x80 [ 718.245539] kasan_report.cold.18+0x7f/0x11b [ 718.249197] ? free_irq_cpu_rmap+0x53/0x80 [ 718.252852] free_irq_cpu_rmap+0x53/0x80 [ 718.256471] ice_free_cpu_rx_rmap.part.11+0x37/0x50 [ice] [ 718.260174] ice_remove_arfs+0x5f/0x70 [ice] [ 718.263810] ice_rebuild_arfs+0x3b/0x70 [ice] [ 718.267419] ice_rebuild+0x39c/0xb60 [ice] [ 718.270974] ? asm_sysvec_apic_timer_interrupt+0x12/0x20 [ 718.274472] ? ice_init_phy_user_cfg+0x360/0x360 [ice] [ 718.278033] ? delay_tsc+0x4a/0xb0 [ 718.281513] ? preempt_count_sub+0x14/0xc0 [ 718.284984] ? delay_tsc+0x8f/0xb0 [ 718.288463] ice_do_reset+0x92/0xf0 [ice] [ 718.292014] ice_pci_err_resume+0x91/0xf0 [ice] [ 718.295561] pci_reset_function+0x53/0x80 <...> [ 718.393035] Allocated by task 690: [ 718.433497] Freed by task 20834: [ 718.495688] Last potentially related work creation: [ 718.568966] The buggy address belongs to the object at ffff8881bd127e00 which belongs to the cache kmalloc-96 of size 96 [ 718.574085] The buggy address is located 0 bytes inside of 96-byte region [ffff8881bd127e00, ffff8881bd127e60) [ 718.579265] The buggy address belongs to the page: [ 718.598905] Memory state around the buggy address: [ 718.601809] ffff8881bd127d00: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc [ 718.604796] ffff8881bd127d80: 00 00 00 00 00 00 00 00 00 00 fc fc fc fc fc fc [ 718.607794] >ffff8881bd127e00: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc [ 718.610811] ^ [ 718.613819] ffff8881bd127e80: 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc fc [ 718.617107] ffff8881bd127f00: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc This is due to that free_irq_cpu_rmap() is always being called *after* (devm_)free_irq() and thus it tries to work with IRQ descs already freed. For example, on device reset the driver frees the rmap right before allocating a new one (the splat above). Make rmap creation and freeing function symmetrical with {request,free}_irq() calls i.e. do that on ifup/ifdown instead of device probe/remove/resume. These operations can be performed independently from the actual device aRFS configuration. Also, make sure ice_vsi_free_irq() clears IRQ affinity notifiers only when aRFS is disabled -- otherwise, CPU rmap sets and clears its own and they must not be touched manually. Fixes: 28bf26724fdb0 ("ice: Implement aRFS") Co-developed-by: Ivan Vecera Signed-off-by: Ivan Vecera Signed-off-by: Alexander Lobakin Tested-by: Ivan Vecera Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_arfs.c | 9 ++------- drivers/net/ethernet/intel/ice/ice_lib.c | 5 ++++- drivers/net/ethernet/intel/ice/ice_main.c | 18 ++++++++---------- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 5daade32ea62..fba178e07600 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -577,7 +577,7 @@ void ice_free_cpu_rx_rmap(struct ice_vsi *vsi) { struct net_device *netdev; - if (!vsi || vsi->type != ICE_VSI_PF || !vsi->arfs_fltr_list) + if (!vsi || vsi->type != ICE_VSI_PF) return; netdev = vsi->netdev; @@ -599,7 +599,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) int base_idx, i; if (!vsi || vsi->type != ICE_VSI_PF) - return -EINVAL; + return 0; pf = vsi->back; netdev = vsi->netdev; @@ -636,7 +636,6 @@ void ice_remove_arfs(struct ice_pf *pf) if (!pf_vsi) return; - ice_free_cpu_rx_rmap(pf_vsi); ice_clear_arfs(pf_vsi); } @@ -653,9 +652,5 @@ void ice_rebuild_arfs(struct ice_pf *pf) return; ice_remove_arfs(pf); - if (ice_set_cpu_rx_rmap(pf_vsi)) { - dev_err(ice_pf_to_dev(pf), "Failed to rebuild aRFS\n"); - return; - } ice_init_arfs(pf_vsi); } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 2774cbd5b12a..6d19c58ccacd 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2689,6 +2689,8 @@ void ice_vsi_free_irq(struct ice_vsi *vsi) return; vsi->irqs_ready = false; + ice_free_cpu_rx_rmap(vsi); + ice_for_each_q_vector(vsi, i) { u16 vector = i + base; int irq_num; @@ -2702,7 +2704,8 @@ void ice_vsi_free_irq(struct ice_vsi *vsi) continue; /* clear the affinity notifier in the IRQ descriptor */ - irq_set_affinity_notifier(irq_num, NULL); + if (!IS_ENABLED(CONFIG_RFS_ACCEL)) + irq_set_affinity_notifier(irq_num, NULL); /* clear the affinity_mask in the IRQ descriptor */ irq_set_affinity_hint(irq_num, NULL); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d768925785ca..5b1198859da7 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2510,6 +2510,13 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) irq_set_affinity_hint(irq_num, &q_vector->affinity_mask); } + err = ice_set_cpu_rx_rmap(vsi); + if (err) { + netdev_err(vsi->netdev, "Failed to setup CPU RMAP on VSI %u: %pe\n", + vsi->vsi_num, ERR_PTR(err)); + goto free_q_irqs; + } + vsi->irqs_ready = true; return 0; @@ -3692,20 +3699,12 @@ static int ice_setup_pf_sw(struct ice_pf *pf) */ ice_napi_add(vsi); - status = ice_set_cpu_rx_rmap(vsi); - if (status) { - dev_err(dev, "Failed to set CPU Rx map VSI %d error %d\n", - vsi->vsi_num, status); - goto unroll_napi_add; - } status = ice_init_mac_fltr(pf); if (status) - goto free_cpu_rx_map; + goto unroll_napi_add; return 0; -free_cpu_rx_map: - ice_free_cpu_rx_rmap(vsi); unroll_napi_add: ice_tc_indir_block_unregister(vsi); unroll_cfg_netdev: @@ -5167,7 +5166,6 @@ static int __maybe_unused ice_suspend(struct device *dev) continue; ice_vsi_free_q_vectors(pf->vsi[v]); } - ice_free_cpu_rx_rmap(ice_get_main_vsi(pf)); ice_clear_interrupt_scheme(pf); pci_save_state(pdev); From 7d59706dbef8de83b3662026766507bc494223d7 Mon Sep 17 00:00:00 2001 From: Mateusz Palczewski Date: Thu, 24 Mar 2022 14:19:15 +0100 Subject: [PATCH 436/708] Revert "iavf: Fix deadlock occurrence during resetting VF interface" This change caused a regression with resetting while changing network namespaces. By clearing the IFF_UP flag, the kernel now thinks it has fully closed the device. This reverts commit 0cc318d2e8408bc0ffb4662a0c3e5e57005ac6ff. Fixes: 0cc318d2e840 ("iavf: Fix deadlock occurrence during resetting VF interface") Signed-off-by: Mateusz Palczewski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 190590d32faf..7dfcf78b57fb 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2871,7 +2871,6 @@ continue_reset: running = adapter->state == __IAVF_RUNNING; if (running) { - netdev->flags &= ~IFF_UP; netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); adapter->link_up = false; @@ -2988,7 +2987,7 @@ continue_reset: * to __IAVF_RUNNING */ iavf_up_complete(adapter); - netdev->flags |= IFF_UP; + iavf_irq_enable(adapter, true); } else { iavf_change_state(adapter, __IAVF_DOWN); @@ -3004,10 +3003,8 @@ continue_reset: reset_err: mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); - if (running) { + if (running) iavf_change_state(adapter, __IAVF_RUNNING); - netdev->flags |= IFF_UP; - } dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); iavf_close(netdev); } From 2bbac98d0930e8161b1957dc0ec99de39ade1b3c Mon Sep 17 00:00:00 2001 From: Douglas Miller Date: Fri, 8 Apr 2022 09:35:23 -0400 Subject: [PATCH 437/708] RDMA/hfi1: Fix use-after-free bug for mm struct Under certain conditions, such as MPI_Abort, the hfi1 cleanup code may represent the last reference held on the task mm. hfi1_mmu_rb_unregister() then drops the last reference and the mm is freed before the final use in hfi1_release_user_pages(). A new task may allocate the mm structure while it is still being used, resulting in problems. One manifestation is corruption of the mmap_sem counter leading to a hang in down_write(). Another is corruption of an mm struct that is in use by another task. Fixes: 3d2a9d642512 ("IB/hfi1: Ensure correct mm is used at all times") Link: https://lore.kernel.org/r/20220408133523.122165.72975.stgit@awfm-01.cornelisnetworks.com Cc: Signed-off-by: Douglas Miller Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/mmu_rb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 876cc78a22cc..7333646021bb 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -80,6 +80,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) unsigned long flags; struct list_head del_list; + /* Prevent freeing of mm until we are completely finished. */ + mmgrab(handler->mn.mm); + /* Unregister first so we don't get any more notifications. */ mmu_notifier_unregister(&handler->mn, handler->mn.mm); @@ -102,6 +105,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) do_remove(handler, &del_list); + /* Now the mm may be freed. */ + mmdrop(handler->mn.mm); + kfree(handler); } From e8cf229ebe5eb31eecee86268223530a872872c2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 3 Apr 2022 20:19:46 -0700 Subject: [PATCH 438/708] tools/testing/nvdimm: Fix security_init() symbol collision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting with the new perf-event support in the nvdimm core, the nfit_test mock module stops compiling. Rename its security_init() to nfit_security_init(). tools/testing/nvdimm/test/nfit.c:1845:13: error: conflicting types for ‘security_init’; have ‘void(struct nfit_test *)’ 1845 | static void security_init(struct nfit_test *t) | ^~~~~~~~~~~~~ In file included from ./include/linux/perf_event.h:61, from ./include/linux/nd.h:11, from ./drivers/nvdimm/nd-core.h:11, from tools/testing/nvdimm/test/nfit.c:19: Fixes: 9a61d0838cd0 ("drivers/nvdimm: Add nvdimm pmu structure") Cc: Kajol Jain Reviewed-by: Kajol Jain Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/164904238610.1330275.1889212115373993727.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 65dbdda3a054..1da76ccde448 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1842,7 +1842,7 @@ static int nfit_test_dimm_init(struct nfit_test *t) return 0; } -static void security_init(struct nfit_test *t) +static void nfit_security_init(struct nfit_test *t) { int i; @@ -1938,7 +1938,7 @@ static int nfit_test0_alloc(struct nfit_test *t) if (nfit_test_dimm_init(t)) return -ENOMEM; smart_init(t); - security_init(t); + nfit_security_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } From d28820419ca332f856cdf8bef0cafed79c29ed05 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 8 Apr 2022 12:58:44 -0700 Subject: [PATCH 439/708] cxl/pci: Drop shadowed variable 0day reports that wait_for_media_ready() declares an @rc variable twice. >> drivers/cxl/pci.c:439:7: warning: Local variable 'rc' shadows outer variable [shadowVariable] int rc; ^ drivers/cxl/pci.c:431:6: note: Shadowed declaration int rc, i; ^ drivers/cxl/pci.c:439:7: note: Shadow variable int rc; ^ Cc: Randy Dunlap Fixes: 523e594d9cc0 ("cxl/pci: Implement wait for media active") Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reported-by: kernel test robot Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/164944636936.455177.14136200464724208233.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 8a7267d116b7..3f2182d66829 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -436,7 +436,6 @@ static int wait_for_media_ready(struct cxl_dev_state *cxlds) for (i = mbox_ready_timeout; i; i--) { u32 temp; - int rc; rc = pci_read_config_dword( pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); From e677edbcabee849bfdd43f1602bccbecf736a646 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 8 Apr 2022 11:08:58 -0600 Subject: [PATCH 440/708] io_uring: fix race between timeout flush and removal io_flush_timeouts() assumes the timeout isn't in progress of triggering or being removed/canceled, so it unconditionally removes it from the timeout list and attempts to cancel it. Leave it on the list and let the normal timeout cancelation take care of it. Cc: stable@vger.kernel.org # 5.5+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index fafd1ca4780b..659f8ecba5b7 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1736,12 +1736,11 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx) __must_hold(&ctx->completion_lock) { u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); + struct io_kiocb *req, *tmp; spin_lock_irq(&ctx->timeout_lock); - while (!list_empty(&ctx->timeout_list)) { + list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { u32 events_needed, events_got; - struct io_kiocb *req = list_first_entry(&ctx->timeout_list, - struct io_kiocb, timeout.list); if (io_is_timeout_noseq(req)) break; @@ -1758,7 +1757,6 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx) if (events_got < events_needed) break; - list_del_init(&req->timeout.list); io_kill_timeout(req, 0); } ctx->cq_last_tm_flush = seq; @@ -6628,6 +6626,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0) return -EINVAL; + INIT_LIST_HEAD(&req->timeout.list); data->mode = io_translate_timeout_mode(flags); hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode); From 74befa447e6839cdd90ed541159ec783726946f9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 7 Apr 2022 19:55:38 +0300 Subject: [PATCH 441/708] net: mdio: don't defer probe forever if PHY IRQ provider is missing When a driver for an interrupt controller is missing, of_irq_get() returns -EPROBE_DEFER ad infinitum, causing fwnode_mdiobus_phy_device_register(), and ultimately, the entire of_mdiobus_register() call, to fail. In turn, any phy_connect() call towards a PHY on this MDIO bus will also fail. This is not what is expected to happen, because the PHY library falls back to poll mode when of_irq_get() returns a hard error code, and the MDIO bus, PHY and attached Ethernet controller work fine, albeit suboptimally, when the PHY library polls for link status. However, -EPROBE_DEFER has special handling given the assumption that at some point probe deferral will stop, and the driver for the supplier will kick in and create the IRQ domain. Reasons for which the interrupt controller may be missing: - It is not yet written. This may happen if a more recent DT blob (with an interrupt-parent for the PHY) is used to boot an old kernel where the driver didn't exist, and that kernel worked with the vintage-correct DT blob using poll mode. - It is compiled out. Behavior is the same as above. - It is compiled as a module. The kernel will wait for a number of seconds specified in the "deferred_probe_timeout" boot parameter for user space to load the required module. The current default is 0, which times out at the end of initcalls. It is possible that this might cause regressions unless users adjust this boot parameter. The proposed solution is to use the driver_deferred_probe_check_state() helper function provided by the driver core, which gives up after some -EPROBE_DEFER attempts, taking "deferred_probe_timeout" into consideration. The return code is changed from -EPROBE_DEFER into -ENODEV or -ETIMEDOUT, depending on whether the kernel is compiled with support for modules or not. Fixes: 66bdede495c7 ("of_mdio: Fix broken PHY IRQ in case of probe deferral") Suggested-by: Robin Murphy Signed-off-by: Vladimir Oltean Acked-by: Greg Kroah-Hartman Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20220407165538.4084809-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/base/dd.c | 1 + drivers/net/mdio/fwnode_mdio.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index af6bea56f4e2..3fc3b5940bb3 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -296,6 +296,7 @@ int driver_deferred_probe_check_state(struct device *dev) return -EPROBE_DEFER; } +EXPORT_SYMBOL_GPL(driver_deferred_probe_check_state); static void deferred_probe_timeout_work_func(struct work_struct *work) { diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index 1becb1a731f6..1c1584fca632 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -43,6 +43,11 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio, int rc; rc = fwnode_irq_get(child, 0); + /* Don't wait forever if the IRQ provider doesn't become available, + * just fall back to poll mode + */ + if (rc == -EPROBE_DEFER) + rc = driver_deferred_probe_check_state(&phy->mdio.dev); if (rc == -EPROBE_DEFER) return rc; From d452088cdfd5a4ad9d96d847d2273fe958d6339b Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Thu, 7 Apr 2022 10:07:03 +0300 Subject: [PATCH 442/708] mlxsw: i2c: Fix initialization error flow Add mutex_destroy() call in driver initialization error flow. Fixes: 6882b0aee180f ("mlxsw: Introduce support for I2C bus") Signed-off-by: Vadim Pasternak Signed-off-by: Ido Schimmel Link: https://lore.kernel.org/r/20220407070703.2421076-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/i2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 939b692ffc33..ce843ea91464 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -650,6 +650,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client, return 0; errout: + mutex_destroy(&mlxsw_i2c->cmd.lock); i2c_set_clientdata(client, NULL); return err; From e2d88f9ce678cd33763826ae2f0412f181251314 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 7 Apr 2022 09:24:22 -0400 Subject: [PATCH 443/708] sctp: use the correct skb for security_sctp_assoc_request Yi Chen reported an unexpected sctp connection abort, and it occurred when COOKIE_ECHO is bundled with DATA Fragment by SCTP HW GSO. As the IP header is included in chunk->head_skb instead of chunk->skb, it failed to check IP header version in security_sctp_assoc_request(). According to Ondrej, SELinux only looks at IP header (address and IPsec options) and XFRM state data, and these are all included in head_skb for SCTP HW GSO packets. So fix it by using head_skb when calling security_sctp_assoc_request() in processing COOKIE_ECHO. v1->v2: - As Ondrej noticed, chunk->head_skb should also be used for security_sctp_assoc_established() in sctp_sf_do_5_1E_ca(). Fixes: e215dab1c490 ("security: call security_sctp_assoc_request in sctp_sf_do_5_1D_ce") Reported-by: Yi Chen Signed-off-by: Xin Long Reviewed-by: Ondrej Mosnacek Acked-by: Marcelo Ricardo Leitner Link: https://lore.kernel.org/r/71becb489e51284edf0c11fc15246f4ed4cef5b6.1649337862.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7f342bc12735..52edee1322fc 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -781,7 +781,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, } } - if (security_sctp_assoc_request(new_asoc, chunk->skb)) { + if (security_sctp_assoc_request(new_asoc, chunk->head_skb ?: chunk->skb)) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } @@ -932,7 +932,7 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net, /* Set peer label for connection. */ if (security_sctp_assoc_established((struct sctp_association *)asoc, - chunk->skb)) + chunk->head_skb ?: chunk->skb)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Verify that the chunk length for the COOKIE-ACK is OK. @@ -2262,7 +2262,7 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook( } /* Update socket peer label if first association. */ - if (security_sctp_assoc_request(new_asoc, chunk->skb)) { + if (security_sctp_assoc_request(new_asoc, chunk->head_skb ?: chunk->skb)) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } From e65812fd22eba32f11abe28cb377cbd64cfb1ba0 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 7 Apr 2022 11:29:23 -0300 Subject: [PATCH 444/708] net/sched: fix initialization order when updating chain 0 head Currently, when inserting a new filter that needs to sit at the head of chain 0, it will first update the heads pointer on all devices using the (shared) block, and only then complete the initialization of the new element so that it has a "next" element. This can lead to a situation that the chain 0 head is propagated to another CPU before the "next" initialization is done. When this race condition is triggered, packets being matched on that CPU will simply miss all other filters, and will flow through the stack as if there were no other filters installed. If the system is using OVS + TC, such packets will get handled by vswitchd via upcall, which results in much higher latency and reordering. For other applications it may result in packet drops. This is reproducible with a tc only setup, but it varies from system to system. It could be reproduced with a shared block amongst 10 veth tunnels, and an ingress filter mirroring packets to another veth. That's because using the last added veth tunnel to the shared block to do the actual traffic, it makes the race window bigger and easier to trigger. The fix is rather simple, to just initialize the next pointer of the new filter instance (tp) before propagating the head change. The fixes tag is pointing to the original code though this issue should only be observed when using it unlocked. Fixes: 2190d1d0944f ("net: sched: introduce helpers to work with filter chains") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Vlad Buslov Reviewed-by: Davide Caratti Link: https://lore.kernel.org/r/b97d5f4eaffeeb9d058155bcab63347527261abf.1649341369.git.marcelo.leitner@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/cls_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2957f8f5cea7..f0699f39afdb 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1672,10 +1672,10 @@ static int tcf_chain_tp_insert(struct tcf_chain *chain, if (chain->flushing) return -EAGAIN; + RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); if (*chain_info->pprev == chain->filter_chain) tcf_chain0_head_change(chain, tp); tcf_proto_get(tp); - RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); return 0; From ea5dc046127e857a7873ae55fd57c866e9e86fb2 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Wed, 30 Mar 2022 17:47:59 +0800 Subject: [PATCH 445/708] cachefiles: unmark inode in use in error path Unmark inode in use if error encountered. If the in-use flag leakage occurs in cachefiles_open_file(), Cachefiles will complain "Inode already in use" when later another cookie with the same index key is looked up. If the in-use flag leakage occurs in cachefiles_create_tmpfile(), though the "Inode already in use" warning won't be triggered, fix the leakage anyway. Reported-by: Gao Xiang Fixes: 1f08c925e7a3 ("cachefiles: Implement backing file wrangling") Signed-off-by: Jeffle Xu Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://listman.redhat.com/archives/linux-cachefs/2022-March/006615.html # v1 Link: https://listman.redhat.com/archives/linux-cachefs/2022-March/006618.html # v2 --- fs/cachefiles/namei.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index f256c8aff7bb..ca9f3e4ec4b3 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -57,6 +57,16 @@ static void __cachefiles_unmark_inode_in_use(struct cachefiles_object *object, trace_cachefiles_mark_inactive(object, inode); } +static void cachefiles_do_unmark_inode_in_use(struct cachefiles_object *object, + struct dentry *dentry) +{ + struct inode *inode = d_backing_inode(dentry); + + inode_lock(inode); + __cachefiles_unmark_inode_in_use(object, dentry); + inode_unlock(inode); +} + /* * Unmark a backing inode and tell cachefilesd that there's something that can * be culled. @@ -68,9 +78,7 @@ void cachefiles_unmark_inode_in_use(struct cachefiles_object *object, struct inode *inode = file_inode(file); if (inode) { - inode_lock(inode); - __cachefiles_unmark_inode_in_use(object, file->f_path.dentry); - inode_unlock(inode); + cachefiles_do_unmark_inode_in_use(object, file->f_path.dentry); if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) { atomic_long_add(inode->i_blocks, &cache->b_released); @@ -484,7 +492,7 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object) object, d_backing_inode(path.dentry), ret, cachefiles_trace_trunc_error); file = ERR_PTR(ret); - goto out_dput; + goto out_unuse; } } @@ -494,15 +502,20 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object) trace_cachefiles_vfs_error(object, d_backing_inode(path.dentry), PTR_ERR(file), cachefiles_trace_open_error); - goto out_dput; + goto out_unuse; } if (unlikely(!file->f_op->read_iter) || unlikely(!file->f_op->write_iter)) { fput(file); pr_notice("Cache does not support read_iter and write_iter\n"); file = ERR_PTR(-EINVAL); + goto out_unuse; } + goto out_dput; + +out_unuse: + cachefiles_do_unmark_inode_in_use(object, path.dentry); out_dput: dput(path.dentry); out: @@ -590,14 +603,16 @@ static bool cachefiles_open_file(struct cachefiles_object *object, check_failed: fscache_cookie_lookup_negative(object->cookie); cachefiles_unmark_inode_in_use(object, file); - if (ret == -ESTALE) { - fput(file); - dput(dentry); + fput(file); + dput(dentry); + if (ret == -ESTALE) return cachefiles_create_file(object); - } + return false; + error_fput: fput(file); error: + cachefiles_do_unmark_inode_in_use(object, dentry); dput(dentry); return false; } From 7b2f6c306601240635c72caa61f682e74d4591b2 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Tue, 5 Apr 2022 09:46:49 -0400 Subject: [PATCH 446/708] cachefiles: Fix KASAN slab-out-of-bounds in cachefiles_set_volume_xattr Use the actual length of volume coherency data when setting the xattr to avoid the following KASAN report. BUG: KASAN: slab-out-of-bounds in cachefiles_set_volume_xattr+0xa0/0x350 [cachefiles] Write of size 4 at addr ffff888101e02af4 by task kworker/6:0/1347 CPU: 6 PID: 1347 Comm: kworker/6:0 Kdump: loaded Not tainted 5.18.0-rc1-nfs-fscache-netfs+ #13 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-4.fc34 04/01/2014 Workqueue: events fscache_create_volume_work [fscache] Call Trace: dump_stack_lvl+0x45/0x5a print_report.cold+0x5e/0x5db ? __lock_text_start+0x8/0x8 ? cachefiles_set_volume_xattr+0xa0/0x350 [cachefiles] kasan_report+0xab/0x120 ? cachefiles_set_volume_xattr+0xa0/0x350 [cachefiles] kasan_check_range+0xf5/0x1d0 memcpy+0x39/0x60 cachefiles_set_volume_xattr+0xa0/0x350 [cachefiles] cachefiles_acquire_volume+0x2be/0x500 [cachefiles] ? __cachefiles_free_volume+0x90/0x90 [cachefiles] fscache_create_volume_work+0x68/0x160 [fscache] process_one_work+0x3b7/0x6a0 worker_thread+0x2c4/0x650 ? process_one_work+0x6a0/0x6a0 kthread+0x16c/0x1a0 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x22/0x30 Allocated by task 1347: kasan_save_stack+0x1e/0x40 __kasan_kmalloc+0x81/0xa0 cachefiles_set_volume_xattr+0x76/0x350 [cachefiles] cachefiles_acquire_volume+0x2be/0x500 [cachefiles] fscache_create_volume_work+0x68/0x160 [fscache] process_one_work+0x3b7/0x6a0 worker_thread+0x2c4/0x650 kthread+0x16c/0x1a0 ret_from_fork+0x22/0x30 The buggy address belongs to the object at ffff888101e02af0 which belongs to the cache kmalloc-8 of size 8 The buggy address is located 4 bytes inside of 8-byte region [ffff888101e02af0, ffff888101e02af8) The buggy address belongs to the physical page: page:00000000a2292d70 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x101e02 flags: 0x17ffffc0000200(slab|node=0|zone=2|lastcpupid=0x1fffff) raw: 0017ffffc0000200 0000000000000000 dead000000000001 ffff888100042280 raw: 0000000000000000 0000000080660066 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888101e02980: fc 00 fc fc fc fc 00 fc fc fc fc 00 fc fc fc fc ffff888101e02a00: 00 fc fc fc fc 00 fc fc fc fc 00 fc fc fc fc 00 >ffff888101e02a80: fc fc fc fc 00 fc fc fc fc 00 fc fc fc fc 04 fc ^ ffff888101e02b00: fc fc fc 00 fc fc fc fc 00 fc fc fc fc 00 fc fc ffff888101e02b80: fc fc 00 fc fc fc fc 00 fc fc fc fc 00 fc fc fc ================================================================== Fixes: 413a4a6b0b55 "cachefiles: Fix volume coherency attribute" Signed-off-by: Dave Wysochanski Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/20220405134649.6579-1-dwysocha@redhat.com/ # v1 Link: https://lore.kernel.org/r/20220405142810.8208-1-dwysocha@redhat.com/ # Incorrect v2 --- fs/cachefiles/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index 35465109d9c4..00b087c14995 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -203,7 +203,7 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume) if (!buf) return false; buf->reserved = cpu_to_be32(0); - memcpy(buf->data, p, len); + memcpy(buf->data, p, volume->vcookie->coherency_len); ret = cachefiles_inject_write_error(); if (ret == 0) From c54eead2a66914a36b4a9ea5bbeb95a768307cba Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Thu, 7 Apr 2022 18:28:32 +0800 Subject: [PATCH 447/708] docs: filesystems: caching/backend-api.rst: correct two relinquish APIs use 1. cache backend is using fscache_relinquish_cache() rather than fscache_relinquish_cookie() to reset the cache cookie. 2. No fscache_cache_relinquish() helper currently, it should be fscache_relinquish_cache(). Signed-off-by: Yue Hu Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://listman.redhat.com/archives/linux-cachefs/2022-April/006703.html # v1 Link: https://listman.redhat.com/archives/linux-cachefs/2022-April/006704.html # v2 --- Documentation/filesystems/caching/backend-api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/caching/backend-api.rst b/Documentation/filesystems/caching/backend-api.rst index be793c49a772..d7b2df5fd607 100644 --- a/Documentation/filesystems/caching/backend-api.rst +++ b/Documentation/filesystems/caching/backend-api.rst @@ -73,7 +73,7 @@ busy. If successful, the cache backend can then start setting up the cache. In the event that the initialisation fails, the cache backend should call:: - void fscache_relinquish_cookie(struct fscache_cache *cache); + void fscache_relinquish_cache(struct fscache_cache *cache); to reset and discard the cookie. @@ -125,7 +125,7 @@ outstanding accesses on the volume to complete before returning. When the the cache is completely withdrawn, fscache should be notified by calling:: - void fscache_cache_relinquish(struct fscache_cache *cache); + void fscache_relinquish_cache(struct fscache_cache *cache); to clear fields in the cookie and discard the caller's ref on it. From 5d3d5b9645b53691b2eee7607cf995bcfae46dd0 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Thu, 7 Apr 2022 19:02:39 +0800 Subject: [PATCH 448/708] docs: filesystems: caching/backend-api.rst: fix an object withdrawn API There's no fscache_are_objects_withdrawn() helper at all to test if cookie withdrawal is completed currently. The cache backend is using fscache_wait_for_objects() to wait all objects to be withdrawn. Signed-off-by: Yue Hu Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://listman.redhat.com/archives/linux-cachefs/2022-April/006705.html # v1 --- Documentation/filesystems/caching/backend-api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/caching/backend-api.rst b/Documentation/filesystems/caching/backend-api.rst index d7b2df5fd607..d7507becf674 100644 --- a/Documentation/filesystems/caching/backend-api.rst +++ b/Documentation/filesystems/caching/backend-api.rst @@ -110,9 +110,9 @@ to withdraw them, calling:: on the cookie that each object belongs to. This schedules the specified cookie for withdrawal. This gets offloaded to a workqueue. The cache backend can -test for completion by calling:: +wait for completion by calling:: - bool fscache_are_objects_withdrawn(struct fscache_cookie *cache); + void fscache_wait_for_objects(struct fscache_cache *cache); Once all the cookies are withdrawn, a cache backend can withdraw all the volumes, calling:: From 2c547f299827c12244d613eb2ee3616d88f56088 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Wed, 6 Apr 2022 11:50:17 +0800 Subject: [PATCH 449/708] fscache: Remove the cookie parameter from fscache_clear_page_bits() The cookie is not used at all, remove it and update the usage in io.c and afs/write.c (which is the only user outside of fscache currently) at the same time. [DH: Amended the documentation also] Signed-off-by: Yue Hu Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://listman.redhat.com/archives/linux-cachefs/2022-April/006659.html --- .../filesystems/caching/netfs-api.rst | 23 +++++++++---------- fs/afs/write.c | 3 +-- fs/fscache/io.c | 5 ++-- include/linux/fscache.h | 4 +--- 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/Documentation/filesystems/caching/netfs-api.rst b/Documentation/filesystems/caching/netfs-api.rst index 5066113acad5..7308d76a29dc 100644 --- a/Documentation/filesystems/caching/netfs-api.rst +++ b/Documentation/filesystems/caching/netfs-api.rst @@ -404,22 +404,21 @@ schedule a write of that region:: And if an error occurs before that point is reached, the marks can be removed by calling:: - void fscache_clear_page_bits(struct fscache_cookie *cookie, - struct address_space *mapping, + void fscache_clear_page_bits(struct address_space *mapping, loff_t start, size_t len, bool caching) -In both of these functions, the cookie representing the cache object to be -written to and a pointer to the mapping to which the source pages are attached -are passed in; start and len indicate the size of the region that's going to be -written (it doesn't have to align to page boundaries necessarily, but it does -have to align to DIO boundaries on the backing filesystem). The caching -parameter indicates if caching should be skipped, and if false, the functions -do nothing. +In these functions, a pointer to the mapping to which the source pages are +attached is passed in and start and len indicate the size of the region that's +going to be written (it doesn't have to align to page boundaries necessarily, +but it does have to align to DIO boundaries on the backing filesystem). The +caching parameter indicates if caching should be skipped, and if false, the +functions do nothing. -The write function takes some additional parameters: i_size indicates the size -of the netfs file and term_func indicates an optional completion function, to -which term_func_priv will be passed, along with the error or amount written. +The write function takes some additional parameters: the cookie representing +the cache object to be written to, i_size indicates the size of the netfs file +and term_func indicates an optional completion function, to which +term_func_priv will be passed, along with the error or amount written. Note that the write function will always run asynchronously and will unmark all the pages upon completion before calling term_func. diff --git a/fs/afs/write.c b/fs/afs/write.c index 6bcf1475511b..4763132ca57e 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -616,8 +616,7 @@ static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping, _debug("write discard %x @%llx [%llx]", len, start, i_size); /* The dirty region was entirely beyond the EOF. */ - fscache_clear_page_bits(afs_vnode_cache(vnode), - mapping, start, len, caching); + fscache_clear_page_bits(mapping, start, len, caching); afs_pages_written_back(vnode, start, len); ret = 0; } diff --git a/fs/fscache/io.c b/fs/fscache/io.c index c8c7fe9e9a6e..3af3b08a9bb3 100644 --- a/fs/fscache/io.c +++ b/fs/fscache/io.c @@ -235,8 +235,7 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error, { struct fscache_write_request *wreq = priv; - fscache_clear_page_bits(fscache_cres_cookie(&wreq->cache_resources), - wreq->mapping, wreq->start, wreq->len, + fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len, wreq->set_bits); if (wreq->term_func) @@ -296,7 +295,7 @@ abandon_end: abandon_free: kfree(wreq); abandon: - fscache_clear_page_bits(cookie, mapping, start, len, cond); + fscache_clear_page_bits(mapping, start, len, cond); if (term_func) term_func(term_func_priv, ret, false); } diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 6727fb0db619..e25539072463 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -573,7 +573,6 @@ int fscache_write(struct netfs_cache_resources *cres, /** * fscache_clear_page_bits - Clear the PG_fscache bits from a set of pages - * @cookie: The cookie representing the cache object * @mapping: The netfs inode to use as the source * @start: The start position in @mapping * @len: The amount of data to unlock @@ -582,8 +581,7 @@ int fscache_write(struct netfs_cache_resources *cres, * Clear the PG_fscache flag from a sequence of pages and wake up anyone who's * waiting. */ -static inline void fscache_clear_page_bits(struct fscache_cookie *cookie, - struct address_space *mapping, +static inline void fscache_clear_page_bits(struct address_space *mapping, loff_t start, size_t len, bool caching) { From 19517e53740ec671c335f05089abe1f0720103c7 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Sat, 2 Apr 2022 12:47:43 +0800 Subject: [PATCH 450/708] fscache: Move fscache_cookies_seq_ops specific code under CONFIG_PROC_FS fscache_cookies_seq_ops is only used in proc.c that is compiled under enabled CONFIG_PROC_FS, so move related code under this config. The same case exsits in internal.h. Also, make fscache_lru_cookie_timeout static due to no user outside of cookie.c. Signed-off-by: Yue Hu Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://listman.redhat.com/archives/linux-cachefs/2022-April/006649.html # v1 --- fs/fscache/cookie.c | 4 +++- fs/fscache/internal.h | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 9bb1ab5fe5ed..9d3cf0111709 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -30,7 +30,7 @@ static DEFINE_SPINLOCK(fscache_cookie_lru_lock); DEFINE_TIMER(fscache_cookie_lru_timer, fscache_cookie_lru_timed_out); static DECLARE_WORK(fscache_cookie_lru_work, fscache_cookie_lru_worker); static const char fscache_cookie_states[FSCACHE_COOKIE_STATE__NR] = "-LCAIFUWRD"; -unsigned int fscache_lru_cookie_timeout = 10 * HZ; +static unsigned int fscache_lru_cookie_timeout = 10 * HZ; void fscache_print_cookie(struct fscache_cookie *cookie, char prefix) { @@ -1069,6 +1069,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, } EXPORT_SYMBOL(__fscache_invalidate); +#ifdef CONFIG_PROC_FS /* * Generate a list of extant cookies in /proc/fs/fscache/cookies */ @@ -1145,3 +1146,4 @@ const struct seq_operations fscache_cookies_seq_ops = { .stop = fscache_cookies_seq_stop, .show = fscache_cookies_seq_show, }; +#endif diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index ed1c9ed737f2..1336f517e9b1 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -56,7 +56,9 @@ static inline bool fscache_set_cache_state_maybe(struct fscache_cache *cache, * cookie.c */ extern struct kmem_cache *fscache_cookie_jar; +#ifdef CONFIG_PROC_FS extern const struct seq_operations fscache_cookies_seq_ops; +#endif extern struct timer_list fscache_cookie_lru_timer; extern void fscache_print_cookie(struct fscache_cookie *cookie, char prefix); @@ -137,7 +139,9 @@ int fscache_stats_show(struct seq_file *m, void *v); /* * volume.c */ +#ifdef CONFIG_PROC_FS extern const struct seq_operations fscache_volumes_seq_ops; +#endif struct fscache_volume *fscache_get_volume(struct fscache_volume *volume, enum fscache_volume_trace where); From b3c958c20a61fb8514fa16e3edcb421703600ee0 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Fri, 1 Apr 2022 14:37:15 +0800 Subject: [PATCH 451/708] fscache: Use wrapper fscache_set_cache_state() directly when relinquishing We already have the wrapper function to set cache state. Signed-off-by: Yue Hu Signed-off-by: David Howells Reviewed-by: Jeffle Xu cc: linux-cachefs@redhat.com Link: https://listman.redhat.com/archives/linux-cachefs/2022-April/006648.html # v1 --- fs/fscache/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index 2749933852a9..d645f8b302a2 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -214,7 +214,7 @@ void fscache_relinquish_cache(struct fscache_cache *cache) cache->ops = NULL; cache->cache_priv = NULL; - smp_store_release(&cache->state, FSCACHE_CACHE_IS_NOT_PRESENT); + fscache_set_cache_state(cache, FSCACHE_CACHE_IS_NOT_PRESENT); fscache_put_cache(cache, where); } EXPORT_SYMBOL(fscache_relinquish_cache); From 61132ceeda723d2c48cbc2610ca3213a7fcb083b Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Thu, 31 Mar 2022 19:57:18 +0800 Subject: [PATCH 452/708] fscache: remove FSCACHE_OLD_API Kconfig option Commit 01491a756578 ("fscache, cachefiles: Disable configuration") added the FSCACHE_OLD_API configuration when rewritten. Now, it's not used any more. Remove it. Signed-off-by: Yue Hu Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://listman.redhat.com/archives/linux-cachefs/2022-March/006647.html # v1 --- fs/fscache/Kconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig index 76316c4a3fb7..b313a978ae0a 100644 --- a/fs/fscache/Kconfig +++ b/fs/fscache/Kconfig @@ -38,6 +38,3 @@ config FSCACHE_DEBUG enabled by setting bits in /sys/modules/fscache/parameter/debug. See Documentation/filesystems/caching/fscache.rst for more information. - -config FSCACHE_OLD_API - bool From a04cd1600b831a16625b45226b90a292c8f6e8d9 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 8 Apr 2022 13:08:52 -0700 Subject: [PATCH 453/708] mm: migrate: use thp_order instead of HPAGE_PMD_ORDER for new page allocation. Fix a VM_BUG_ON_FOLIO(folio_nr_pages(old) != nr_pages) crash. With folios support, it is possible to have other than HPAGE_PMD_ORDER THPs, in the form of folios, in the system. Use thp_order() to correctly determine the source page order during migration. Link: https://lkml.kernel.org/r/20220404165325.1883267-1-zi.yan@sent.com Link: https://lore.kernel.org/linux-mm/20220404132908.GA785673@u2004/ Fixes: d68eccad3706 ("mm/filemap: Allow large folios to be added to the page cache") Reported-by: Naoya Horiguchi Signed-off-by: Zi Yan Cc: Matthew Wilcox Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 2 +- mm/migrate.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a2516d31db6c..358b7c11426d 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1209,7 +1209,7 @@ static struct page *new_page(struct page *page, unsigned long start) struct page *thp; thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address, - HPAGE_PMD_ORDER); + thp_order(page)); if (!thp) return NULL; prep_transhuge_page(thp); diff --git a/mm/migrate.c b/mm/migrate.c index de175e2fdba5..79e4b36f709a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1547,7 +1547,7 @@ struct page *alloc_migration_target(struct page *page, unsigned long private) */ gfp_mask &= ~__GFP_RECLAIM; gfp_mask |= GFP_TRANSHUGE; - order = HPAGE_PMD_ORDER; + order = thp_order(page); } zidx = zone_idx(page_zone(page)); if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE) From 66f133ceab7456c789f70a242991ed1b27ba1c3d Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 8 Apr 2022 13:08:55 -0700 Subject: [PATCH 454/708] highmem: fix checks in __kmap_local_sched_{in,out} When CONFIG_DEBUG_KMAP_LOCAL is enabled __kmap_local_sched_{in,out} check that even slots in the tsk->kmap_ctrl.pteval are unmapped. The slots are initialized with 0 value, but the check is done with pte_none. 0 pte however does not necessarily mean that pte_none will return true. e.g. on xtensa it returns false, resulting in the following runtime warnings: WARNING: CPU: 0 PID: 101 at mm/highmem.c:627 __kmap_local_sched_out+0x51/0x108 CPU: 0 PID: 101 Comm: touch Not tainted 5.17.0-rc7-00010-gd3a1cdde80d2-dirty #13 Call Trace: dump_stack+0xc/0x40 __warn+0x8f/0x174 warn_slowpath_fmt+0x48/0xac __kmap_local_sched_out+0x51/0x108 __schedule+0x71a/0x9c4 preempt_schedule_irq+0xa0/0xe0 common_exception_return+0x5c/0x93 do_wp_page+0x30e/0x330 handle_mm_fault+0xa70/0xc3c do_page_fault+0x1d8/0x3c4 common_exception+0x7f/0x7f WARNING: CPU: 0 PID: 101 at mm/highmem.c:664 __kmap_local_sched_in+0x50/0xe0 CPU: 0 PID: 101 Comm: touch Tainted: G W 5.17.0-rc7-00010-gd3a1cdde80d2-dirty #13 Call Trace: dump_stack+0xc/0x40 __warn+0x8f/0x174 warn_slowpath_fmt+0x48/0xac __kmap_local_sched_in+0x50/0xe0 finish_task_switch$isra$0+0x1ce/0x2f8 __schedule+0x86e/0x9c4 preempt_schedule_irq+0xa0/0xe0 common_exception_return+0x5c/0x93 do_wp_page+0x30e/0x330 handle_mm_fault+0xa70/0xc3c do_page_fault+0x1d8/0x3c4 common_exception+0x7f/0x7f Fix it by replacing !pte_none(pteval) with pte_val(pteval) != 0. Link: https://lkml.kernel.org/r/20220403235159.3498065-1-jcmvbkbc@gmail.com Fixes: 5fbda3ecd14a ("sched: highmem: Store local kmaps in task struct") Signed-off-by: Max Filippov Reviewed-by: Thomas Gleixner Cc: "Peter Zijlstra (Intel)" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/highmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/highmem.c b/mm/highmem.c index 0cc0c4da7ed9..1a692997fac4 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -624,7 +624,7 @@ void __kmap_local_sched_out(void) /* With debug all even slots are unmapped and act as guard */ if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { - WARN_ON_ONCE(!pte_none(pteval)); + WARN_ON_ONCE(pte_val(pteval) != 0); continue; } if (WARN_ON_ONCE(pte_none(pteval))) @@ -661,7 +661,7 @@ void __kmap_local_sched_in(void) /* With debug all even slots are unmapped and act as guard */ if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { - WARN_ON_ONCE(!pte_none(pteval)); + WARN_ON_ONCE(pte_val(pteval) != 0); continue; } if (WARN_ON_ONCE(pte_none(pteval))) From eafc0a02391b7b36617b36c97c4b5d6832cf5e24 Mon Sep 17 00:00:00 2001 From: Guo Xuenan Date: Fri, 8 Apr 2022 13:08:58 -0700 Subject: [PATCH 455/708] lz4: fix LZ4_decompress_safe_partial read out of bound When partialDecoding, it is EOF if we've either filled the output buffer or can't proceed with reading an offset for following match. In some extreme corner cases when compressed data is suitably corrupted, UAF will occur. As reported by KASAN [1], LZ4_decompress_safe_partial may lead to read out of bound problem during decoding. lz4 upstream has fixed it [2] and this issue has been disscussed here [3] before. current decompression routine was ported from lz4 v1.8.3, bumping lib/lz4 to v1.9.+ is certainly a huge work to be done later, so, we'd better fix it first. [1] https://lore.kernel.org/all/000000000000830d1205cf7f0477@google.com/ [2] https://github.com/lz4/lz4/commit/c5d6f8a8be3927c0bec91bcc58667a6cfad244ad# [3] https://lore.kernel.org/all/CC666AE8-4CA4-4951-B6FB-A2EFDE3AC03B@fb.com/ Link: https://lkml.kernel.org/r/20211111105048.2006070-1-guoxuenan@huawei.com Reported-by: syzbot+63d688f1d899c588fb71@syzkaller.appspotmail.com Signed-off-by: Guo Xuenan Reviewed-by: Nick Terrell Acked-by: Gao Xiang Cc: Yann Collet Cc: Chengyang Fan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/lz4/lz4_decompress.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 926f4823d5ea..fd1728d94bab 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -271,8 +271,12 @@ static FORCE_INLINE int LZ4_decompress_generic( ip += length; op += length; - /* Necessarily EOF, due to parsing restrictions */ - if (!partialDecoding || (cpy == oend)) + /* Necessarily EOF when !partialDecoding. + * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match. + */ + if (!partialDecoding || (cpy == oend) || (ip >= (iend - 2))) break; } else { /* may overwrite up to WILDCOPYLENGTH beyond cpy */ From a431dbbc540532b7465eae4fc8b56a85a9fc7d17 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 8 Apr 2022 13:09:01 -0700 Subject: [PATCH 456/708] mm/sparsemem: fix 'mem_section' will never be NULL gcc 12 warning The gcc 12 compiler reports a "'mem_section' will never be NULL" warning on the following code: static inline struct mem_section *__nr_to_section(unsigned long nr) { #ifdef CONFIG_SPARSEMEM_EXTREME if (!mem_section) return NULL; #endif if (!mem_section[SECTION_NR_TO_ROOT(nr)]) return NULL; : It happens with CONFIG_SPARSEMEM_EXTREME off. The mem_section definition is #ifdef CONFIG_SPARSEMEM_EXTREME extern struct mem_section **mem_section; #else extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; #endif In the !CONFIG_SPARSEMEM_EXTREME case, mem_section is a static 2-dimensional array and so the check "!mem_section[SECTION_NR_TO_ROOT(nr)]" doesn't make sense. Fix this warning by moving the "!mem_section[SECTION_NR_TO_ROOT(nr)]" check up inside the CONFIG_SPARSEMEM_EXTREME block and adding an explicit NR_SECTION_ROOTS check to make sure that there is no out-of-bound array access. Link: https://lkml.kernel.org/r/20220331180246.2746210-1-longman@redhat.com Fixes: 3e347261a80b ("sparsemem extreme implementation") Signed-off-by: Waiman Long Reported-by: Justin Forbes Cc: "Kirill A . Shutemov" Cc: Ingo Molnar Cc: Rafael Aquini Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 962b14d403e8..46ffab808f03 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1397,13 +1397,16 @@ static inline unsigned long *section_to_usemap(struct mem_section *ms) static inline struct mem_section *__nr_to_section(unsigned long nr) { + unsigned long root = SECTION_NR_TO_ROOT(nr); + + if (unlikely(root >= NR_SECTION_ROOTS)) + return NULL; + #ifdef CONFIG_SPARSEMEM_EXTREME - if (!mem_section) + if (!mem_section || !mem_section[root]) return NULL; #endif - if (!mem_section[SECTION_NR_TO_ROOT(nr)]) - return NULL; - return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; + return &mem_section[root][nr & SECTION_ROOT_MASK]; } extern size_t mem_section_usage_size(void); From 01e67e04c28170c47700c2c226d732bbfedb1ad0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 8 Apr 2022 13:09:04 -0700 Subject: [PATCH 457/708] mmmremap.c: avoid pointless invalidate_range_start/end on mremap(old_size=0) If an mremap() syscall with old_size=0 ends up in move_page_tables(), it will call invalidate_range_start()/invalidate_range_end() unnecessarily, i.e. with an empty range. This causes a WARN in KVM's mmu_notifier. In the past, empty ranges have been diagnosed to be off-by-one bugs, hence the WARNing. Given the low (so far) number of unique reports, the benefits of detecting more buggy callers seem to outweigh the cost of having to fix cases such as this one, where userspace is doing something silly. In this particular case, an early return from move_page_tables() is enough to fix the issue. Link: https://lkml.kernel.org/r/20220329173155.172439-1-pbonzini@redhat.com Reported-by: syzbot+6bde52d89cfdf9f61425@syzkaller.appspotmail.com Signed-off-by: Paolo Bonzini Cc: Sean Christopherson Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mremap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/mremap.c b/mm/mremap.c index 9d76da79594d..303d3290b938 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -486,6 +486,9 @@ unsigned long move_page_tables(struct vm_area_struct *vma, pmd_t *old_pmd, *new_pmd; pud_t *old_pud, *new_pud; + if (!len) + return 0; + old_end = old_addr + len; flush_cache_range(vma, old_addr, old_end); From 4ad099559b00ac01c3726e5c95dc3108ef47d03e Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 8 Apr 2022 13:09:07 -0700 Subject: [PATCH 458/708] mm/mempolicy: fix mpol_new leak in shared_policy_replace If mpol_new is allocated but not used in restart loop, mpol_new will be freed via mpol_put before returning to the caller. But refcnt is not initialized yet, so mpol_put could not do the right things and might leak the unused mpol_new. This would happen if mempolicy was updated on the shared shmem file while the sp->lock has been dropped during the memory allocation. This issue could be triggered easily with the below code snippet if there are many processes doing the below work at the same time: shmid = shmget((key_t)5566, 1024 * PAGE_SIZE, 0666|IPC_CREAT); shm = shmat(shmid, 0, 0); loop many times { mbind(shm, 1024 * PAGE_SIZE, MPOL_LOCAL, mask, maxnode, 0); mbind(shm + 128 * PAGE_SIZE, 128 * PAGE_SIZE, MPOL_DEFAULT, mask, maxnode, 0); } Link: https://lkml.kernel.org/r/20220329111416.27954-1-linmiaohe@huawei.com Fixes: 42288fe366c4 ("mm: mempolicy: Convert shared_policy mutex to spinlock") Signed-off-by: Miaohe Lin Acked-by: Michal Hocko Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: [3.8] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 358b7c11426d..88a74bc4cba5 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2733,6 +2733,7 @@ alloc_new: mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!mpol_new) goto err_out; + atomic_set(&mpol_new->refcnt, 1); goto restart; } From 0347b2b95c3e1474c4b10f53df7ff2e841fda147 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 8 Apr 2022 13:09:10 -0700 Subject: [PATCH 459/708] mailmap: update Vasily Averin's email address I'm moving to a @linux.dev account. Map my old addresses. Link: https://lkml.kernel.org/r/737c7c2b-cdab-63ee-be90-cb33316c9657@linux.dev Signed-off-by: Vasily Averin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.mailmap b/.mailmap index b9d358217586..93458154ce7d 100644 --- a/.mailmap +++ b/.mailmap @@ -391,6 +391,10 @@ Uwe Kleine-König Uwe Kleine-König Uwe Kleine-König Valdis Kletnieks +Vasily Averin +Vasily Averin +Vasily Averin +Vasily Averin Vinod Koul Vinod Koul Vinod Koul From b33e1044475afffaaabe51d35837aa10c09ba9ae Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 8 Apr 2022 13:09:13 -0700 Subject: [PATCH 460/708] mm/list_lru.c: revert "mm/list_lru: optimize memcg_reparent_list_lru_node()" Commit 405cc51fc104 ("mm/list_lru: optimize memcg_reparent_list_lru_node()") has subtle races which are proving ugly to fix. Revert the original optimization. If quantitative testing indicates that we have a significant problem here then other implementations can be looked at. Fixes: 405cc51fc104 ("mm/list_lru: optimize memcg_reparent_list_lru_node()") Acked-by: Shakeel Butt Reviewed-by: Muchun Song Acked-by: Michal Hocko Cc: Waiman Long Cc: Roman Gushchin Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/list_lru.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/mm/list_lru.c b/mm/list_lru.c index c669d87001a6..ba76428ceece 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -394,12 +394,6 @@ static void memcg_reparent_list_lru_node(struct list_lru *lru, int nid, int dst_idx = dst_memcg->kmemcg_id; struct list_lru_one *src, *dst; - /* - * If there is no lru entry in this nlru, we can skip it immediately. - */ - if (!READ_ONCE(nlru->nr_items)) - return; - /* * Since list_lru_{add,del} may be called under an IRQ-safe lock, * we have to use IRQ-safe primitives here to avoid deadlock. From 4071a1b9e24ee394b7492bff7542707ee9ad986d Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 8 Apr 2022 13:09:16 -0700 Subject: [PATCH 461/708] MAINTAINERS: add Tom as clang reviewer I have been helping with build breaks and other clang things and would like to help with the reviews. Link: https://lkml.kernel.org/r/20220407175715.3378998-1-trix@redhat.com Signed-off-by: Tom Rix Acked-by: Nathan Chancellor Acked-by: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3af36d852c38..17156b0a8925 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4791,6 +4791,7 @@ F: .clang-format CLANG/LLVM BUILD SUPPORT M: Nathan Chancellor M: Nick Desaulniers +R: Tom Rix L: llvm@lists.linux.dev S: Supported W: https://clangbuiltlinux.github.io/ From 1ddff774164f1bcd0fcf988f7a5bb24270fbdf2c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 17 Mar 2022 14:28:34 +0000 Subject: [PATCH 462/708] cifs: Split the smb3_add_credits tracepoint Split the smb3_add_credits tracepoint to make it more obvious when looking at the logs which line corresponds to what credit change. Also add a tracepoint for credit overflow when it's being added back. Note that it might be better to add another field to the tracepoint for the information rather than splitting it. It would also be useful to store the MID potentially, though that isn't available when the credits are first obtained. Signed-off-by: David Howells cc: Shyam Prasad N cc: Rohith Surabattula cc: linux-cifs@vger.kernel.org Acked-by: Paulo Alcantara (SUSE) Reviewed-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/cifs/connect.c | 2 +- fs/cifs/smb2ops.c | 9 ++++++--- fs/cifs/trace.h | 7 +++++++ fs/cifs/transport.c | 4 ++-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 54155eb4faac..902e8c6c0f9c 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1049,7 +1049,7 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) spin_unlock(&server->req_lock); wake_up(&server->request_q); - trace_smb3_add_credits(server->CurrentMid, + trace_smb3_hdr_credits(server->CurrentMid, server->conn_id, server->hostname, scredits, le16_to_cpu(shdr->CreditRequest), in_flight); cifs_server_dbg(FYI, "%s: added %u credits total=%d\n", diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index db23f5b404ba..a67df8eaf702 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -86,6 +86,9 @@ smb2_add_credits(struct TCP_Server_Info *server, if (*val > 65000) { *val = 65000; /* Don't get near 64K credits, avoid srv bugs */ pr_warn_once("server overflowed SMB3 credits\n"); + trace_smb3_overflow_credits(server->CurrentMid, + server->conn_id, server->hostname, *val, + add, server->in_flight); } server->in_flight--; if (server->in_flight == 0 && @@ -251,7 +254,7 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, in_flight = server->in_flight; spin_unlock(&server->req_lock); - trace_smb3_add_credits(server->CurrentMid, + trace_smb3_wait_credits(server->CurrentMid, server->conn_id, server->hostname, scredits, -(credits->value), in_flight); cifs_dbg(FYI, "%s: removed %u credits total=%d\n", __func__, credits->value, scredits); @@ -300,7 +303,7 @@ smb2_adjust_credits(struct TCP_Server_Info *server, spin_unlock(&server->req_lock); wake_up(&server->request_q); - trace_smb3_add_credits(server->CurrentMid, + trace_smb3_adj_credits(server->CurrentMid, server->conn_id, server->hostname, scredits, credits->value - new_val, in_flight); cifs_dbg(FYI, "%s: adjust added %u credits total=%d\n", @@ -2492,7 +2495,7 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server) spin_unlock(&server->req_lock); wake_up(&server->request_q); - trace_smb3_add_credits(server->CurrentMid, + trace_smb3_pend_credits(server->CurrentMid, server->conn_id, server->hostname, scredits, le16_to_cpu(shdr->CreditRequest), in_flight); cifs_dbg(FYI, "%s: status pending add %u credits total=%d\n", diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index 6cecf302dcfd..bc279616c513 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -1006,6 +1006,13 @@ DEFINE_SMB3_CREDIT_EVENT(credit_timeout); DEFINE_SMB3_CREDIT_EVENT(insufficient_credits); DEFINE_SMB3_CREDIT_EVENT(too_many_credits); DEFINE_SMB3_CREDIT_EVENT(add_credits); +DEFINE_SMB3_CREDIT_EVENT(adj_credits); +DEFINE_SMB3_CREDIT_EVENT(hdr_credits); +DEFINE_SMB3_CREDIT_EVENT(nblk_credits); +DEFINE_SMB3_CREDIT_EVENT(pend_credits); +DEFINE_SMB3_CREDIT_EVENT(wait_credits); +DEFINE_SMB3_CREDIT_EVENT(waitff_credits); +DEFINE_SMB3_CREDIT_EVENT(overflow_credits); DEFINE_SMB3_CREDIT_EVENT(set_credits); #endif /* _CIFS_TRACE_H */ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index eeb1a699bd6f..d9d1c353bafc 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -542,7 +542,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, in_flight = server->in_flight; spin_unlock(&server->req_lock); - trace_smb3_add_credits(server->CurrentMid, + trace_smb3_nblk_credits(server->CurrentMid, server->conn_id, server->hostname, scredits, -1, in_flight); cifs_dbg(FYI, "%s: remove %u credits total=%d\n", __func__, 1, scredits); @@ -648,7 +648,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, in_flight = server->in_flight; spin_unlock(&server->req_lock); - trace_smb3_add_credits(server->CurrentMid, + trace_smb3_waitff_credits(server->CurrentMid, server->conn_id, server->hostname, scredits, -(num_credits), in_flight); cifs_dbg(FYI, "%s: remove %u credits total=%d\n", From e6934e4048c91502efcb21da92b7ae37cd8fa741 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Fri, 8 Apr 2022 12:15:21 +0200 Subject: [PATCH 463/708] net: dsa: felix: suppress -EPROBE_DEFER errors The DSA master might not have been probed yet in which case the probe of the felix switch fails with -EPROBE_DEFER: [ 4.435305] mscc_felix 0000:00:00.5: Failed to register DSA switch: -517 It is not an error. Use dev_err_probe() to demote this particular error to a debug message. Fixes: 56051948773e ("net: dsa: ocelot: add driver for Felix switch family") Signed-off-by: Michael Walle Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220408101521.281886-1-michael@walle.cc Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix_vsc9959.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 8d382b27e625..52a8566071ed 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -2316,7 +2316,7 @@ static int felix_pci_probe(struct pci_dev *pdev, err = dsa_register_switch(ds); if (err) { - dev_err(&pdev->dev, "Failed to register DSA switch: %d\n", err); + dev_err_probe(&pdev->dev, err, "Failed to register DSA switch\n"); goto err_register_ds; } From 8d3a6c37d50d5a0504c126c932cc749e6dd9c78f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 8 Apr 2022 10:22:04 +0800 Subject: [PATCH 464/708] net: atlantic: Avoid out-of-bounds indexing UBSAN warnings are observed on atlantic driver: [ 294.432996] UBSAN: array-index-out-of-bounds in /build/linux-Qow4fL/linux-5.15.0/drivers/net/ethernet/aquantia/atlantic/aq_nic.c:484:48 [ 294.433695] index 8 is out of range for type 'aq_vec_s *[8]' The ring is dereferenced right before breaking out the loop, to prevent that from happening, only use the index in the loop to fix the issue. BugLink: https://bugs.launchpad.net/bugs/1958770 Tested-by: Mario Limonciello Signed-off-by: Kai-Heng Feng Reviewed-by: Igor Russkikh Link: https://lore.kernel.org/r/20220408022204.16815-1-kai.heng.feng@canonical.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/aquantia/atlantic/aq_nic.c | 8 +++---- .../net/ethernet/aquantia/atlantic/aq_vec.c | 24 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 33f1a1377588..24d715c28a35 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -486,8 +486,8 @@ int aq_nic_start(struct aq_nic_s *self) if (err < 0) goto err_exit; - for (i = 0U, aq_vec = self->aq_vec[0]; - self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) { + for (i = 0U; self->aq_vecs > i; ++i) { + aq_vec = self->aq_vec[i]; err = aq_vec_start(aq_vec); if (err < 0) goto err_exit; @@ -517,8 +517,8 @@ int aq_nic_start(struct aq_nic_s *self) mod_timer(&self->polling_timer, jiffies + AQ_CFG_POLLING_TIMER_INTERVAL); } else { - for (i = 0U, aq_vec = self->aq_vec[0]; - self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) { + for (i = 0U; self->aq_vecs > i; ++i) { + aq_vec = self->aq_vec[i]; err = aq_pci_func_alloc_irq(self, i, self->ndev->name, aq_vec_isr, aq_vec, aq_vec_get_affinity_mask(aq_vec)); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index f4774cf051c9..6ab1f3212d24 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -43,8 +43,8 @@ static int aq_vec_poll(struct napi_struct *napi, int budget) if (!self) { err = -EINVAL; } else { - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; u64_stats_update_begin(&ring[AQ_VEC_RX_ID].stats.rx.syncp); ring[AQ_VEC_RX_ID].stats.rx.polls++; u64_stats_update_end(&ring[AQ_VEC_RX_ID].stats.rx.syncp); @@ -182,8 +182,8 @@ int aq_vec_init(struct aq_vec_s *self, const struct aq_hw_ops *aq_hw_ops, self->aq_hw_ops = aq_hw_ops; self->aq_hw = aq_hw; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; err = aq_ring_init(&ring[AQ_VEC_TX_ID], ATL_RING_TX); if (err < 0) goto err_exit; @@ -224,8 +224,8 @@ int aq_vec_start(struct aq_vec_s *self) unsigned int i = 0U; int err = 0; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; err = self->aq_hw_ops->hw_ring_tx_start(self->aq_hw, &ring[AQ_VEC_TX_ID]); if (err < 0) @@ -248,8 +248,8 @@ void aq_vec_stop(struct aq_vec_s *self) struct aq_ring_s *ring = NULL; unsigned int i = 0U; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; self->aq_hw_ops->hw_ring_tx_stop(self->aq_hw, &ring[AQ_VEC_TX_ID]); @@ -268,8 +268,8 @@ void aq_vec_deinit(struct aq_vec_s *self) if (!self) goto err_exit; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]); aq_ring_rx_deinit(&ring[AQ_VEC_RX_ID]); } @@ -297,8 +297,8 @@ void aq_vec_ring_free(struct aq_vec_s *self) if (!self) goto err_exit; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; aq_ring_free(&ring[AQ_VEC_TX_ID]); if (i < self->rx_rings) aq_ring_free(&ring[AQ_VEC_RX_ID]); From 8c3ce496bd612bd21679e445f75fcabb6be997b2 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Sat, 9 Apr 2022 09:15:33 +0530 Subject: [PATCH 465/708] RISC-V: KVM: Don't clear hgatp CSR in kvm_arch_vcpu_put() We might have RISC-V systems (such as QEMU) where VMID is not part of the TLB entry tag so these systems will have to flush all TLB entries upon any change in hgatp.VMID. Currently, we zero-out hgatp CSR in kvm_arch_vcpu_put() and we re-program hgatp CSR in kvm_arch_vcpu_load(). For above described systems, this will flush all TLB entries whenever VCPU exits to user-space hence reducing performance. This patch fixes above described performance issue by not clearing hgatp CSR in kvm_arch_vcpu_put(). Fixes: 34bde9d8b9e6 ("RISC-V: KVM: Implement VCPU world-switch") Cc: stable@vger.kernel.org Signed-off-by: Anup Patel Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 624166004e36..6785aef4cbd4 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -653,8 +653,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->arch.isa); kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context); - csr_write(CSR_HGATP, 0); - csr->vsstatus = csr_read(CSR_VSSTATUS); csr->vsie = csr_read(CSR_VSIE); csr->vstvec = csr_read(CSR_VSTVEC); From fac3725364397f9a40a101f089b86ea655a58d06 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Sat, 9 Apr 2022 09:15:44 +0530 Subject: [PATCH 466/708] KVM: selftests: riscv: Set PTE A and D bits in VS-stage page table Supporting hardware updates of PTE A and D bits is optional for any RISC-V implementation so current software strategy is to always set these bits in both G-stage (hypervisor) and VS-stage (guest kernel). If PTE A and D bits are not set by software (hypervisor or guest) then RISC-V implementations not supporting hardware updates of these bits will cause traps even for perfectly valid PTEs. Based on above explanation, the VS-stage page table created by various KVM selftest applications is not correct because PTE A and D bits are not set. This patch fixes VS-stage page table programming of PTE A and D bits for KVM selftests. Fixes: 3e06cdf10520 ("KVM: selftests: Add initial support for RISC-V 64-bit") Signed-off-by: Anup Patel Tested-by: Mayuresh Chitale Signed-off-by: Anup Patel --- tools/testing/selftests/kvm/include/riscv/processor.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index dc284c6bdbc3..eca5c622efd2 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -101,7 +101,9 @@ static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, #define PGTBL_PTE_WRITE_SHIFT 2 #define PGTBL_PTE_READ_MASK 0x0000000000000002ULL #define PGTBL_PTE_READ_SHIFT 1 -#define PGTBL_PTE_PERM_MASK (PGTBL_PTE_EXECUTE_MASK | \ +#define PGTBL_PTE_PERM_MASK (PGTBL_PTE_ACCESSED_MASK | \ + PGTBL_PTE_DIRTY_MASK | \ + PGTBL_PTE_EXECUTE_MASK | \ PGTBL_PTE_WRITE_MASK | \ PGTBL_PTE_READ_MASK) #define PGTBL_PTE_VALID_MASK 0x0000000000000001ULL From ebdef0de2dbc40e697adaa6b3408130f7a7b8351 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Sat, 9 Apr 2022 09:15:51 +0530 Subject: [PATCH 467/708] KVM: selftests: riscv: Fix alignment of the guest_hang() function The guest_hang() function is used as the default exception handler for various KVM selftests applications by setting it's address in the vstvec CSR. The vstvec CSR requires exception handler base address to be at least 4-byte aligned so this patch fixes alignment of the guest_hang() function. Fixes: 3e06cdf10520 ("KVM: selftests: Add initial support for RISC-V 64-bit") Signed-off-by: Anup Patel Tested-by: Mayuresh Chitale Signed-off-by: Anup Patel --- tools/testing/selftests/kvm/lib/riscv/processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index d377f2603d98..3961487a4870 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -268,7 +268,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6); } -static void guest_hang(void) +static void __aligned(16) guest_hang(void) { while (1) ; From 4054eee9290248bf66c5eacb58879c9aaad37f71 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Sat, 9 Apr 2022 09:16:00 +0530 Subject: [PATCH 468/708] RISC-V: KVM: include missing hwcap.h into vcpu_fp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vcpu_fp uses the riscv_isa_extension mechanism which gets defined in hwcap.h but doesn't include that head file. While it seems to work in most cases, in certain conditions this can lead to build failures like ../arch/riscv/kvm/vcpu_fp.c: In function ‘kvm_riscv_vcpu_fp_reset’: ../arch/riscv/kvm/vcpu_fp.c:22:13: error: implicit declaration of function ‘riscv_isa_extension_available’ [-Werror=implicit-function-declaration] 22 | if (riscv_isa_extension_available(&isa, f) || | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../arch/riscv/kvm/vcpu_fp.c:22:49: error: ‘f’ undeclared (first use in this function) 22 | if (riscv_isa_extension_available(&isa, f) || Fix this by simply including the necessary header. Fixes: 0a86512dc113 ("RISC-V: KVM: Factor-out FP virtualization into separate sources") Signed-off-by: Heiko Stuebner Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_fp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c index 4449a976e5a6..d4308c512007 100644 --- a/arch/riscv/kvm/vcpu_fp.c +++ b/arch/riscv/kvm/vcpu_fp.c @@ -11,6 +11,7 @@ #include #include #include +#include #ifdef CONFIG_FPU void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu) From 940442deea98b3280061095dd811e6136f1b41f6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Apr 2020 09:12:55 -0300 Subject: [PATCH 469/708] tools include UAPI: Sync linux/vhost.h with the kernel sources To get the changes in: b04d910af330b55e ("vdpa: support exposing the count of vqs to userspace") a61280ddddaa45f9 ("vdpa: support exposing the config size to userspace") Silencing this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/vhost.h' differs from latest version at 'include/uapi/linux/vhost.h' diff -u tools/include/uapi/linux/vhost.h include/uapi/linux/vhost.h $ diff -u tools/include/uapi/linux/vhost.h include/uapi/linux/vhost.h --- tools/include/uapi/linux/vhost.h 2021-07-15 16:17:01.840818309 -0300 +++ include/uapi/linux/vhost.h 2022-04-02 18:55:05.702522387 -0300 @@ -150,4 +150,11 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > before $ cp include/uapi/linux/vhost.h tools/include/uapi/linux/vhost.h $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > after $ diff -u before after --- before 2022-04-04 14:52:25.036375145 -0300 +++ after 2022-04-04 14:52:31.906549976 -0300 @@ -38,4 +38,6 @@ [0x73] = "VDPA_GET_CONFIG", [0x76] = "VDPA_GET_VRING_NUM", [0x78] = "VDPA_GET_IOVA_RANGE", + [0x79] = "VDPA_GET_CONFIG_SIZE", + [0x80] = "VDPA_GET_VQS_COUNT", }; $ Cc: Longpeng Cc: Michael S. Tsirkin Link: https://lore.kernel.org/lkml/YksxoFcOARk%2Fldev@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/vhost.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index c998860d7bbc..5d99e7c242a2 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -150,4 +150,11 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif From 541f695cbcb6932c22638b06e0cbe1d56177e2e9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 4 Apr 2022 17:28:48 -0300 Subject: [PATCH 470/708] tools build: Use $(shell ) instead of `` to get embedded libperl's ccopts Just like its done for ldopts and for both in tools/perf/Makefile.config. Using `` to initialize PERL_EMBED_CCOPTS somehow precludes using: $(filter-out SOMETHING_TO_FILTER,$(PERL_EMBED_CCOPTS)) And we need to do it to allow for building with versions of clang where some gcc options selected by distros are not available. Tested-by: Sedat Dilek # Debian/Selfmade LLVM-14 (x86-64) Cc: Adrian Hunter Cc: Fangrui Song Cc: Florian Fainelli Cc: Ian Rogers Cc: Jiri Olsa Cc: John Keeping Cc: Leo Yan Cc: Michael Petlan Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Link: http://lore.kernel.org/lkml/YktYX2OnLtyobRYD@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 1480910c792e..90774b60d31b 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -217,7 +217,7 @@ strip-libs = $(filter-out -l%,$(1)) PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) -PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` +PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) $(OUTPUT)test-libperl.bin: From 41caff459a5b956b3e23ba9ca759dd0629ad3dda Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 5 Apr 2022 10:33:21 -0300 Subject: [PATCH 471/708] tools build: Filter out options and warnings not supported by clang These make the feature check fail when using clang, so remove them just like is done in tools/perf/Makefile.config to build perf itself. Adding -Wno-compound-token-split-by-macro to tools/perf/Makefile.config when building with clang is also necessary to avoid these warnings turned into errors (-Werror): CC /tmp/build/perf/util/scripting-engines/trace-event-perl.o In file included from util/scripting-engines/trace-event-perl.c:35: In file included from /usr/lib64/perl5/CORE/perl.h:4085: In file included from /usr/lib64/perl5/CORE/hv.h:659: In file included from /usr/lib64/perl5/CORE/hv_func.h:34: In file included from /usr/lib64/perl5/CORE/sbox32_hash.h:4: /usr/lib64/perl5/CORE/zaphod32_hash.h:150:5: error: '(' and '{' tokens introducing statement expression appear in different macro expansion contexts [-Werror,-Wcompound-token-split-by-macro] ZAPHOD32_SCRAMBLE32(state[0],0x9fade23b); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/lib64/perl5/CORE/zaphod32_hash.h:80:38: note: expanded from macro 'ZAPHOD32_SCRAMBLE32' #define ZAPHOD32_SCRAMBLE32(v,prime) STMT_START { \ ^~~~~~~~~~ /usr/lib64/perl5/CORE/perl.h:737:29: note: expanded from macro 'STMT_START' # define STMT_START (void)( /* gcc supports "({ STATEMENTS; })" */ ^ /usr/lib64/perl5/CORE/zaphod32_hash.h:150:5: note: '{' token is here ZAPHOD32_SCRAMBLE32(state[0],0x9fade23b); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/lib64/perl5/CORE/zaphod32_hash.h:80:49: note: expanded from macro 'ZAPHOD32_SCRAMBLE32' #define ZAPHOD32_SCRAMBLE32(v,prime) STMT_START { \ ^ /usr/lib64/perl5/CORE/zaphod32_hash.h:150:5: error: '}' and ')' tokens terminating statement expression appear in different macro expansion contexts [-Werror,-Wcompound-token-split-by-macro] ZAPHOD32_SCRAMBLE32(state[0],0x9fade23b); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/lib64/perl5/CORE/zaphod32_hash.h:87:41: note: expanded from macro 'ZAPHOD32_SCRAMBLE32' v ^= (v>>23); \ ^ /usr/lib64/perl5/CORE/zaphod32_hash.h:150:5: note: ')' token is here ZAPHOD32_SCRAMBLE32(state[0],0x9fade23b); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/lib64/perl5/CORE/zaphod32_hash.h:88:3: note: expanded from macro 'ZAPHOD32_SCRAMBLE32' } STMT_END ^~~~~~~~ /usr/lib64/perl5/CORE/perl.h:738:21: note: expanded from macro 'STMT_END' # define STMT_END ) ^ Please refer to the discussion on the Link: tag below, where Nathan clarifies the situation: acme> And then get to the problems at the end of this message, which seem acme> similar to the problem described here: acme> acme> From Nathan Chancellor <> acme> Subject [PATCH] mwifiex: Remove unnecessary braces from HostCmd_SET_SEQ_NO_BSS_INFO acme> acme> https://lkml.org/lkml/2020/9/1/135 acme> acme> So perhaps in this case its better to disable that acme> -Werror,-Wcompound-token-split-by-macro when building with clang? Yes, I think that is probably the best solution. As far as I can tell, at least in this file and context, the warning appears harmless, as the "create a GNU C statement expression from two different macros" is very much intentional, based on the presence of PERL_USE_GCC_BRACE_GROUPS. The warning is fixed in upstream Perl by just avoiding creating GNU C statement expressions using STMT_START and STMT_END: https://github.com/Perl/perl5/issues/18780 https://github.com/Perl/perl5/pull/18984 If I am reading the source code correctly, an alternative to disabling the warning would be specifying -DPERL_GCC_BRACE_GROUPS_FORBIDDEN but it seems like that might end up impacting more than just this site, according to the issue discussion above. Based-on-a-patch-by: Sedat Dilek Tested-by: Sedat Dilek # Debian/Selfmade LLVM-14 (x86-64) Cc: Adrian Hunter Cc: Fangrui Song Cc: Florian Fainelli Cc: Ian Rogers Cc: Jiri Olsa Cc: John Keeping Cc: Leo Yan Cc: Michael Petlan Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Link: http://lore.kernel.org/lkml/YkxWcYzph5pC1EK8@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 7 +++++++ tools/perf/Makefile.config | 3 +++ 2 files changed, 10 insertions(+) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 90774b60d31b..de66e1cc0734 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -220,6 +220,13 @@ PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) +ifeq ($(CC_NO_CLANG), 0) + PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS)) + FLAGS_PERL_EMBED += -Wno-compound-token-split-by-macro +endif + $(OUTPUT)test-libperl.bin: $(BUILD) $(FLAGS_PERL_EMBED) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 96ad944ca6a8..5b5ba475a5c0 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -790,6 +790,9 @@ else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) CFLAGS += -DHAVE_LIBPERL_SUPPORT + ifeq ($(CC_NO_CLANG), 0) + CFLAGS += -Wno-compound-token-split-by-macro + endif $(call detected,CONFIG_LIBPERL) endif endif From dd6e1fe91cdd52774ca642d1da75b58a86356b56 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 8 Apr 2022 10:08:07 -0300 Subject: [PATCH 472/708] perf python: Fix probing for some clang command line options The clang compiler complains about some options even without a source file being available, while others require one, so use the simple tools/build/feature/test-hello.c file. Then check for the "is not supported" string in its output, in addition to the "unknown argument" already being looked for. This was noticed when building with clang-13 where -ffat-lto-objects isn't supported and since we were looking just for "unknown argument" and not providing a source code to clang, was mistakenly assumed as being available and not being filtered to set of command line options provided to clang, leading to a build failure. Cc: Adrian Hunter Cc: Fangrui Song Cc: Florian Fainelli Cc: Ian Rogers Cc: Jiri Olsa Cc: John Keeping Cc: Leo Yan Cc: Michael Petlan Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Sedat Dilek Link: http://lore.kernel.org/lkml/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/setup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 483f05004e68..6156bb87ee3e 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -1,12 +1,14 @@ -from os import getenv +from os import getenv, path from subprocess import Popen, PIPE from re import sub cc = getenv("CC") cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline() +src_feature_tests = getenv('srctree') + '/tools/build/feature' def clang_has_option(option): - return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ] + cc_output = Popen([cc, option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines() + return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ] if cc_is_clang: from distutils.sysconfig import get_config_vars From 3a8a0475861a443f02e3a9b57d044fe2a0a99291 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 7 Apr 2022 11:04:20 -0300 Subject: [PATCH 473/708] perf build: Don't use -ffat-lto-objects in the python feature test when building with clang-13 Using -ffat-lto-objects in the python feature test when building with clang-13 results in: clang-13: error: optimization flag '-ffat-lto-objects' is not supported [-Werror,-Wignored-optimization-argument] error: command '/usr/sbin/clang' failed with exit code 1 cp: cannot stat '/tmp/build/perf/python_ext_build/lib/perf*.so': No such file or directory make[2]: *** [Makefile.perf:639: /tmp/build/perf/python/perf.so] Error 1 Noticed when building on a docker.io/library/archlinux:base container. Cc: Adrian Hunter Cc: Fangrui Song Cc: Florian Fainelli Cc: Ian Rogers Cc: Jiri Olsa Cc: John Keeping Cc: Leo Yan Cc: Michael Petlan Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Sedat Dilek Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 3 +++ tools/perf/util/setup.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 5b5ba475a5c0..f3bf9297bcc0 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -272,6 +272,9 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null) FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) + ifeq ($(CC_NO_CLANG), 0) + PYTHON_EMBED_CCOPTS := $(filter-out -ffat-lto-objects, $(PYTHON_EMBED_CCOPTS)) + endif endif FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 6156bb87ee3e..c255a2c90cd6 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -25,6 +25,8 @@ if cc_is_clang: vars[var] = sub("-fstack-protector-strong", "", vars[var]) if not clang_has_option("-fno-semantic-interposition"): vars[var] = sub("-fno-semantic-interposition", "", vars[var]) + if not clang_has_option("-ffat-lto-objects"): + vars[var] = sub("-ffat-lto-objects", "", vars[var]) from distutils.core import setup, Extension From 290fa68bdc4588637849adb8534301a1e62beee2 Mon Sep 17 00:00:00 2001 From: Chengdong Li Date: Fri, 8 Apr 2022 16:47:48 +0800 Subject: [PATCH 474/708] perf test tsc: Fix error message when not supported By default `perf test tsc` does not return the error message when the child process detected kernel does not support it. Instead, the child process prints an error message to stderr, unfortunately stderr is redirected to /dev/null when verbose <= 0. This patch does: - return TEST_SKIP to the parent process instead of TEST_OK when perf_read_tsc_conversion() is not supported. - Add a new subtest of testing if TSC is supported on current architecture by moving exist code to a separate function. It avoids two places in test__perf_time_to_tsc() that return TEST_SKIP by doing this. - Extend the test suite definition to contain above two subtests. Current test_suite and test_case structs do not support printing skip reason when the number of subtest less than 1. To print skip reason, it is necessary to extend current test suite definition. Reviewed-by: Adrian Hunter Signed-off-by: Chengdong Li Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: likexu@tencent.com Link: https://lore.kernel.org/r/20220408084748.43707-1-chengdongli@tencent.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/perf-time-to-tsc.c | 36 +++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index d12d0ad81801..cc6df49a65a1 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -47,6 +47,17 @@ } \ } +static int test__tsc_is_supported(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + if (!TSC_IS_SUPPORTED) { + pr_debug("Test not supported on this architecture\n"); + return TEST_SKIP; + } + + return TEST_OK; +} + /** * test__perf_time_to_tsc - test converting perf time to TSC. * @@ -70,7 +81,7 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su struct perf_cpu_map *cpus = NULL; struct evlist *evlist = NULL; struct evsel *evsel = NULL; - int err = -1, ret, i; + int err = TEST_FAIL, ret, i; const char *comm1, *comm2; struct perf_tsc_conversion tc; struct perf_event_mmap_page *pc; @@ -79,10 +90,6 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su u64 test_time, comm1_time = 0, comm2_time = 0; struct mmap *md; - if (!TSC_IS_SUPPORTED) { - pr_debug("Test not supported on this architecture"); - return TEST_SKIP; - } threads = thread_map__new(-1, getpid(), UINT_MAX); CHECK_NOT_NULL__(threads); @@ -124,8 +131,8 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su ret = perf_read_tsc_conversion(pc, &tc); if (ret) { if (ret == -EOPNOTSUPP) { - fprintf(stderr, " (not supported)"); - return 0; + pr_debug("perf_read_tsc_conversion is not supported in current kernel\n"); + err = TEST_SKIP; } goto out_err; } @@ -191,7 +198,7 @@ next_event: test_tsc >= comm2_tsc) goto out_err; - err = 0; + err = TEST_OK; out_err: evlist__delete(evlist); @@ -200,4 +207,15 @@ out_err: return err; } -DEFINE_SUITE("Convert perf time to TSC", perf_time_to_tsc); +static struct test_case time_to_tsc_tests[] = { + TEST_CASE_REASON("TSC support", tsc_is_supported, + "This architecture does not support"), + TEST_CASE_REASON("Perf time to TSC", perf_time_to_tsc, + "perf_read_tsc_conversion is not supported"), + { .name = NULL, } +}; + +struct test_suite suite__perf_time_to_tsc = { + .desc = "Convert perf time to TSC", + .test_cases = time_to_tsc_tests, +}; From 278aaba2c555a54e62aec40e04defaa9fffcc1c9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 9 Apr 2022 11:48:15 -0300 Subject: [PATCH 475/708] tools headers arm64: Sync arm64's cputype.h with the kernel sources To get the changes in: 83bea32ac7ed37bb ("arm64: Add part number for Arm Cortex-A78AE") That addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/arm64/include/asm/cputype.h' differs from latest version at 'arch/arm64/include/asm/cputype.h' diff -u tools/arch/arm64/include/asm/cputype.h arch/arm64/include/asm/cputype.h Cc: Ali Saidi Cc: Andrew Kilroy Cc: Chanho Park Cc: German Gomez Cc: James Clark Cc: John Garry Cc: Leo Yan Cc: Will Deacon Link: http://lore.kernel.org/lkml/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 9afcc6467a09..e09d6908a21d 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -75,6 +75,7 @@ #define ARM_CPU_PART_CORTEX_A77 0xD0D #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 #define ARM_CPU_PART_CORTEX_A710 0xD47 @@ -130,6 +131,7 @@ #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) From fa7095c5c3240bb2ecbc77f8b69be9b1d9e2cf60 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 6 Apr 2022 15:56:51 +0100 Subject: [PATCH 476/708] perf unwind: Don't show unwind error messages when augmenting frame pointer stack Commit Fixes: b9f6fbb3b2c29736 ("perf arm64: Inject missing frames when using 'perf record --call-graph=fp'") intended to add a 'best effort' DWARF unwind that improved the frame pointer stack in most scenarios. It's expected that the unwind will fail sometimes, but this shouldn't be reported as an error. It only works when the return address can be determined from the contents of the link register alone. Fix the error shown when the unwinder requires extra registers by adding a new flag that suppresses error messages. This flag is not set in the normal --call-graph=dwarf unwind mode so that behavior is not changed. Fixes: b9f6fbb3b2c29736 ("perf arm64: Inject missing frames when using 'perf record --call-graph=fp'") Reported-by: John Garry Signed-off-by: James Clark Tested-by: John Garry Cc: Alexander Shishkin Cc: Alexandre Truong Cc: German Gomez Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20220406145651.1392529-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/dwarf-unwind.c | 2 +- .../perf/util/arm64-frame-pointer-unwind-support.c | 2 +- tools/perf/util/machine.c | 2 +- tools/perf/util/unwind-libdw.c | 10 +++++++--- tools/perf/util/unwind-libdw.h | 1 + tools/perf/util/unwind-libunwind-local.c | 10 +++++++--- tools/perf/util/unwind-libunwind.c | 6 ++++-- tools/perf/util/unwind.h | 13 ++++++++++--- 8 files changed, 32 insertions(+), 14 deletions(-) diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 2dab2d262060..afdca7f2959f 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -122,7 +122,7 @@ NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__thread(struct thread *thr } err = unwind__get_entries(unwind_entry, &cnt, thread, - &sample, MAX_STACK); + &sample, MAX_STACK, false); if (err) pr_debug("unwind failed\n"); else if (cnt != MAX_STACK) { diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c index 2242a885fbd7..4940be4a0569 100644 --- a/tools/perf/util/arm64-frame-pointer-unwind-support.c +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c @@ -53,7 +53,7 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0; } - ret = unwind__get_entries(add_entry, &entries, thread, sample, 2); + ret = unwind__get_entries(add_entry, &entries, thread, sample, 2, true); sample->user_regs = old_regs; if (ret || entries.length != 2) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b80048546451..95391236f5f6 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2987,7 +2987,7 @@ static int thread__resolve_callchain_unwind(struct thread *thread, return 0; return unwind__get_entries(unwind_entry, cursor, - thread, sample, max_stack); + thread, sample, max_stack, false); } int thread__resolve_callchain(struct thread *thread, diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index a74b517f7497..94aa40f6e348 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -200,7 +200,8 @@ frame_callback(Dwfl_Frame *state, void *arg) bool isactivation; if (!dwfl_frame_pc(state, &pc, NULL)) { - pr_err("%s", dwfl_errmsg(-1)); + if (!ui->best_effort) + pr_err("%s", dwfl_errmsg(-1)); return DWARF_CB_ABORT; } @@ -208,7 +209,8 @@ frame_callback(Dwfl_Frame *state, void *arg) report_module(pc, ui); if (!dwfl_frame_pc(state, &pc, &isactivation)) { - pr_err("%s", dwfl_errmsg(-1)); + if (!ui->best_effort) + pr_err("%s", dwfl_errmsg(-1)); return DWARF_CB_ABORT; } @@ -222,7 +224,8 @@ frame_callback(Dwfl_Frame *state, void *arg) int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, struct perf_sample *data, - int max_stack) + int max_stack, + bool best_effort) { struct unwind_info *ui, ui_buf = { .sample = data, @@ -231,6 +234,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, .cb = cb, .arg = arg, .max_stack = max_stack, + .best_effort = best_effort }; Dwarf_Word ip; int err = -EINVAL, i; diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h index 0cbd2650e280..8c88bc4f2304 100644 --- a/tools/perf/util/unwind-libdw.h +++ b/tools/perf/util/unwind-libdw.h @@ -20,6 +20,7 @@ struct unwind_info { void *arg; int max_stack; int idx; + bool best_effort; struct unwind_entry entries[]; }; diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 71a353349181..41e29fc7648a 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -96,6 +96,7 @@ struct unwind_info { struct perf_sample *sample; struct machine *machine; struct thread *thread; + bool best_effort; }; #define dw_read(ptr, type, end) ({ \ @@ -553,7 +554,8 @@ static int access_reg(unw_addr_space_t __maybe_unused as, ret = perf_reg_value(&val, &ui->sample->user_regs, id); if (ret) { - pr_err("unwind: can't read reg %d\n", regnum); + if (!ui->best_effort) + pr_err("unwind: can't read reg %d\n", regnum); return ret; } @@ -666,7 +668,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, return -1; ret = unw_init_remote(&c, addr_space, ui); - if (ret) + if (ret && !ui->best_effort) display_error(ret); while (!ret && (unw_step(&c) > 0) && i < max_stack) { @@ -704,12 +706,14 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack) + struct perf_sample *data, int max_stack, + bool best_effort) { struct unwind_info ui = { .sample = data, .thread = thread, .machine = thread->maps->machine, + .best_effort = best_effort }; if (!data->user_regs.regs) diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index e89a5479b361..509c287ee762 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -80,9 +80,11 @@ void unwind__finish_access(struct maps *maps) int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack) + struct perf_sample *data, int max_stack, + bool best_effort) { if (thread->maps->unwind_libunwind_ops) - return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack); + return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data, + max_stack, best_effort); return 0; } diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index ab8ad469c8de..b2a03fa5289b 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -23,13 +23,19 @@ struct unwind_libunwind_ops { void (*finish_access)(struct maps *maps); int (*get_entries)(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack); + struct perf_sample *data, int max_stack, bool best_effort); }; #ifdef HAVE_DWARF_UNWIND_SUPPORT +/* + * When best_effort is set, don't report errors and fail silently. This could + * be expanded in the future to be more permissive about things other than + * error messages. + */ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack); + struct perf_sample *data, int max_stack, + bool best_effort); /* libunwind specific */ #ifdef HAVE_LIBUNWIND_SUPPORT #ifndef LIBUNWIND__ARCH_REG_ID @@ -65,7 +71,8 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, void *arg __maybe_unused, struct thread *thread __maybe_unused, struct perf_sample *data __maybe_unused, - int max_stack __maybe_unused) + int max_stack __maybe_unused, + bool best_effort __maybe_unused) { return 0; } From ffab487052054162b3b6c9c6005777ec6cfcea05 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 8 Apr 2022 15:40:56 +0100 Subject: [PATCH 477/708] perf: arm-spe: Fix perf report --mem-mode Since commit bb30acae4c4dacfa ("perf report: Bail out --mem-mode if mem info is not available") "perf mem report" and "perf report --mem-mode" don't allow opening the file unless one of the events has PERF_SAMPLE_DATA_SRC set. SPE doesn't have this set even though synthetic memory data is generated after it is decoded. Fix this issue by setting DATA_SRC on SPE events. This has no effect on the data collected because the SPE driver doesn't do anything with that flag and doesn't generate samples. Fixes: bb30acae4c4dacfa ("perf report: Bail out --mem-mode if mem info is not available") Signed-off-by: James Clark Tested-by: Leo Yan Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: German Gomez Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: linux-arm-kernel@lists.infradead.org Cc: Mark Rutland Cc: Mathieu Poirier Cc: Ravi Bangoria Cc: Will Deacon Link: https://lore.kernel.org/r/20220408144056.1955535-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/arm-spe.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 86e2e926aa0e..af4d63af8072 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -239,6 +239,12 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, arm_spe_set_timestamp(itr, arm_spe_evsel); } + /* + * Set this only so that perf report knows that SPE generates memory info. It has no effect + * on the opening of the event or the SPE data produced. + */ + evsel__set_sample_bit(arm_spe_evsel, DATA_SRC); + /* Add dummy event to keep tracking */ err = parse_events(evlist, "dummy:u", NULL); if (err) From aeee9dc53ce405d2161f9915f553114e94e5b677 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 8 Apr 2022 16:26:25 +0300 Subject: [PATCH 478/708] perf tools: Fix perf's libperf_print callback eprintf() does not expect va_list as the type of the 4th parameter. Use veprintf() because it does. Signed-off-by: Adrian Hunter Fixes: 428dab813a56ce94 ("libperf: Merge libperf_set_print() into libperf_init()") Cc: Jiri Olsa Link: https://lore.kernel.org/r/20220408132625.2451452-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 2f6b67189b42..6aae7b6c376b 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -434,7 +434,7 @@ void pthread__unblock_sigwinch(void) static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) { - return eprintf(level, verbose, fmt, ap); + return veprintf(level, verbose, fmt, ap); } int main(int argc, const char **argv) From c9c2a427dd9fe0e73eceacafb42d54f3c4535693 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Wed, 6 Apr 2022 23:21:10 +0530 Subject: [PATCH 479/708] perf bench: Fix futex bench to correct usage of affinity for machines with #CPUs > 1K The 'perf bench futex' testcase fails on systems with more than 1K CPUs. Testcase: perf bench futex all Failure snippet: <<>>Running futex/hash benchmark... perf: pthread_create: No such file or directory <<>> All the futex benchmarks (ie hash, lock-api, requeue, wake, wake-parallel), pthread_create is invoked in respective bench_futex_* function. Though the logs shows direct failure from pthread_create, strace logs showed that actual failure is from "sched_setaffinity" returning EINVAL (invalid argument). This happens because the default mask size in glibc is 1024. To overcome this 1024 CPUs mask size limitation of cpu_set_t, change the mask size using the CPU_*_S macros. Patch addresses this by fixing all the futex benchmarks to use CPU_ALLOC to allocate cpumask, CPU_ALLOC_SIZE for size, and CPU_SET_S to set the mask. Reported-by: Disha Goel Reviewed-by: Srikar Dronamraju Signed-off-by: Athira Jajeev Tested-by: Disha Goel Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20220406175113.87881-2-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-hash.c | 26 +++++++++++++++++++------- tools/perf/bench/futex-lock-pi.c | 21 ++++++++++++++++----- tools/perf/bench/futex-requeue.c | 21 ++++++++++++++++----- tools/perf/bench/futex-wake-parallel.c | 21 ++++++++++++++++----- tools/perf/bench/futex-wake.c | 22 ++++++++++++++++------ 5 files changed, 83 insertions(+), 28 deletions(-) diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index dbcecec4eeda..f05db4cf983d 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -122,12 +122,14 @@ static void print_summary(void) int bench_futex_hash(int argc, const char **argv) { int ret = 0; - cpu_set_t cpuset; + cpu_set_t *cpuset; struct sigaction act; unsigned int i; pthread_attr_t thread_attr; struct worker *worker = NULL; struct perf_cpu_map *cpu; + int nrcpus; + size_t size; argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0); if (argc) { @@ -170,25 +172,35 @@ int bench_futex_hash(int argc, const char **argv) threads_starting = params.nthreads; pthread_attr_init(&thread_attr); gettimeofday(&bench__start, NULL); + + nrcpus = perf_cpu_map__nr(cpu); + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < params.nthreads; i++) { worker[i].tid = i; worker[i].futex = calloc(params.nfutexes, sizeof(*worker[i].futex)); if (!worker[i].futex) goto errmem; - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); - ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); - if (ret) + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); + ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset); + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); - + } ret = pthread_create(&worker[i].thread, &thread_attr, workerfn, (void *)(struct worker *) &worker[i]); - if (ret) + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); pthread_attr_destroy(&thread_attr); pthread_mutex_lock(&thread_lock); diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 6fc9a3d55c1f..0abb3f7ee24f 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -120,11 +120,17 @@ static void *workerfn(void *arg) static void create_threads(struct worker *w, pthread_attr_t thread_attr, struct perf_cpu_map *cpu) { - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i; + int nrcpus = perf_cpu_map__nr(cpu); + size_t size; threads_starting = params.nthreads; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < params.nthreads; i++) { worker[i].tid = i; @@ -135,15 +141,20 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr, } else worker[i].futex = &global_futex; - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } - if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) + if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); } int bench_futex_lock_pi(int argc, const char **argv) diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 2f59d5d1c509..b6faabfafb8e 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -123,22 +123,33 @@ static void *workerfn(void *arg __maybe_unused) static void block_threads(pthread_t *w, pthread_attr_t thread_attr, struct perf_cpu_map *cpu) { - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i; + int nrcpus = perf_cpu_map__nr(cpu); + size_t size; threads_starting = params.nthreads; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } - if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) + if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); } static void toggle_done(int sig __maybe_unused, diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 861deb934745..e47f46a3a47e 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -144,22 +144,33 @@ static void *blocked_workerfn(void *arg __maybe_unused) static void block_threads(pthread_t *w, pthread_attr_t thread_attr, struct perf_cpu_map *cpu) { - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i; + int nrcpus = perf_cpu_map__nr(cpu); + size_t size; threads_starting = params.nthreads; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } - if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) + if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); } static void print_run(struct thread_data *waking_worker, unsigned int run_num) diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index cfda48bef1d7..201a3555f09a 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -97,22 +97,32 @@ static void print_summary(void) static void block_threads(pthread_t *w, pthread_attr_t thread_attr, struct perf_cpu_map *cpu) { - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i; - + size_t size; + int nrcpus = perf_cpu_map__nr(cpu); threads_starting = params.nthreads; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } - if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) + if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); } static void toggle_done(int sig __maybe_unused, From 299687e18a06aa648c8d4ebb025b322ac83fe7dd Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Wed, 6 Apr 2022 23:21:11 +0530 Subject: [PATCH 480/708] perf bench: Fix epoll bench to correct usage of affinity for machines with #CPUs > 1K The 'perf bench epoll' testcase fails on systems with more than 1K CPUs. Testcase: perf bench epoll all Result snippet: <<>> Run summary [PID 106497]: 1399 threads monitoring on 64 file-descriptors for 8 secs. perf: pthread_create: No such file or directory <<>> In epoll benchmarks (ctl, wait) pthread_create is invoked in do_threads from respective bench_epoll_* function. Though the logs shows direct failure from pthread_create, the actual failure is from "sched_setaffinity" returning EINVAL (invalid argument). This happens because the default mask size in glibc is 1024. To overcome this 1024 CPUs mask size limitation of cpu_set_t, change the mask size using the CPU_*_S macros. Patch addresses this by fixing all the epoll benchmarks to use CPU_ALLOC to allocate cpumask, CPU_ALLOC_SIZE for size, and CPU_SET_S to set the mask. Reported-by: Disha Goel Signed-off-by: Athira Jajeev Tested-by: Disha Goel Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Srikar Dronamraju Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20220406175113.87881-3-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/epoll-ctl.c | 25 +++++++++++++++++++------ tools/perf/bench/epoll-wait.c | 25 +++++++++++++++++++------ 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index 134612bde0cb..4256dc5d6236 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -222,13 +222,20 @@ static void init_fdmaps(struct worker *w, int pct) static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) { pthread_attr_t thread_attr, *attrp = NULL; - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i, j; int ret = 0; + int nrcpus; + size_t size; if (!noaffinity) pthread_attr_init(&thread_attr); + nrcpus = perf_cpu_map__nr(cpu); + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < nthreads; i++) { struct worker *w = &worker[i]; @@ -252,22 +259,28 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) init_fdmaps(w, 50); if (!noaffinity) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, + size, cpuset); - ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); - if (ret) + ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset); + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } attrp = &thread_attr; } ret = pthread_create(&w->thread, attrp, workerfn, (void *)(struct worker *) w); - if (ret) + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); if (!noaffinity) pthread_attr_destroy(&thread_attr); diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 37de970c9743..2728b0140853 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -291,9 +291,11 @@ static void print_summary(void) static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) { pthread_attr_t thread_attr, *attrp = NULL; - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i, j; int ret = 0, events = EPOLLIN; + int nrcpus; + size_t size; if (oneshot) events |= EPOLLONESHOT; @@ -306,6 +308,11 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) pthread_attr_init(&thread_attr); + nrcpus = perf_cpu_map__nr(cpu); + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < nthreads; i++) { struct worker *w = &worker[i]; @@ -341,22 +348,28 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) } if (!noaffinity) { - CPU_ZERO(&cpuset); - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, + size, cpuset); - ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); - if (ret) + ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset); + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } attrp = &thread_attr; } ret = pthread_create(&w->thread, attrp, workerfn, (void *)(struct worker *) w); - if (ret) + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); if (!noaffinity) pthread_attr_destroy(&thread_attr); From 3ae87d2f25c0e998da2721ce332e2b80d3d53c39 Mon Sep 17 00:00:00 2001 From: Piotr Chmura Date: Thu, 31 Mar 2022 17:55:50 +0200 Subject: [PATCH 481/708] media: si2157: unknown chip version Si2147-A30 ROM 0x50 Fix firmware file names assignment in si2157 tuner, allow for running devices without firmware files needed. modprobe gives error: unknown chip version Si2147-A30 ROM 0x50 Device initialization is interrupted. Caused by: 1. table si2157_tuners has swapped fields rom_id and required vs struct si2157_tuner_info. 2. both firmware file names can be null for devices with required == false - device uses build-in firmware in this case Tested on this device: m07ca:1871 AVerMedia Technologies, Inc. TD310 DVB-T/T2/C dongle [mchehab: fix mangled patch] Link: https://bugzilla.kernel.org/show_bug.cgi?id=215726 Link: https://lore.kernel.org/lkml/5f660108-8812-383c-83e4-29ee0558d623@leemhuis.info/ Link: https://lore.kernel.org/linux-media/c4bcaff8-fbad-969e-ad47-e2c487ac02a1@gmail.com Fixes: 1c35ba3bf972 ("media: si2157: use a different namespace for firmware") Cc: stable@vger.kernel.org # 5.17.x Signed-off-by: Piotr Chmura Tested-by: Robert Schlabbach Signed-off-by: Mauro Carvalho Chehab --- drivers/media/tuners/si2157.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/media/tuners/si2157.c b/drivers/media/tuners/si2157.c index 47029746b89e..0de587b412d4 100644 --- a/drivers/media/tuners/si2157.c +++ b/drivers/media/tuners/si2157.c @@ -77,16 +77,16 @@ err_mutex_unlock: } static const struct si2157_tuner_info si2157_tuners[] = { - { SI2141, false, 0x60, SI2141_60_FIRMWARE, SI2141_A10_FIRMWARE }, - { SI2141, false, 0x61, SI2141_61_FIRMWARE, SI2141_A10_FIRMWARE }, - { SI2146, false, 0x11, SI2146_11_FIRMWARE, NULL }, - { SI2147, false, 0x50, SI2147_50_FIRMWARE, NULL }, - { SI2148, true, 0x32, SI2148_32_FIRMWARE, SI2158_A20_FIRMWARE }, - { SI2148, true, 0x33, SI2148_33_FIRMWARE, SI2158_A20_FIRMWARE }, - { SI2157, false, 0x50, SI2157_50_FIRMWARE, SI2157_A30_FIRMWARE }, - { SI2158, false, 0x50, SI2158_50_FIRMWARE, SI2158_A20_FIRMWARE }, - { SI2158, false, 0x51, SI2158_51_FIRMWARE, SI2158_A20_FIRMWARE }, - { SI2177, false, 0x50, SI2177_50_FIRMWARE, SI2157_A30_FIRMWARE }, + { SI2141, 0x60, false, SI2141_60_FIRMWARE, SI2141_A10_FIRMWARE }, + { SI2141, 0x61, false, SI2141_61_FIRMWARE, SI2141_A10_FIRMWARE }, + { SI2146, 0x11, false, SI2146_11_FIRMWARE, NULL }, + { SI2147, 0x50, false, SI2147_50_FIRMWARE, NULL }, + { SI2148, 0x32, true, SI2148_32_FIRMWARE, SI2158_A20_FIRMWARE }, + { SI2148, 0x33, true, SI2148_33_FIRMWARE, SI2158_A20_FIRMWARE }, + { SI2157, 0x50, false, SI2157_50_FIRMWARE, SI2157_A30_FIRMWARE }, + { SI2158, 0x50, false, SI2158_50_FIRMWARE, SI2158_A20_FIRMWARE }, + { SI2158, 0x51, false, SI2158_51_FIRMWARE, SI2158_A20_FIRMWARE }, + { SI2177, 0x50, false, SI2177_50_FIRMWARE, SI2157_A30_FIRMWARE }, }; static int si2157_load_firmware(struct dvb_frontend *fe, @@ -178,7 +178,7 @@ static int si2157_find_and_load_firmware(struct dvb_frontend *fe) } } - if (!fw_name && !fw_alt_name) { + if (required && !fw_name && !fw_alt_name) { dev_err(&client->dev, "unknown chip version Si21%d-%c%c%c ROM 0x%02x\n", part_id, cmd.args[1], cmd.args[3], cmd.args[4], rom_id); From bc21e74d4775f883ae1f542c1f1dc7205b15d925 Mon Sep 17 00:00:00 2001 From: Denis Nikitin Date: Tue, 29 Mar 2022 20:11:30 -0700 Subject: [PATCH 482/708] perf session: Remap buf if there is no space for event If a perf event doesn't fit into remaining buffer space return NULL to remap buf and fetch the event again. Keep the logic to error out on inadequate input from fuzzing. This fixes perf failing on ChromeOS (with 32b userspace): $ perf report -v -i perf.data ... prefetch_event: head=0x1fffff8 event->header_size=0x30, mmap_size=0x2000000: fuzzed or compressed perf.data? Error: failed to process sample Fixes: 57fc032ad643ffd0 ("perf session: Avoid infinite loop when seeing invalid header.size") Reviewed-by: James Clark Signed-off-by: Denis Nikitin Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Namhyung Kim Link: https://lore.kernel.org/r/20220330031130.2152327-1-denik@chromium.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 3b8dfe603e50..45a30040ec8d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2095,6 +2095,7 @@ prefetch_event(char *buf, u64 head, size_t mmap_size, bool needs_swap, union perf_event *error) { union perf_event *event; + u16 event_size; /* * Ensure we have enough space remaining to read @@ -2107,15 +2108,23 @@ prefetch_event(char *buf, u64 head, size_t mmap_size, if (needs_swap) perf_event_header__bswap(&event->header); - if (head + event->header.size <= mmap_size) + event_size = event->header.size; + if (head + event_size <= mmap_size) return event; /* We're not fetching the event so swap back again */ if (needs_swap) perf_event_header__bswap(&event->header); - pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx:" - " fuzzed or compressed perf.data?\n",__func__, head, event->header.size, mmap_size); + /* Check if the event fits into the next mmapped buf. */ + if (event_size <= mmap_size - head % page_size) { + /* Remap buf and fetch again. */ + return NULL; + } + + /* Invalid input. Event size should never exceed mmap_size. */ + pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:" + " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size); return error; } From 0ff26efe92844aa3910eff8951739d44a5ab6493 Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Tue, 5 Apr 2022 00:15:40 +0200 Subject: [PATCH 483/708] perf docs: Add perf-iostat link to manpages Signed-off-by: Michael Petlan Acked-by: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20220404221541.30312-1-mpetlan@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 9c330cdfa973..71ebdf8125de 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -83,7 +83,7 @@ linkperf:perf-buildid-list[1], linkperf:perf-c2c[1], linkperf:perf-config[1], linkperf:perf-data[1], linkperf:perf-diff[1], linkperf:perf-evlist[1], linkperf:perf-ftrace[1], linkperf:perf-help[1], linkperf:perf-inject[1], -linkperf:perf-intel-pt[1], linkperf:perf-kallsyms[1], +linkperf:perf-intel-pt[1], linkperf:perf-iostat[1], linkperf:perf-kallsyms[1], linkperf:perf-kmem[1], linkperf:perf-kvm[1], linkperf:perf-lock[1], linkperf:perf-mem[1], linkperf:perf-probe[1], linkperf:perf-sched[1], linkperf:perf-script[1], linkperf:perf-test[1], From 3e6b43beb7b56ac6fd376c84f06d90ded73a2788 Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Tue, 5 Apr 2022 00:15:41 +0200 Subject: [PATCH 484/708] perf tools: Add external commands to list-cmds The `perf --list-cmds` output prints only internal commands, although there is no reason for that from users' perspective. Adding the external commands to commands array with NULL function pointer allows printing all perf commands while not changing the logic of command handler selection. Signed-off-by: Michael Petlan Acked-by: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20220404221541.30312-2-mpetlan@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 6aae7b6c376b..0170cb0819d6 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -55,6 +55,7 @@ struct cmd_struct { }; static struct cmd_struct commands[] = { + { "archive", NULL, 0 }, { "buildid-cache", cmd_buildid_cache, 0 }, { "buildid-list", cmd_buildid_list, 0 }, { "config", cmd_config, 0 }, @@ -62,6 +63,7 @@ static struct cmd_struct commands[] = { { "diff", cmd_diff, 0 }, { "evlist", cmd_evlist, 0 }, { "help", cmd_help, 0 }, + { "iostat", NULL, 0 }, { "kallsyms", cmd_kallsyms, 0 }, { "list", cmd_list, 0 }, { "record", cmd_record, 0 }, @@ -360,6 +362,8 @@ static void handle_internal_command(int argc, const char **argv) for (i = 0; i < ARRAY_SIZE(commands); i++) { struct cmd_struct *p = commands+i; + if (p->fn == NULL) + continue; if (strcmp(p->cmd, cmd)) continue; exit(run_builtin(p, argc, argv)); From 940a445a904088eac715dd985c01847311a42459 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 7 Apr 2022 16:04:59 -0700 Subject: [PATCH 485/708] perf annotate: Drop objdump stderr to avoid getting stuck waiting for stdout output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If objdump writes to stderr it can block waiting for it to be read. As perf doesn't read stderr then progress stops with perf waiting for stdout output. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexandre Truong Cc: Dave Marchevsky Cc: Denis Nikitin Cc: German Gomez Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Lexi Shao Cc: Li Huafei Cc: Mark Rutland Cc: Martin Liška Cc: Masami Hiramatsu Cc: Mathieu Poirier Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Remi Bernon Cc: Riccardo Mancini Cc: Song Liu Cc: Stephane Eranian Cc: Thomas Richter Cc: Will Deacon Cc: William Cohen Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20220407230503.1265036-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e4c641b240df..82cc396ef516 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2047,6 +2047,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) objdump_process.argv = objdump_argv; objdump_process.out = -1; objdump_process.err = -1; + objdump_process.no_stderr = 1; if (start_command(&objdump_process)) { pr_err("Failure starting to run %s\n", command); err = -1; From c54bc0fc84214b203f7a0ebfd1bd308ce2abe920 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Tue, 5 Apr 2022 21:17:32 +0200 Subject: [PATCH 486/708] timers: Fix warning condition in __run_timers() When the timer base is empty, base::next_expiry is set to base::clk + NEXT_TIMER_MAX_DELTA and base::next_expiry_recalc is false. When no timer is queued until jiffies reaches base::next_expiry value, the warning for not finding any expired timer and base::next_expiry_recalc is false in __run_timers() triggers. To prevent triggering the warning in this valid scenario base::timers_pending needs to be added to the warning condition. Fixes: 31cd0e119d50 ("timers: Recalculate next timer interrupt only when necessary") Reported-by: Johannes Berg Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20220405191732.7438-3-anna-maria@linutronix.de --- kernel/time/timer.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 85f1021ad459..9dd2a39cb3b0 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1722,11 +1722,14 @@ static inline void __run_timers(struct timer_base *base) time_after_eq(jiffies, base->next_expiry)) { levels = collect_expired_timers(base, heads); /* - * The only possible reason for not finding any expired - * timer at this clk is that all matching timers have been - * dequeued. + * The two possible reasons for not finding any expired + * timer at this clk are that all matching timers have been + * dequeued or no timer has been queued since + * base::next_expiry was set to base::clk + + * NEXT_TIMER_MAX_DELTA. */ - WARN_ON_ONCE(!levels && !base->next_expiry_recalc); + WARN_ON_ONCE(!levels && !base->next_expiry_recalc + && base->timers_pending); base->clk++; base->next_expiry = __next_timer_interrupt(base); From 40e97e42961f8c6cc7bd5fe67cc18417e02d78f1 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 6 Dec 2021 09:59:50 -0500 Subject: [PATCH 487/708] tick/nohz: Use WARN_ON_ONCE() to prevent console saturation While running some testing on code that happened to allow the variable tick_nohz_full_running to get set but with no "possible" NOHZ cores to back up that setting, this warning triggered: if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) WARN_ON(tick_nohz_full_running); The console was overwhemled with an endless stream of one WARN per tick per core and there was no way to even see what was going on w/o using a serial console to capture it and then trace it back to this. Change it to WARN_ON_ONCE(). Fixes: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full") Signed-off-by: Paul Gortmaker Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211206145950.10927-3-paul.gortmaker@windriver.com --- kernel/time/tick-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 2d76c91b85de..3506f6ed790c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -188,7 +188,7 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) */ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) { #ifdef CONFIG_NO_HZ_FULL - WARN_ON(tick_nohz_full_running); + WARN_ON_ONCE(tick_nohz_full_running); #endif tick_do_timer_cpu = cpu; } From 9c95bc25ad3b1a2240cd1f896569292a57d3ce85 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Mon, 14 Feb 2022 16:47:39 +0800 Subject: [PATCH 488/708] tick/sched: Fix non-kernel-doc comment Fixes the following W=1 kernel build warning: kernel/time/tick-sched.c:1563: warning: This comment starts with '/**', but isn't a kernel-doc comment. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220214084739.63228-1-jiapeng.chong@linux.alibaba.com --- kernel/time/tick-sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3506f6ed790c..d257721c68b8 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1538,7 +1538,7 @@ void tick_cancel_sched_timer(int cpu) } #endif -/** +/* * Async notification about clocksource changes */ void tick_clock_notify(void) @@ -1559,7 +1559,7 @@ void tick_oneshot_notify(void) set_bit(0, &ts->check_clocks); } -/** +/* * Check, if a change happened, which makes oneshot possible. * * Called cyclic from the hrtimer softirq (driven by the timer From dbc2b1764734857d68425468ffa8486e97ab89df Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2022 17:15:14 +0200 Subject: [PATCH 489/708] mt76: Fix undefined behavior due to shift overflowing the constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: drivers/net/wireless/mediatek/mt76/mt76x2/pci.c: In function ‘mt76x2e_probe’: ././include/linux/compiler_types.h:352:38: error: call to ‘__compiletime_assert_946’ \ declared with attribute error: FIELD_PREP: mask is not constant _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) See https://lore.kernel.org/r/YkwQ6%2BtIH8GQpuct@zn.tnic for the gory details as to why it triggers with older gccs only. Signed-off-by: Borislav Petkov Cc: Felix Fietkau Cc: Lorenzo Bianconi Cc: Ryder Lee Cc: Shayne Chen Cc: Sean Wang Cc: Kalle Valo Cc: "David S. Miller" Cc: Jakub Kicinski Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220405151517.29753-9-bp@alien8.de --- drivers/net/wireless/mediatek/mt76/mt76x2/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c index 8a22ee581674..df85ebc6e1df 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c @@ -80,7 +80,7 @@ mt76x2e_probe(struct pci_dev *pdev, const struct pci_device_id *id) mt76_rmw_field(dev, 0x15a10, 0x1f << 16, 0x9); /* RG_SSUSB_G1_CDR_BIC_LTR = 0xf */ - mt76_rmw_field(dev, 0x15a0c, 0xf << 28, 0xf); + mt76_rmw_field(dev, 0x15a0c, 0xfU << 28, 0xf); /* RG_SSUSB_CDR_BR_PE1D = 0x3 */ mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3); From 6fb3a5868b2117611f41e421e10e6a8c2a13039a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2022 18:55:37 +0200 Subject: [PATCH 490/708] brcmfmac: sdio: Fix undefined behavior due to shift overflowing the constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c: In function ‘brcmf_sdio_drivestrengthinit’: drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c:3798:2: error: case label does not reduce to an integer constant case SDIOD_DRVSTR_KEY(BRCM_CC_43143_CHIP_ID, 17): ^~~~ drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c:3809:2: error: case label does not reduce to an integer constant case SDIOD_DRVSTR_KEY(BRCM_CC_43362_CHIP_ID, 13): ^~~~ See https://lore.kernel.org/r/YkwQ6%2BtIH8GQpuct@zn.tnic for the gory details as to why it triggers with older gccs only. Signed-off-by: Borislav Petkov Cc: Arend van Spriel Cc: Franky Lin Cc: Hante Meuleman Cc: Kalle Valo Cc: "David S. Miller" Cc: Jakub Kicinski Cc: brcm80211-dev-list.pdl@broadcom.com Cc: netdev@vger.kernel.org Acked-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/Ykx0iRlvtBnKqtbG@zn.tnic --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index ba3c159111d3..d78ccc223709 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -557,7 +557,7 @@ enum brcmf_sdio_frmtype { BRCMF_SDIO_FT_SUB, }; -#define SDIOD_DRVSTR_KEY(chip, pmu) (((chip) << 16) | (pmu)) +#define SDIOD_DRVSTR_KEY(chip, pmu) (((unsigned int)(chip) << 16) | (pmu)) /* SDIO Pad drive strength to select value mappings */ struct sdiod_drive_str { From 5a6b06f5927c940fa44026695779c30b7536474c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 4 Apr 2022 22:48:00 +0200 Subject: [PATCH 491/708] ath9k: Fix usage of driver-private space in tx_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ieee80211_tx_info_clear_status() helper also clears the rate counts and the driver-private part of struct ieee80211_tx_info, so using it breaks quite a few other things. So back out of using it, and instead define a ath-internal helper that only clears the area between the status_driver_data and the rates info. Combined with moving the ath_frame_info struct to status_driver_data, this avoids clearing anything we shouldn't be, and so we can keep the existing code for handling the rate information. While fixing this I also noticed that the setting of tx_info->status.rates[tx_rateindex].count on hardware underrun errors was always immediately overridden by the normal setting of the same fields, so rearrange the code so that the underrun detection actually takes effect. The new helper could be generalised to a 'memset_between()' helper, but leave it as a driver-internal helper for now since this needs to go to stable. Cc: stable@vger.kernel.org Reported-by: Peter Seiderer Fixes: 037250f0a45c ("ath9k: Properly clear TX status area before reporting to mac80211") Signed-off-by: Toke Høiland-Jørgensen Reviewed-by: Peter Seiderer Tested-by: Peter Seiderer Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220404204800.2681133-1-toke@toke.dk --- drivers/net/wireless/ath/ath9k/main.c | 2 +- drivers/net/wireless/ath/ath9k/xmit.c | 30 ++++++++++++++++++--------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 98090e40e1cf..e2791d45f5f5 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -839,7 +839,7 @@ static bool ath9k_txq_list_has_key(struct list_head *txq_list, u32 keyix) continue; txinfo = IEEE80211_SKB_CB(bf->bf_mpdu); - fi = (struct ath_frame_info *)&txinfo->rate_driver_data[0]; + fi = (struct ath_frame_info *)&txinfo->status.status_driver_data[0]; if (fi->keyix == keyix) return true; } diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index cbcf96ac303e..db83cc4ba810 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -141,8 +141,8 @@ static struct ath_frame_info *get_frame_info(struct sk_buff *skb) { struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); BUILD_BUG_ON(sizeof(struct ath_frame_info) > - sizeof(tx_info->rate_driver_data)); - return (struct ath_frame_info *) &tx_info->rate_driver_data[0]; + sizeof(tx_info->status.status_driver_data)); + return (struct ath_frame_info *) &tx_info->status.status_driver_data[0]; } static void ath_send_bar(struct ath_atx_tid *tid, u16 seqno) @@ -2542,6 +2542,16 @@ skip_tx_complete: spin_unlock_irqrestore(&sc->tx.txbuflock, flags); } +static void ath_clear_tx_status(struct ieee80211_tx_info *tx_info) +{ + void *ptr = &tx_info->status; + + memset(ptr + sizeof(tx_info->status.rates), 0, + sizeof(tx_info->status) - + sizeof(tx_info->status.rates) - + sizeof(tx_info->status.status_driver_data)); +} + static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, struct ath_tx_status *ts, int nframes, int nbad, int txok) @@ -2553,7 +2563,7 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, struct ath_hw *ah = sc->sc_ah; u8 i, tx_rateindex; - ieee80211_tx_info_clear_status(tx_info); + ath_clear_tx_status(tx_info); if (txok) tx_info->status.ack_signal = ts->ts_rssi; @@ -2569,6 +2579,13 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, tx_info->status.ampdu_len = nframes; tx_info->status.ampdu_ack_len = nframes - nbad; + tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1; + + for (i = tx_rateindex + 1; i < hw->max_rates; i++) { + tx_info->status.rates[i].count = 0; + tx_info->status.rates[i].idx = -1; + } + if ((ts->ts_status & ATH9K_TXERR_FILT) == 0 && (tx_info->flags & IEEE80211_TX_CTL_NO_ACK) == 0) { /* @@ -2590,13 +2607,6 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, tx_info->status.rates[tx_rateindex].count = hw->max_rate_tries; } - - for (i = tx_rateindex + 1; i < hw->max_rates; i++) { - tx_info->status.rates[i].count = 0; - tx_info->status.rates[i].idx = -1; - } - - tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1; } static void ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq) From a25d5887821e242e5ea8388d8461ff20bedb0729 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 7 Apr 2022 13:40:27 +0200 Subject: [PATCH 492/708] s390: update defconfigs Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 7 +++++-- arch/s390/configs/defconfig | 6 ++++-- arch/s390/configs/zfcpdump_defconfig | 6 +++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 498bed9b261b..e18006971e36 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -499,11 +499,13 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_CHELSIO is not set # CONFIG_NET_VENDOR_CISCO is not set # CONFIG_NET_VENDOR_CORTINA is not set +# CONFIG_NET_VENDOR_DAVICOM is not set # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set # CONFIG_NET_VENDOR_ENGLEDER is not set # CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_FUNGIBLE is not set # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set @@ -588,13 +590,13 @@ CONFIG_MLX5_INFINIBAND=m CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m +CONFIG_MLX5_VFIO_PCI=m CONFIG_VFIO_MDEV=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m -# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -733,6 +735,7 @@ CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m @@ -786,7 +789,6 @@ CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y -CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_DEBUG_INFO_BTF=y CONFIG_GDB_SCRIPTS=y @@ -814,6 +816,7 @@ CONFIG_DEBUG_MEMORY_INIT=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m CONFIG_DEBUG_PER_CPU_MAPS=y CONFIG_KFENCE=y +CONFIG_KFENCE_DEFERRABLE=y CONFIG_KFENCE_STATIC_KEYS=y CONFIG_DEBUG_SHIRQ=y CONFIG_PANIC_ON_OOPS=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 61e36b999f67..706df3a4a867 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -490,11 +490,13 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_CHELSIO is not set # CONFIG_NET_VENDOR_CISCO is not set # CONFIG_NET_VENDOR_CORTINA is not set +# CONFIG_NET_VENDOR_DAVICOM is not set # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set # CONFIG_NET_VENDOR_ENGLEDER is not set # CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_FUNGIBLE is not set # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set @@ -578,13 +580,13 @@ CONFIG_MLX5_INFINIBAND=m CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m +CONFIG_MLX5_VFIO_PCI=m CONFIG_VFIO_MDEV=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m -# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -720,6 +722,7 @@ CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m @@ -772,7 +775,6 @@ CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y -CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_DEBUG_INFO_BTF=y CONFIG_GDB_SCRIPTS=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index c55c668dc3c7..a87fcc45e307 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -26,6 +26,7 @@ CONFIG_CRASH_DUMP=y # CONFIG_S390_GUEST is not set # CONFIG_SECCOMP is not set # CONFIG_GCC_PLUGINS is not set +# CONFIG_BLOCK_LEGACY_AUTOLOAD is not set CONFIG_PARTITION_ADVANCED=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_COMPACTION is not set @@ -60,7 +61,6 @@ CONFIG_ZFCP=y # CONFIG_HID is not set # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set -# CONFIG_SURFACE_PLATFORMS is not set # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set @@ -71,10 +71,10 @@ CONFIG_LSM="yama,loadpin,safesetid,integrity" CONFIG_XZ_DEC_MICROLZMA=y CONFIG_PRINTK_TIME=y # CONFIG_SYMBOLIC_ERRNAME is not set -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_INFO_DWARF4=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y CONFIG_PANIC_ON_OOPS=y # CONFIG_SCHED_DEBUG is not set CONFIG_RCU_CPU_STALL_TIMEOUT=60 From ce522ba9ef7e2d9fb22a39eb3371c0c64e2a433e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 10 Apr 2022 14:21:36 -1000 Subject: [PATCH 493/708] Linux 5.18-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8c7de9a72ea2..29e273d3f8cc 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Superb Owl # *DOCUMENTATION* From c4212f3eb89fd5654f0a6ed2ee1d13fcb86cb664 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 10 Apr 2022 15:13:24 -0600 Subject: [PATCH 494/708] io_uring: flag the fact that linked file assignment is sane Give applications a way to tell if the kernel supports sane linked files, as in files being assigned at the right time to be able to reliably do while using IOSQE_IO_LINK to order them. Not really a bug fix, but flag it as such so that it gets pulled in with backports of the deferred file assignment. Fixes: 6bf9c47a3989 ("io_uring: defer file assignment") Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- include/uapi/linux/io_uring.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 659f8ecba5b7..f060ad018ba4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -11178,7 +11178,8 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL | IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED | IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS | - IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP; + IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP | + IORING_FEAT_LINKED_FILE; if (copy_to_user(params, p, sizeof(*p))) { ret = -EFAULT; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 784adc6f6ed2..1845cf7c80ba 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -296,6 +296,7 @@ struct io_uring_params { #define IORING_FEAT_NATIVE_WORKERS (1U << 9) #define IORING_FEAT_RSRC_TAGS (1U << 10) #define IORING_FEAT_CQE_SKIP (1U << 11) +#define IORING_FEAT_LINKED_FILE (1U << 12) /* * io_uring_register(2) opcodes and arguments From 1acb34e7dd7720a1fff00cbd4d000ec3219dc9d6 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 7 Apr 2022 09:18:39 -0700 Subject: [PATCH 495/708] drm/i915: Sunset igpu legacy mmap support based on GRAPHICS_VER_FULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intent of the version check in the mmap ioctl was to maintain support for existing platforms (i.e., ADL/RPL and earlier), but drop support on all future igpu platforms. As we've seen on the dgpu side, the hardware teams are using a more fine-grained numbering system for IP version numbers these days, so it's possible the version number associated with our next igpu could be some form of "12.xx" rather than 13 or higher. Comparing against the full ver.release number will ensure the intent of the check is maintained no matter what numbering the hardware teams settle on. Fixes: d3f3baa3562a ("drm/i915: Reinstate the mmap ioctl for some platforms") Cc: Thomas Hellström Cc: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220407161839.1073443-1-matthew.d.roper@intel.com (cherry picked from commit 8e7e5c077cd57ee9a36d58c65f07257dc49a88d5) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index c3ea243d414d..0c5c43852e24 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -70,7 +70,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, * mmap ioctl is disallowed for all discrete platforms, * and for all platforms with GRAPHICS_VER > 12. */ - if (IS_DGFX(i915) || GRAPHICS_VER(i915) > 12) + if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) > IP_VER(12, 0)) return -EOPNOTSUPP; if (args->flags & ~(I915_MMAP_WC)) From 2f7a26abb8241a0208c68d22815aa247c5ddacab Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Sat, 9 Apr 2022 03:26:55 +0200 Subject: [PATCH 496/708] ALSA: pcm: Test for "silence" field in struct "pcm_format_data" Syzbot reports "KASAN: null-ptr-deref Write in snd_pcm_format_set_silence".[1] It is due to missing validation of the "silence" field of struct "pcm_format_data" in "pcm_formats" array. Add a test for valid "pat" and, if it is not so, return -EINVAL. [1] https://lore.kernel.org/lkml/000000000000d188ef05dc2c7279@google.com/ Reported-and-tested-by: syzbot+205eb15961852c2c5974@syzkaller.appspotmail.com Signed-off-by: Fabio M. De Francesco Cc: Link: https://lore.kernel.org/r/20220409012655.9399-1-fmdefrancesco@gmail.com Signed-off-by: Takashi Iwai --- sound/core/pcm_misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/pcm_misc.c b/sound/core/pcm_misc.c index 4866aed97aac..5588b6a1ee8b 100644 --- a/sound/core/pcm_misc.c +++ b/sound/core/pcm_misc.c @@ -433,7 +433,7 @@ int snd_pcm_format_set_silence(snd_pcm_format_t format, void *data, unsigned int return 0; width = pcm_formats[(INT)format].phys; /* physical width */ pat = pcm_formats[(INT)format].silence; - if (! width) + if (!width || !pat) return -EINVAL; /* signed or 1 byte data */ if (pcm_formats[(INT)format].signd == 1 || width <= 8) { From 264fb03497ec1c7841bba872571bcd11beed57a7 Mon Sep 17 00:00:00 2001 From: Tao Jin Date: Sat, 9 Apr 2022 18:44:24 -0400 Subject: [PATCH 497/708] ALSA: hda/realtek: add quirk for Lenovo Thinkpad X12 speakers For this specific device on Lenovo Thinkpad X12 tablet, the verbs were dumped by qemu running a guest OS that init this codec properly. After studying the dump, it turns out that the same quirk used by the other Lenovo devices can be reused. The patch was tested working against the mainline kernel. Cc: Signed-off-by: Tao Jin Link: https://lore.kernel.org/r/CO6PR03MB6241CD73310B37858FE64C85E1E89@CO6PR03MB6241.namprd03.prod.outlook.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 61df440fdb61..44aed1a54845 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9270,6 +9270,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x505d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x505f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x5062, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x508b, "Thinkpad X12 Gen 1", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), From 258f3b8c3210b03386e4ad92b4bd8652b5c1beb3 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 10 Mar 2022 14:00:59 -0800 Subject: [PATCH 498/708] x86/tsx: Use MSR_TSX_CTRL to clear CPUID bits tsx_clear_cpuid() uses MSR_TSX_FORCE_ABORT to clear CPUID.RTM and CPUID.HLE. Not all CPUs support MSR_TSX_FORCE_ABORT, alternatively use MSR_IA32_TSX_CTRL when supported. [ bp: Document how and why TSX gets disabled. ] Fixes: 293649307ef9 ("x86/tsx: Clear CPUID bits when TSX always force aborts") Reported-by: kernel test robot Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Tested-by: Neelima Krishnan Cc: Link: https://lore.kernel.org/r/5b323e77e251a9c8bcdda498c5cc0095be1e1d3c.1646943780.git.pawan.kumar.gupta@linux.intel.com --- arch/x86/kernel/cpu/intel.c | 1 + arch/x86/kernel/cpu/tsx.c | 54 ++++++++++++++++++++++++++++++++----- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 8321c43554a1..8abf995677a4 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -722,6 +722,7 @@ static void init_intel(struct cpuinfo_x86 *c) else if (tsx_ctrl_state == TSX_CTRL_DISABLE) tsx_disable(); else if (tsx_ctrl_state == TSX_CTRL_RTM_ALWAYS_ABORT) + /* See comment over that function for more details. */ tsx_clear_cpuid(); split_lock_init(); diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index 9c7a5f049292..ec6ff8000920 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -58,7 +58,7 @@ void tsx_enable(void) wrmsrl(MSR_IA32_TSX_CTRL, tsx); } -static bool __init tsx_ctrl_is_supported(void) +static bool tsx_ctrl_is_supported(void) { u64 ia32_cap = x86_read_arch_cap_msr(); @@ -84,6 +84,44 @@ static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) return TSX_CTRL_ENABLE; } +/* + * Disabling TSX is not a trivial business. + * + * First of all, there's a CPUID bit: X86_FEATURE_RTM_ALWAYS_ABORT + * which says that TSX is practically disabled (all transactions are + * aborted by default). When that bit is set, the kernel unconditionally + * disables TSX. + * + * In order to do that, however, it needs to dance a bit: + * + * 1. The first method to disable it is through MSR_TSX_FORCE_ABORT and + * the MSR is present only when *two* CPUID bits are set: + * + * - X86_FEATURE_RTM_ALWAYS_ABORT + * - X86_FEATURE_TSX_FORCE_ABORT + * + * 2. The second method is for CPUs which do not have the above-mentioned + * MSR: those use a different MSR - MSR_IA32_TSX_CTRL and disable TSX + * through that one. Those CPUs can also have the initially mentioned + * CPUID bit X86_FEATURE_RTM_ALWAYS_ABORT set and for those the same strategy + * applies: TSX gets disabled unconditionally. + * + * When either of the two methods are present, the kernel disables TSX and + * clears the respective RTM and HLE feature flags. + * + * An additional twist in the whole thing presents late microcode loading + * which, when done, may cause for the X86_FEATURE_RTM_ALWAYS_ABORT CPUID + * bit to be set after the update. + * + * A subsequent hotplug operation on any logical CPU except the BSP will + * cause for the supported CPUID feature bits to get re-detected and, if + * RTM and HLE get cleared all of a sudden, but, userspace did consult + * them before the update, then funny explosions will happen. Long story + * short: the kernel doesn't modify CPUID feature bits after booting. + * + * That's why, this function's call in init_intel() doesn't clear the + * feature flags. + */ void tsx_clear_cpuid(void) { u64 msr; @@ -97,6 +135,10 @@ void tsx_clear_cpuid(void) rdmsrl(MSR_TSX_FORCE_ABORT, msr); msr |= MSR_TFA_TSX_CPUID_CLEAR; wrmsrl(MSR_TSX_FORCE_ABORT, msr); + } else if (tsx_ctrl_is_supported()) { + rdmsrl(MSR_IA32_TSX_CTRL, msr); + msr |= TSX_CTRL_CPUID_CLEAR; + wrmsrl(MSR_IA32_TSX_CTRL, msr); } } @@ -106,13 +148,11 @@ void __init tsx_init(void) int ret; /* - * Hardware will always abort a TSX transaction if both CPUID bits - * RTM_ALWAYS_ABORT and TSX_FORCE_ABORT are set. In this case, it is - * better not to enumerate CPUID.RTM and CPUID.HLE bits. Clear them - * here. + * Hardware will always abort a TSX transaction when the CPUID bit + * RTM_ALWAYS_ABORT is set. In this case, it is better not to enumerate + * CPUID.RTM and CPUID.HLE bits. Clear them here. */ - if (boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) && - boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) { + if (boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)) { tsx_ctrl_state = TSX_CTRL_RTM_ALWAYS_ABORT; tsx_clear_cpuid(); setup_clear_cpu_cap(X86_FEATURE_RTM); From 08d835dff916bfe8f45acc7b92c7af6c4081c8a7 Mon Sep 17 00:00:00 2001 From: Rei Yamamoto Date: Thu, 31 Mar 2022 09:33:09 +0900 Subject: [PATCH 499/708] genirq/affinity: Consider that CPUs on nodes can be unbalanced If CPUs on a node are offline at boot time, the number of nodes is different when building affinity masks for present cpus and when building affinity masks for possible cpus. This causes the following problem: In the case that the number of vectors is less than the number of nodes there are cases where bits of masks for present cpus are overwritten when building masks for possible cpus. Fix this by excluding CPUs, which are not part of the current build mask (present/possible). [ tglx: Massaged changelog and added comment ] Fixes: b82592199032 ("genirq/affinity: Spread IRQs to all available NUMA nodes") Signed-off-by: Rei Yamamoto Signed-off-by: Thomas Gleixner Reviewed-by: Ming Lei Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220331003309.10891-1-yamamoto.rei@jp.fujitsu.com --- kernel/irq/affinity.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index f7ff8919dc9b..fdf170404650 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -269,8 +269,9 @@ static int __irq_build_affinity_masks(unsigned int startvec, */ if (numvecs <= nodes) { for_each_node_mask(n, nodemsk) { - cpumask_or(&masks[curvec].mask, &masks[curvec].mask, - node_to_cpumask[n]); + /* Ensure that only CPUs which are in both masks are set */ + cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); + cpumask_or(&masks[curvec].mask, &masks[curvec].mask, nmsk); if (++curvec == last_affv) curvec = firstvec; } From 400331f8ffa3bec5c561417e5eec6848464e9160 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 10 Mar 2022 14:02:09 -0800 Subject: [PATCH 500/708] x86/tsx: Disable TSX development mode at boot A microcode update on some Intel processors causes all TSX transactions to always abort by default[*]. Microcode also added functionality to re-enable TSX for development purposes. With this microcode loaded, if tsx=on was passed on the cmdline, and TSX development mode was already enabled before the kernel boot, it may make the system vulnerable to TSX Asynchronous Abort (TAA). To be on safer side, unconditionally disable TSX development mode during boot. If a viable use case appears, this can be revisited later. [*]: Intel TSX Disable Update for Selected Processors, doc ID: 643557 [ bp: Drop unstable web link, massage heavily. ] Suggested-by: Andrew Cooper Suggested-by: Borislav Petkov Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Tested-by: Neelima Krishnan Cc: Link: https://lore.kernel.org/r/347bd844da3a333a9793c6687d4e4eb3b2419a3e.1646943780.git.pawan.kumar.gupta@linux.intel.com --- arch/x86/include/asm/msr-index.h | 4 +-- arch/x86/kernel/cpu/common.c | 2 ++ arch/x86/kernel/cpu/cpu.h | 5 ++- arch/x86/kernel/cpu/intel.c | 8 ----- arch/x86/kernel/cpu/tsx.c | 50 ++++++++++++++++++++++++-- tools/arch/x86/include/asm/msr-index.h | 4 +-- 6 files changed, 55 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 0eb90d21049e..ee15311b6be1 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -128,9 +128,9 @@ #define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ #define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ -/* SRBDS support */ #define MSR_IA32_MCU_OPT_CTRL 0x00000123 -#define RNGDS_MITG_DIS BIT(0) +#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ +#define RTM_ALLOW BIT(1) /* TSX development mode */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ed4417500700..e342ae4db3c4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1855,6 +1855,8 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) validate_apic_and_package_id(c); x86_spec_ctrl_setup_ap(); update_srbds_msr(); + + tsx_ap_init(); } static __init int setup_noclflush(char *arg) diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index ee6f23f7587d..2a8e584fc991 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -55,11 +55,10 @@ enum tsx_ctrl_states { extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; extern void __init tsx_init(void); -extern void tsx_enable(void); -extern void tsx_disable(void); -extern void tsx_clear_cpuid(void); +void tsx_ap_init(void); #else static inline void tsx_init(void) { } +static inline void tsx_ap_init(void) { } #endif /* CONFIG_CPU_SUP_INTEL */ extern void get_cpu_cap(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 8abf995677a4..f7a5370a9b3b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -717,14 +717,6 @@ static void init_intel(struct cpuinfo_x86 *c) init_intel_misc_features(c); - if (tsx_ctrl_state == TSX_CTRL_ENABLE) - tsx_enable(); - else if (tsx_ctrl_state == TSX_CTRL_DISABLE) - tsx_disable(); - else if (tsx_ctrl_state == TSX_CTRL_RTM_ALWAYS_ABORT) - /* See comment over that function for more details. */ - tsx_clear_cpuid(); - split_lock_init(); bus_lock_init(); diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index ec6ff8000920..ec7bbac3a9f2 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -19,7 +19,7 @@ enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED; -void tsx_disable(void) +static void tsx_disable(void) { u64 tsx; @@ -39,7 +39,7 @@ void tsx_disable(void) wrmsrl(MSR_IA32_TSX_CTRL, tsx); } -void tsx_enable(void) +static void tsx_enable(void) { u64 tsx; @@ -122,7 +122,7 @@ static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) * That's why, this function's call in init_intel() doesn't clear the * feature flags. */ -void tsx_clear_cpuid(void) +static void tsx_clear_cpuid(void) { u64 msr; @@ -142,11 +142,42 @@ void tsx_clear_cpuid(void) } } +/* + * Disable TSX development mode + * + * When the microcode released in Feb 2022 is applied, TSX will be disabled by + * default on some processors. MSR 0x122 (TSX_CTRL) and MSR 0x123 + * (IA32_MCU_OPT_CTRL) can be used to re-enable TSX for development, doing so is + * not recommended for production deployments. In particular, applying MD_CLEAR + * flows for mitigation of the Intel TSX Asynchronous Abort (TAA) transient + * execution attack may not be effective on these processors when Intel TSX is + * enabled with updated microcode. + */ +static void tsx_dev_mode_disable(void) +{ + u64 mcu_opt_ctrl; + + /* Check if RTM_ALLOW exists */ + if (!boot_cpu_has_bug(X86_BUG_TAA) || !tsx_ctrl_is_supported() || + !cpu_feature_enabled(X86_FEATURE_SRBDS_CTRL)) + return; + + rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_opt_ctrl); + + if (mcu_opt_ctrl & RTM_ALLOW) { + mcu_opt_ctrl &= ~RTM_ALLOW; + wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_opt_ctrl); + setup_force_cpu_cap(X86_FEATURE_RTM_ALWAYS_ABORT); + } +} + void __init tsx_init(void) { char arg[5] = {}; int ret; + tsx_dev_mode_disable(); + /* * Hardware will always abort a TSX transaction when the CPUID bit * RTM_ALWAYS_ABORT is set. In this case, it is better not to enumerate @@ -215,3 +246,16 @@ void __init tsx_init(void) setup_force_cpu_cap(X86_FEATURE_HLE); } } + +void tsx_ap_init(void) +{ + tsx_dev_mode_disable(); + + if (tsx_ctrl_state == TSX_CTRL_ENABLE) + tsx_enable(); + else if (tsx_ctrl_state == TSX_CTRL_DISABLE) + tsx_disable(); + else if (tsx_ctrl_state == TSX_CTRL_RTM_ALWAYS_ABORT) + /* See comment over that function for more details. */ + tsx_clear_cpuid(); +} diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 0eb90d21049e..ee15311b6be1 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -128,9 +128,9 @@ #define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ #define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ -/* SRBDS support */ #define MSR_IA32_MCU_OPT_CTRL 0x00000123 -#define RNGDS_MITG_DIS BIT(0) +#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ +#define RTM_ALLOW BIT(1) /* TSX development mode */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 From b2cd2cde7d690b760bcdd675380ff37c3e1aa38d Mon Sep 17 00:00:00 2001 From: Arun Ramadoss Date: Thu, 7 Apr 2022 10:16:10 +0530 Subject: [PATCH 501/708] net: phy: LAN87xx: remove genphy_softreset in config_aneg When the T1 phy master/slave state is changed, at the end of config_aneg function genphy_softreset is called. After the reset all the registers configured during the config_init are restored to default value. To avoid this, removed the genphy_softreset call. v1->v2 ------ Added the author in cc Fixes: 8a1b415d70b7 ("net: phy: added ethtool master-slave configuration support") Signed-off-by: Arun Ramadoss Signed-off-by: David S. Miller --- drivers/net/phy/microchip_t1.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c index 389df3f4293c..3f79bbbe62d3 100644 --- a/drivers/net/phy/microchip_t1.c +++ b/drivers/net/phy/microchip_t1.c @@ -706,7 +706,6 @@ static int lan87xx_read_status(struct phy_device *phydev) static int lan87xx_config_aneg(struct phy_device *phydev) { u16 ctl = 0; - int rc; switch (phydev->master_slave_set) { case MASTER_SLAVE_CFG_MASTER_FORCE: @@ -722,11 +721,7 @@ static int lan87xx_config_aneg(struct phy_device *phydev) return -EOPNOTSUPP; } - rc = phy_modify_changed(phydev, MII_CTRL1000, CTL1000_AS_MASTER, ctl); - if (rc == 1) - rc = genphy_soft_reset(phydev); - - return rc; + return phy_modify_changed(phydev, MII_CTRL1000, CTL1000_AS_MASTER, ctl); } static struct phy_driver microchip_t1_phy_driver[] = { From a6aaa00324240967272b451bfa772547bd576ee6 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Thu, 7 Apr 2022 08:25:21 -0500 Subject: [PATCH 502/708] net: ethernet: stmmac: fix altr_tse_pcs function when using a fixed-link When using a fixed-link, the altr_tse_pcs driver crashes due to null-pointer dereference as no phy_device is provided to tse_pcs_fix_mac_speed function. Fix this by adding a check for phy_dev before calling the tse_pcs_fix_mac_speed() function. Also clean up the tse_pcs_fix_mac_speed function a bit. There is no need to check for splitter_base and sgmii_adapter_base because the driver will fail if these 2 variables are not derived from the device tree. Fixes: fb3bbdb85989 ("net: ethernet: Add TSE PCS support to dwmac-socfpga") Signed-off-by: Dinh Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c | 8 -------- drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h | 4 ++++ drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 13 +++++-------- 3 files changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c index cd478d2cd871..00f6d347eaf7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c +++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c @@ -57,10 +57,6 @@ #define TSE_PCS_USE_SGMII_ENA BIT(0) #define TSE_PCS_IF_USE_SGMII 0x03 -#define SGMII_ADAPTER_CTRL_REG 0x00 -#define SGMII_ADAPTER_DISABLE 0x0001 -#define SGMII_ADAPTER_ENABLE 0x0000 - #define AUTONEGO_LINK_TIMER 20 static int tse_pcs_reset(void __iomem *base, struct tse_pcs *pcs) @@ -202,12 +198,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev, unsigned int speed) { void __iomem *tse_pcs_base = pcs->tse_pcs_base; - void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base; u32 val; - writew(SGMII_ADAPTER_ENABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); - pcs->autoneg = phy_dev->autoneg; if (phy_dev->autoneg == AUTONEG_ENABLE) { diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h index 442812c0a4bd..694ac25ef426 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h +++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h @@ -10,6 +10,10 @@ #include #include +#define SGMII_ADAPTER_CTRL_REG 0x00 +#define SGMII_ADAPTER_ENABLE 0x0000 +#define SGMII_ADAPTER_DISABLE 0x0001 + struct tse_pcs { struct device *dev; void __iomem *tse_pcs_base; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index b7c2579c963b..ac9e6c7a33b5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -18,9 +18,6 @@ #include "altr_tse_pcs.h" -#define SGMII_ADAPTER_CTRL_REG 0x00 -#define SGMII_ADAPTER_DISABLE 0x0001 - #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII 0x2 @@ -62,16 +59,14 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) { struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv; void __iomem *splitter_base = dwmac->splitter_base; - void __iomem *tse_pcs_base = dwmac->pcs.tse_pcs_base; void __iomem *sgmii_adapter_base = dwmac->pcs.sgmii_adapter_base; struct device *dev = dwmac->dev; struct net_device *ndev = dev_get_drvdata(dev); struct phy_device *phy_dev = ndev->phydev; u32 val; - if ((tse_pcs_base) && (sgmii_adapter_base)) - writew(SGMII_ADAPTER_DISABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); + writew(SGMII_ADAPTER_DISABLE, + sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); if (splitter_base) { val = readl(splitter_base + EMAC_SPLITTER_CTRL_REG); @@ -93,7 +88,9 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG); } - if (tse_pcs_base && sgmii_adapter_base) + writew(SGMII_ADAPTER_ENABLE, + sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); + if (phy_dev) tse_pcs_fix_mac_speed(&dwmac->pcs, phy_dev, speed); } From e8a64bbaaad1f6548cec5508297bc6d45e8ab69e Mon Sep 17 00:00:00 2001 From: Benedikt Spranger Date: Fri, 8 Apr 2022 11:47:45 +0200 Subject: [PATCH 503/708] net/sched: taprio: Check if socket flags are valid A user may set the SO_TXTIME socket option to ensure a packet is send at a given time. The taprio scheduler has to confirm, that it is allowed to send a packet at that given time, by a check against the packet time schedule. The scheduler drop the packet, if the gates are closed at the given send time. The check, if SO_TXTIME is set, may fail since sk_flags are part of an union and the union is used otherwise. This happen, if a socket is not a full socket, like a request socket for example. Add a check to verify, if the union is used for sk_flags. Fixes: 4cfd5779bd6e ("taprio: Add support for txtime-assist mode") Signed-off-by: Benedikt Spranger Reviewed-by: Kurt Kanzenbach Acked-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 377f896bdedc..b9c71a304d39 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -417,7 +417,8 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch, { struct taprio_sched *q = qdisc_priv(sch); - if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) { + /* sk_flags are only safe to use on full sockets. */ + if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) { if (!is_valid_interval(skb, sch)) return qdisc_drop(skb, sch, to_free); } else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { From 6624bb34b4eb19f715db9908cca00122748765d7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Apr 2022 11:42:03 +0200 Subject: [PATCH 504/708] nl80211: correctly check NL80211_ATTR_REG_ALPHA2 size We need this to be at least two bytes, so we can access alpha2[0] and alpha2[1]. It may be three in case some userspace used NUL-termination since it was NLA_STRING (and we also push it out with NUL-termination). Cc: stable@vger.kernel.org Reported-by: Lee Jones Link: https://lore.kernel.org/r/20220411114201.fd4a31f06541.Ie7ff4be2cf348d8cc28ed0d626fc54becf7ea799@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ee1c2b6b6971..21e808fcb676 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -528,7 +528,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { .len = IEEE80211_MAX_MESH_ID_LEN }, [NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT, - [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 }, + /* allow 3 for NUL-termination, we used to declare this NLA_STRING */ + [NL80211_ATTR_REG_ALPHA2] = NLA_POLICY_RANGE(NLA_BINARY, 2, 3), [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED }, [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 }, From a5199b5626cd6913cf8776a835bc63d40e0686ad Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Mon, 11 Apr 2022 14:37:51 +0530 Subject: [PATCH 505/708] cfg80211: hold bss_lock while updating nontrans_list Synchronize additions to nontrans_list of transmitting BSS with bss_lock to avoid races. Also when cfg80211_add_nontrans_list() fails __cfg80211_unlink_bss() needs bss_lock to be held (has lockdep assert on bss_lock). So protect the whole block with bss_lock to avoid races and warnings. Found during code review. Fixes: 0b8fb8235be8 ("cfg80211: Parsing of Multiple BSSID information in scanning") Signed-off-by: Rameshkumar Sundaram Link: https://lore.kernel.org/r/1649668071-9370-1-git-send-email-quic_ramess@quicinc.com Signed-off-by: Johannes Berg --- net/wireless/scan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index b2fdac96bab0..4a6d86432910 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2018,11 +2018,13 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, /* this is a nontransmitting bss, we need to add it to * transmitting bss' list if it is not there */ + spin_lock_bh(&rdev->bss_lock); if (cfg80211_add_nontrans_list(non_tx_data->tx_bss, &res->pub)) { if (__cfg80211_unlink_bss(rdev, res)) rdev->bss_generation++; } + spin_unlock_bh(&rdev->bss_lock); } trace_cfg80211_return_bss(&res->pub); From fb4bccd863ccccd36ad000601856609e259a1859 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 6 Apr 2022 10:56:59 -0700 Subject: [PATCH 506/708] mac80211: fix ht_capa printout in debugfs Don't use sizeof(pointer) when calculating scnprintf offset. Fixes: 01f84f0ed3b4 ("mac80211: reduce stack usage in debugfs") Signed-off-by: Ben Greear Link: https://lore.kernel.org/r/20220406175659.20611-1-greearb@candelatech.com [correct the Fixes tag] Signed-off-by: Johannes Berg --- net/mac80211/debugfs_sta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 9479f2787ea7..88d9cc945a21 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -441,7 +441,7 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, #define PRINT_HT_CAP(_cond, _str) \ do { \ if (_cond) \ - p += scnprintf(p, sizeof(buf)+buf-p, "\t" _str "\n"); \ + p += scnprintf(p, bufsz + buf - p, "\t" _str "\n"); \ } while (0) char *buf, *p; int i; From 05ae2fba821c4d122ab4ba3e52144e21586c4010 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 9 Apr 2022 13:20:19 +0200 Subject: [PATCH 507/708] netfilter: nft_socket: make cgroup match work in input too cgroupv2 helper function ignores the already-looked up sk and uses skb->sk instead. Just pass sk from the calling function instead; this will make cgroup matching work for udp and tcp in input even when edemux did not set skb->sk already. Fixes: e0bb96db96f8 ("netfilter: nft_socket: add support for cgroupsv2") Signed-off-by: Florian Westphal Tested-by: Topi Miettinen Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_socket.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index bd3792f080ed..6d9e8e0a3a7d 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -37,12 +37,11 @@ static void nft_socket_wildcard(const struct nft_pktinfo *pkt, #ifdef CONFIG_SOCK_CGROUP_DATA static noinline bool -nft_sock_get_eval_cgroupv2(u32 *dest, const struct nft_pktinfo *pkt, u32 level) +nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo *pkt, u32 level) { - struct sock *sk = skb_to_full_sk(pkt->skb); struct cgroup *cgrp; - if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk))) + if (!sk_fullsock(sk)) return false; cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); @@ -109,7 +108,7 @@ static void nft_socket_eval(const struct nft_expr *expr, break; #ifdef CONFIG_SOCK_CGROUP_DATA case NFT_SOCKET_CGROUPV2: - if (!nft_sock_get_eval_cgroupv2(dest, pkt, priv->level)) { + if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) { regs->verdict.code = NFT_BREAK; return; } From 1a7eb80d170c28be2928433702256fe2a0bd1e0f Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Fri, 8 Apr 2022 09:49:41 +0000 Subject: [PATCH 508/708] dpaa_eth: Fix missing of_node_put in dpaa_get_ts_info() Both of of_get_parent() and of_parse_phandle() return node pointer with refcount incremented, use of_node_put() on it to decrease refcount when done. Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index 763d2c7b5fb1..5750f9a56393 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -489,11 +489,15 @@ static int dpaa_get_ts_info(struct net_device *net_dev, info->phc_index = -1; fman_node = of_get_parent(mac_node); - if (fman_node) + if (fman_node) { ptp_node = of_parse_phandle(fman_node, "ptimer-handle", 0); + of_node_put(fman_node); + } - if (ptp_node) + if (ptp_node) { ptp_dev = of_find_device_by_node(ptp_node); + of_node_put(ptp_node); + } if (ptp_dev) ptp = platform_get_drvdata(ptp_dev); From e3fa461d8b0e185b7da8a101fe94dfe6dd500ac0 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 8 Apr 2022 16:03:42 +0200 Subject: [PATCH 509/708] ipv6: fix panic when forwarding a pkt with no in6 dev kongweibin reported a kernel panic in ip6_forward() when input interface has no in6 dev associated. The following tc commands were used to reproduce this panic: tc qdisc del dev vxlan100 root tc qdisc add dev vxlan100 root netem corrupt 5% CC: stable@vger.kernel.org Fixes: ccd27f05ae7b ("ipv6: fix 'disable_policy' for fwd packets") Reported-by: kongweibin Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e23f058166af..fa63ef2bd99c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -485,7 +485,7 @@ int ip6_forward(struct sk_buff *skb) goto drop; if (!net->ipv6.devconf_all->disable_policy && - !idev->cnf.disable_policy && + (!idev || !idev->cnf.disable_policy) && !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; From 537fef808be5ea56f6fc06932162550819a3b3c3 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 7 Apr 2022 13:28:33 -0700 Subject: [PATCH 510/708] drm/msm: Fix range size vs end confusion The fourth param is size, rather than range_end. Note that we could increase the address space size if we had a way to prevent buffers from spanning a 4G split, mostly just to avoid fw bugs with 64b math. Fixes: 84c31ee16f90 ("drm/msm/a6xx: Add support for per-instance pagetables") Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20220407202836.1211268-1-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 83c31b2ad865..ccc4fcf7a630 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1742,7 +1742,7 @@ a6xx_create_private_address_space(struct msm_gpu *gpu) return ERR_CAST(mmu); return msm_gem_address_space_create(mmu, - "gpu", 0x100000000ULL, 0x1ffffffffULL); + "gpu", 0x100000000ULL, SZ_4G); } static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) From 047ae665577776b7feb11bd4f81f46627cff95e7 Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Thu, 7 Apr 2022 10:31:51 +0800 Subject: [PATCH 511/708] drm/msm/mdp5: check the return of kzalloc() kzalloc() is a memory allocation function which can return NULL when some internal memory errors happen. So it is better to check it to prevent potential wrong memory access. Besides, since mdp5_plane_reset() is void type, so we should better set `plane-state` to NULL after releasing it. Signed-off-by: Xiaoke Wang Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/481055/ Link: https://lore.kernel.org/r/tencent_8E2A1C78140EE1784AB2FF4B2088CC0AB908@qq.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c index b176338ab59b..85ef10b888e9 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c @@ -91,7 +91,10 @@ static void mdp5_plane_reset(struct drm_plane *plane) __drm_atomic_helper_plane_destroy_state(plane->state); kfree(to_mdp5_plane_state(plane->state)); + plane->state = NULL; mdp5_state = kzalloc(sizeof(*mdp5_state), GFP_KERNEL); + if (!mdp5_state) + return; if (plane->type == DRM_PLANE_TYPE_PRIMARY) mdp5_state->base.zpos = STAGE_BASE; From e2a88eabb02410267519b838fb9b79f5206769be Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 5 Apr 2022 15:17:48 +0100 Subject: [PATCH 512/708] drm/msm: Stop using iommu_present() Even if some IOMMU has registered itself on the platform "bus", that doesn't necessarily mean it provides translation for the device we care about. Replace iommu_present() with a more appropriate check. Signed-off-by: Robin Murphy Reviewed-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/480707/ Link: https://lore.kernel.org/r/5ab4f4574d7f3e042261da702d493ee40d003356.1649168268.git.robin.murphy@arm.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index e88c4b46a56f..2905b82a9de3 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -274,7 +274,7 @@ bool msm_use_mmu(struct drm_device *dev) struct msm_drm_private *priv = dev->dev_private; /* a2xx comes with its own MMU */ - return priv->is_a2xx || iommu_present(&platform_bus_type); + return priv->is_a2xx || device_iommu_mapped(dev->dev); } static int msm_init_vram(struct drm_device *dev) From 47b7de6b88b962ef339a2427a023d2a23d161654 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 17 Mar 2022 17:07:31 -0700 Subject: [PATCH 513/708] drm/msm/dsi: Use connector directly in msm_dsi_manager_connector_init() The member 'msm_dsi->connector' isn't assigned until msm_dsi_manager_connector_init() returns (see msm_dsi_modeset_init() and how it assigns the return value). Therefore this pointer is going to be NULL here. Let's use 'connector' which is what was intended. Cc: Dmitry Baryshkov Cc: Sean Paul Fixes: 6d5e78406991 ("drm/msm/dsi: Move dsi panel init into modeset init path") Signed-off-by: Stephen Boyd Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/478693/ Link: https://lore.kernel.org/r/20220318000731.2823718-1-swboyd@chromium.org Signed-off-by: Dmitry Baryshkov Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/dsi_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 0c1b7dde377c..9f6af0f0fe00 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -638,7 +638,7 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id) return connector; fail: - connector->funcs->destroy(msm_dsi->connector); + connector->funcs->destroy(connector); return ERR_PTR(ret); } From 8b2c181e3dcf7562445af6702ee94aaedcbe13c8 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Fri, 8 Apr 2022 14:04:54 -0700 Subject: [PATCH 514/708] drm/msm/dp: add fail safe mode outside of event_mutex context There is possible circular locking dependency detected on event_mutex (see below logs). This is due to set fail safe mode is done at dp_panel_read_sink_caps() within event_mutex scope. To break this possible circular locking, this patch move setting fail safe mode out of event_mutex scope. [ 23.958078] ====================================================== [ 23.964430] WARNING: possible circular locking dependency detected [ 23.970777] 5.17.0-rc2-lockdep-00088-g05241de1f69e #148 Not tainted [ 23.977219] ------------------------------------------------------ [ 23.983570] DrmThread/1574 is trying to acquire lock: [ 23.988763] ffffff808423aab0 (&dp->event_mutex){+.+.}-{3:3}, at: msm_dp_displ ay_enable+0x58/0x164 [ 23.997895] [ 23.997895] but task is already holding lock: [ 24.003895] ffffff808420b280 (&kms->commit_lock[i]/1){+.+.}-{3:3}, at: lock_c rtcs+0x80/0x8c [ 24.012495] [ 24.012495] which lock already depends on the new lock. [ 24.012495] [ 24.020886] [ 24.020886] the existing dependency chain (in reverse order) is: [ 24.028570] [ 24.028570] -> #5 (&kms->commit_lock[i]/1){+.+.}-{3:3}: [ 24.035472] __mutex_lock+0xc8/0x384 [ 24.039695] mutex_lock_nested+0x54/0x74 [ 24.044272] lock_crtcs+0x80/0x8c [ 24.048222] msm_atomic_commit_tail+0x1e8/0x3d0 [ 24.053413] commit_tail+0x7c/0xfc [ 24.057452] drm_atomic_helper_commit+0x158/0x15c [ 24.062826] drm_atomic_commit+0x60/0x74 [ 24.067403] drm_mode_atomic_ioctl+0x6b0/0x908 [ 24.072508] drm_ioctl_kernel+0xe8/0x168 [ 24.077086] drm_ioctl+0x320/0x370 [ 24.081123] drm_compat_ioctl+0x40/0xdc [ 24.085602] __arm64_compat_sys_ioctl+0xe0/0x150 [ 24.090895] invoke_syscall+0x80/0x114 [ 24.095294] el0_svc_common.constprop.3+0xc4/0xf8 [ 24.100668] do_el0_svc_compat+0x2c/0x54 [ 24.105242] el0_svc_compat+0x4c/0xe4 [ 24.109548] el0t_32_sync_handler+0xc4/0xf4 [ 24.114381] el0t_32_sync+0x178 [ 24.118688] [ 24.118688] -> #4 (&kms->commit_lock[i]){+.+.}-{3:3}: [ 24.125408] __mutex_lock+0xc8/0x384 [ 24.129628] mutex_lock_nested+0x54/0x74 [ 24.134204] lock_crtcs+0x80/0x8c [ 24.138155] msm_atomic_commit_tail+0x1e8/0x3d0 [ 24.143345] commit_tail+0x7c/0xfc [ 24.147382] drm_atomic_helper_commit+0x158/0x15c [ 24.152755] drm_atomic_commit+0x60/0x74 [ 24.157323] drm_atomic_helper_set_config+0x68/0x90 [ 24.162869] drm_mode_setcrtc+0x394/0x648 [ 24.167535] drm_ioctl_kernel+0xe8/0x168 [ 24.172102] drm_ioctl+0x320/0x370 [ 24.176135] drm_compat_ioctl+0x40/0xdc [ 24.180621] __arm64_compat_sys_ioctl+0xe0/0x150 [ 24.185904] invoke_syscall+0x80/0x114 [ 24.190302] el0_svc_common.constprop.3+0xc4/0xf8 [ 24.195673] do_el0_svc_compat+0x2c/0x54 [ 24.200241] el0_svc_compat+0x4c/0xe4 [ 24.204544] el0t_32_sync_handler+0xc4/0xf4 [ 24.209378] el0t_32_sync+0x174/0x178 [ 24.213680] -> #3 (crtc_ww_class_mutex){+.+.}-{3:3}: [ 24.220308] __ww_mutex_lock.constprop.20+0xe8/0x878 [ 24.225951] ww_mutex_lock+0x60/0xd0 [ 24.230166] modeset_lock+0x190/0x19c [ 24.234467] drm_modeset_lock+0x34/0x54 [ 24.238953] drmm_mode_config_init+0x550/0x764 [ 24.244065] msm_drm_bind+0x170/0x59c [ 24.248374] try_to_bring_up_master+0x244/0x294 [ 24.253572] __component_add+0xf4/0x14c [ 24.258057] component_add+0x2c/0x38 [ 24.262273] dsi_dev_attach+0x2c/0x38 [ 24.266575] dsi_host_attach+0xc4/0x120 [ 24.271060] mipi_dsi_attach+0x34/0x48 [ 24.275456] devm_mipi_dsi_attach+0x28/0x68 [ 24.280298] ti_sn_bridge_probe+0x2b4/0x2dc [ 24.285137] auxiliary_bus_probe+0x78/0x90 [ 24.289893] really_probe+0x1e4/0x3d8 [ 24.294194] __driver_probe_device+0x14c/0x164 [ 24.299298] driver_probe_device+0x54/0xf8 [ 24.304043] __device_attach_driver+0xb4/0x118 [ 24.309145] bus_for_each_drv+0xb0/0xd4 [ 24.313628] __device_attach+0xcc/0x158 [ 24.318112] device_initial_probe+0x24/0x30 [ 24.322954] bus_probe_device+0x38/0x9c [ 24.327439] deferred_probe_work_func+0xd4/0xf0 [ 24.332628] process_one_work+0x2f0/0x498 [ 24.337289] process_scheduled_works+0x44/0x48 [ 24.342391] worker_thread+0x1e4/0x26c [ 24.346788] kthread+0xe4/0xf4 [ 24.350470] ret_from_fork+0x10/0x20 [ 24.354683] [ 24.354683] [ 24.354683] -> #2 (crtc_ww_class_acquire){+.+.}-{0:0}: [ 24.361489] drm_modeset_acquire_init+0xe4/0x138 [ 24.366777] drm_helper_probe_detect_ctx+0x44/0x114 [ 24.372327] check_connector_changed+0xbc/0x198 [ 24.377517] drm_helper_hpd_irq_event+0xcc/0x11c [ 24.382804] dsi_hpd_worker+0x24/0x30 [ 24.387104] process_one_work+0x2f0/0x498 [ 24.391762] worker_thread+0x1d0/0x26c [ 24.396158] kthread+0xe4/0xf4 [ 24.399840] ret_from_fork+0x10/0x20 [ 24.404053] [ 24.404053] -> #1 (&dev->mode_config.mutex){+.+.}-{3:3}: [ 24.411032] __mutex_lock+0xc8/0x384 [ 24.415247] mutex_lock_nested+0x54/0x74 [ 24.419819] dp_panel_read_sink_caps+0x23c/0x26c [ 24.425108] dp_display_process_hpd_high+0x34/0xd4 [ 24.430570] dp_display_usbpd_configure_cb+0x30/0x3c [ 24.436205] hpd_event_thread+0x2ac/0x550 [ 24.440864] kthread+0xe4/0xf4 [ 24.444544] ret_from_fork+0x10/0x20 [ 24.448757] [ 24.448757] -> #0 (&dp->event_mutex){+.+.}-{3:3}: [ 24.455116] __lock_acquire+0xe2c/0x10d8 [ 24.459690] lock_acquire+0x1ac/0x2d0 [ 24.463988] __mutex_lock+0xc8/0x384 [ 24.468201] mutex_lock_nested+0x54/0x74 [ 24.472773] msm_dp_display_enable+0x58/0x164 [ 24.477789] dp_bridge_enable+0x24/0x30 [ 24.482273] drm_atomic_bridge_chain_enable+0x78/0x9c [ 24.488006] drm_atomic_helper_commit_modeset_enables+0x1bc/0x244 [ 24.494801] msm_atomic_commit_tail+0x248/0x3d0 [ 24.499992] commit_tail+0x7c/0xfc [ 24.504031] drm_atomic_helper_commit+0x158/0x15c [ 24.509404] drm_atomic_commit+0x60/0x74 [ 24.513976] drm_mode_atomic_ioctl+0x6b0/0x908 [ 24.519079] drm_ioctl_kernel+0xe8/0x168 [ 24.523650] drm_ioctl+0x320/0x370 [ 24.527689] drm_compat_ioctl+0x40/0xdc [ 24.532175] __arm64_compat_sys_ioctl+0xe0/0x150 [ 24.537463] invoke_syscall+0x80/0x114 [ 24.541861] el0_svc_common.constprop.3+0xc4/0xf8 [ 24.547235] do_el0_svc_compat+0x2c/0x54 [ 24.551806] el0_svc_compat+0x4c/0xe4 [ 24.556106] el0t_32_sync_handler+0xc4/0xf4 [ 24.560948] el0t_32_sync+0x174/0x178 Changes in v2: -- add circular lockiing trace Fixes: d4aca422539c ("drm/msm/dp: always add fail-safe mode into connector mode list") Signed-off-by: Kuogee Hsieh Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/481396/ Link: https://lore.kernel.org/r/1649451894-554-1-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_display.c | 6 ++++++ drivers/gpu/drm/msm/dp/dp_panel.c | 20 ++++++++++---------- drivers/gpu/drm/msm/dp/dp_panel.h | 1 + 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 178b774a5fbd..a42732b67349 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -580,6 +580,12 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) dp->dp_display.connector_type, state); mutex_unlock(&dp->event_mutex); + /* + * add fail safe mode outside event_mutex scope + * to avoid potiential circular lock with drm thread + */ + dp_panel_add_fail_safe_mode(dp->dp_display.connector); + /* uevent will complete connection part */ return 0; }; diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index f1418722c549..26c3653c99ec 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -151,6 +151,15 @@ static int dp_panel_update_modes(struct drm_connector *connector, return rc; } +void dp_panel_add_fail_safe_mode(struct drm_connector *connector) +{ + /* fail safe edid */ + mutex_lock(&connector->dev->mode_config.mutex); + if (drm_add_modes_noedid(connector, 640, 480)) + drm_set_preferred_mode(connector, 640, 480); + mutex_unlock(&connector->dev->mode_config.mutex); +} + int dp_panel_read_sink_caps(struct dp_panel *dp_panel, struct drm_connector *connector) { @@ -207,16 +216,7 @@ int dp_panel_read_sink_caps(struct dp_panel *dp_panel, goto end; } - /* fail safe edid */ - mutex_lock(&connector->dev->mode_config.mutex); - if (drm_add_modes_noedid(connector, 640, 480)) - drm_set_preferred_mode(connector, 640, 480); - mutex_unlock(&connector->dev->mode_config.mutex); - } else { - /* always add fail-safe mode as backup mode */ - mutex_lock(&connector->dev->mode_config.mutex); - drm_add_modes_noedid(connector, 640, 480); - mutex_unlock(&connector->dev->mode_config.mutex); + dp_panel_add_fail_safe_mode(connector); } if (panel->aux_cfg_update_done) { diff --git a/drivers/gpu/drm/msm/dp/dp_panel.h b/drivers/gpu/drm/msm/dp/dp_panel.h index 9023e5bb4b8b..99739ea679a7 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.h +++ b/drivers/gpu/drm/msm/dp/dp_panel.h @@ -59,6 +59,7 @@ int dp_panel_init_panel_info(struct dp_panel *dp_panel); int dp_panel_deinit(struct dp_panel *dp_panel); int dp_panel_timing_cfg(struct dp_panel *dp_panel); void dp_panel_dump_regs(struct dp_panel *dp_panel); +void dp_panel_add_fail_safe_mode(struct drm_connector *connector); int dp_panel_read_sink_caps(struct dp_panel *dp_panel, struct drm_connector *connector); u32 dp_panel_get_mode_bpp(struct dp_panel *dp_panel, u32 mode_max_bpp, From 0c8b6641c8410930e2a2f4a437ac3f987fbf9404 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Wed, 6 Apr 2022 14:37:13 +0800 Subject: [PATCH 515/708] selftests: kvm: add tsc_scaling_sync to .gitignore The tsc_scaling_sync's binary should be present in the .gitignore file for the git to ignore it. Signed-off-by: Like Xu Message-Id: <20220406063715.55625-3-likexu@tencent.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 573d93a1d61f..0b0e4402bba6 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -34,6 +34,7 @@ /x86_64/state_test /x86_64/svm_vmcall_test /x86_64/svm_int_ctl_test +/x86_64/tsc_scaling_sync /x86_64/sync_regs_test /x86_64/tsc_msrs_test /x86_64/userspace_io_test From af105c9cc9ec8fdc087827a98d4b9dc10d61c358 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Wed, 6 Apr 2022 14:37:15 +0800 Subject: [PATCH 516/708] Documentation: KVM: Add SPDX-License-Identifier tag +new file mode 100644 +WARNING: Missing or malformed SPDX-License-Identifier tag in line 1 +#27: FILE: Documentation/virt/kvm/x86/errata.rst:1: Opportunistically update all other non-added KVM documents and remove a new extra blank line at EOF for x86/errata.rst. Signed-off-by: Like Xu Message-Id: <20220406063715.55625-5-likexu@tencent.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/vcpu-requests.rst | 2 ++ Documentation/virt/kvm/x86/amd-memory-encryption.rst | 2 ++ Documentation/virt/kvm/x86/errata.rst | 2 +- Documentation/virt/kvm/x86/running-nested-guests.rst | 2 ++ 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/vcpu-requests.rst b/Documentation/virt/kvm/vcpu-requests.rst index db43ee571f5a..31f62b64e07b 100644 --- a/Documentation/virt/kvm/vcpu-requests.rst +++ b/Documentation/virt/kvm/vcpu-requests.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ================= KVM VCPU Requests ================= diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst index 1c6847fff304..2d307811978c 100644 --- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ====================================== Secure Encrypted Virtualization (SEV) ====================================== diff --git a/Documentation/virt/kvm/x86/errata.rst b/Documentation/virt/kvm/x86/errata.rst index 806f049b6975..410e0aa63493 100644 --- a/Documentation/virt/kvm/x86/errata.rst +++ b/Documentation/virt/kvm/x86/errata.rst @@ -1,3 +1,4 @@ +.. SPDX-License-Identifier: GPL-2.0 ======================================= Known limitations of CPU virtualization @@ -36,4 +37,3 @@ Nested virtualization features ------------------------------ TBD - diff --git a/Documentation/virt/kvm/x86/running-nested-guests.rst b/Documentation/virt/kvm/x86/running-nested-guests.rst index bd70c69468ae..a27e6768d900 100644 --- a/Documentation/virt/kvm/x86/running-nested-guests.rst +++ b/Documentation/virt/kvm/x86/running-nested-guests.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ============================== Running nested guests with KVM ============================== From c538dc792ff7e456d777f585fdf96aa4e781ed66 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 8 Apr 2022 08:37:10 -0500 Subject: [PATCH 517/708] KVM: SVM: Do not activate AVIC for SEV-enabled guest Since current AVIC implementation cannot support encrypted memory, inhibit AVIC for SEV-enabled guest. Signed-off-by: Suravee Suthikulpanit Message-Id: <20220408133710.54275-1-suravee.suthikulpanit@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/avic.c | 3 ++- arch/x86/kvm/svm/sev.c | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0d37ba442de3..92843fcdc1cf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1052,6 +1052,7 @@ enum kvm_apicv_inhibit { APICV_INHIBIT_REASON_X2APIC, APICV_INHIBIT_REASON_BLOCKIRQ, APICV_INHIBIT_REASON_ABSENT, + APICV_INHIBIT_REASON_SEV, }; struct kvm_arch { diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index a1cf9c31273b..421619540ff9 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -837,7 +837,8 @@ bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason) BIT(APICV_INHIBIT_REASON_IRQWIN) | BIT(APICV_INHIBIT_REASON_PIT_REINJ) | BIT(APICV_INHIBIT_REASON_X2APIC) | - BIT(APICV_INHIBIT_REASON_BLOCKIRQ); + BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | + BIT(APICV_INHIBIT_REASON_SEV); return supported & BIT(reason); } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c2fe89ecdb2d..537aaddc852f 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -260,6 +260,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) INIT_LIST_HEAD(&sev->regions_list); INIT_LIST_HEAD(&sev->mirror_vms); + kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_SEV); + return 0; e_free: From 42dcbe7d8bac997eef4c379e61d9121a15ed4e36 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 7 Apr 2022 22:10:13 +0200 Subject: [PATCH 518/708] KVM: x86: hyper-v: Avoid writing to TSC page without an active vCPU The following WARN is triggered from kvm_vm_ioctl_set_clock(): WARNING: CPU: 10 PID: 579353 at arch/x86/kvm/../../../virt/kvm/kvm_main.c:3161 mark_page_dirty_in_slot+0x6c/0x80 [kvm] ... CPU: 10 PID: 579353 Comm: qemu-system-x86 Tainted: G W O 5.16.0.stable #20 Hardware name: LENOVO 20UF001CUS/20UF001CUS, BIOS R1CET65W(1.34 ) 06/17/2021 RIP: 0010:mark_page_dirty_in_slot+0x6c/0x80 [kvm] ... Call Trace: ? kvm_write_guest+0x114/0x120 [kvm] kvm_hv_invalidate_tsc_page+0x9e/0xf0 [kvm] kvm_arch_vm_ioctl+0xa26/0xc50 [kvm] ? schedule+0x4e/0xc0 ? __cond_resched+0x1a/0x50 ? futex_wait+0x166/0x250 ? __send_signal+0x1f1/0x3d0 kvm_vm_ioctl+0x747/0xda0 [kvm] ... The WARN was introduced by commit 03c0304a86bc ("KVM: Warn if mark_page_dirty() is called without an active vCPU") but the change seems to be correct (unlike Hyper-V TSC page update mechanism). In fact, there's no real need to actually write to guest memory to invalidate TSC page, this can be done by the first vCPU which goes through kvm_guest_time_update(). Reported-by: Maxim Levitsky Reported-by: Naresh Kamboju Suggested-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Message-Id: <20220407201013.963226-1-vkuznets@redhat.com> --- arch/x86/include/asm/kvm_host.h | 4 +-- arch/x86/kvm/hyperv.c | 44 ++++++++------------------------- arch/x86/kvm/hyperv.h | 2 +- arch/x86/kvm/x86.c | 7 +++--- 4 files changed, 15 insertions(+), 42 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 92843fcdc1cf..e0c0f0e1f754 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -974,12 +974,10 @@ enum hv_tsc_page_status { HV_TSC_PAGE_UNSET = 0, /* TSC page MSR was written by the guest, update pending */ HV_TSC_PAGE_GUEST_CHANGED, - /* TSC page MSR was written by KVM userspace, update pending */ + /* TSC page update was triggered from the host side */ HV_TSC_PAGE_HOST_CHANGED, /* TSC page was properly set up and is currently active */ HV_TSC_PAGE_SET, - /* TSC page is currently being updated and therefore is inactive */ - HV_TSC_PAGE_UPDATING, /* TSC page was set up with an inaccessible GPA */ HV_TSC_PAGE_BROKEN, }; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 123b677111c5..46f9dfb60469 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1135,11 +1135,13 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence)); BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0); - if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || - hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET) - return; - mutex_lock(&hv->hv_lock); + + if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || + hv->hv_tsc_page_status == HV_TSC_PAGE_SET || + hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET) + goto out_unlock; + if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) goto out_unlock; @@ -1201,45 +1203,19 @@ out_unlock: mutex_unlock(&hv->hv_lock); } -void kvm_hv_invalidate_tsc_page(struct kvm *kvm) +void kvm_hv_request_tsc_page_update(struct kvm *kvm) { struct kvm_hv *hv = to_kvm_hv(kvm); - u64 gfn; - int idx; - - if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || - hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET || - tsc_page_update_unsafe(hv)) - return; mutex_lock(&hv->hv_lock); - if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) - goto out_unlock; + if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET && + !tsc_page_update_unsafe(hv)) + hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED; - /* Preserve HV_TSC_PAGE_GUEST_CHANGED/HV_TSC_PAGE_HOST_CHANGED states */ - if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET) - hv->hv_tsc_page_status = HV_TSC_PAGE_UPDATING; - - gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; - - hv->tsc_ref.tsc_sequence = 0; - - /* - * Take the srcu lock as memslots will be accessed to check the gfn - * cache generation against the memslots generation. - */ - idx = srcu_read_lock(&kvm->srcu); - if (kvm_write_guest(kvm, gfn_to_gpa(gfn), - &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) - hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN; - srcu_read_unlock(&kvm->srcu, idx); - -out_unlock: mutex_unlock(&hv->hv_lock); } - static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr) { if (!hv_vcpu->enforce_cpuid) diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index e19c00ee9ab3..da2737f2a956 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -137,7 +137,7 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); void kvm_hv_setup_tsc_page(struct kvm *kvm, struct pvclock_vcpu_time_info *hv_clock); -void kvm_hv_invalidate_tsc_page(struct kvm *kvm); +void kvm_hv_request_tsc_page_update(struct kvm *kvm); void kvm_hv_init_vm(struct kvm *kvm); void kvm_hv_destroy_vm(struct kvm *kvm); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index de49a88df1c2..547ba00ef64f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2901,7 +2901,7 @@ static void kvm_end_pvclock_update(struct kvm *kvm) static void kvm_update_masterclock(struct kvm *kvm) { - kvm_hv_invalidate_tsc_page(kvm); + kvm_hv_request_tsc_page_update(kvm); kvm_start_pvclock_update(kvm); pvclock_update_vm_gtod_copy(kvm); kvm_end_pvclock_update(kvm); @@ -3113,8 +3113,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) offsetof(struct compat_vcpu_info, time)); if (vcpu->xen.vcpu_time_info_set) kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0); - if (!v->vcpu_idx) - kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock); + kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock); return 0; } @@ -6241,7 +6240,7 @@ static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp) if (data.flags & ~KVM_CLOCK_VALID_FLAGS) return -EINVAL; - kvm_hv_invalidate_tsc_page(kvm); + kvm_hv_request_tsc_page_update(kvm); kvm_start_pvclock_update(kvm); pvclock_update_vm_gtod_copy(kvm); From 6f83ab22adcb77a5824d2c274dace0d99e21319f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 11 Apr 2022 09:48:30 -0600 Subject: [PATCH 519/708] io_uring: io_kiocb_update_pos() should not touch file for non -1 offset -1 tells use to use the current position, but we check if the file is a stream regardless of that. Fix up io_kiocb_update_pos() to only dip into file if we need to. This is both more efficient and also drops 12 bytes of text on aarch64 and 64 bytes on x86-64. Fixes: b4aec4001595 ("io_uring: do not recalculate ppos unnecessarily") Signed-off-by: Jens Axboe --- fs/io_uring.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f060ad018ba4..b4a5e2a6aa9c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3183,19 +3183,18 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req) { struct kiocb *kiocb = &req->rw.kiocb; - bool is_stream = req->file->f_mode & FMODE_STREAM; - if (kiocb->ki_pos == -1) { - if (!is_stream) { - req->flags |= REQ_F_CUR_POS; - kiocb->ki_pos = req->file->f_pos; - return &kiocb->ki_pos; - } else { - kiocb->ki_pos = 0; - return NULL; - } + if (kiocb->ki_pos != -1) + return &kiocb->ki_pos; + + if (!(req->file->f_mode & FMODE_STREAM)) { + req->flags |= REQ_F_CUR_POS; + kiocb->ki_pos = req->file->f_pos; + return &kiocb->ki_pos; } - return is_stream ? NULL : &kiocb->ki_pos; + + kiocb->ki_pos = 0; + return NULL; } static void kiocb_done(struct io_kiocb *req, ssize_t ret, From 2804ecd8d3e3730b4f999cc1ff4b2441e1f4d513 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 11 Apr 2022 17:03:26 -0600 Subject: [PATCH 520/708] io_uring: move apoll->events cache In preparation for fixing a regression with pulling in an extra cacheline for IO that doesn't usually touch the last cacheline of the io_kiocb, move the cached location of apoll->events to space shared with some other completion data. Like cflags, this isn't used until after the request has been completed, so we can piggy back on top of comp_list. Fixes: 81459350d581 ("io_uring: cache req->apoll->events in req->cflags") Signed-off-by: Jens Axboe --- fs/io_uring.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b4a5e2a6aa9c..3a97535d0550 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -916,8 +916,12 @@ struct io_kiocb { /* store used ubuf, so we can prevent reloading */ struct io_mapped_ubuf *imu; - /* used by request caches, completion batching and iopoll */ - struct io_wq_work_node comp_list; + union { + /* used by request caches, completion batching and iopoll */ + struct io_wq_work_node comp_list; + /* cache ->apoll->events */ + int apoll_events; + }; atomic_t refs; atomic_t poll_refs; struct io_task_work io_task_work; @@ -5833,7 +5837,6 @@ static void io_poll_remove_entries(struct io_kiocb *req) static int io_poll_check_events(struct io_kiocb *req, bool locked) { struct io_ring_ctx *ctx = req->ctx; - struct io_poll_iocb *poll = io_poll_get_single(req); int v; /* req->task == current here, checking PF_EXITING is safe */ @@ -5850,17 +5853,17 @@ static int io_poll_check_events(struct io_kiocb *req, bool locked) return -ECANCELED; if (!req->result) { - struct poll_table_struct pt = { ._key = req->cflags }; + struct poll_table_struct pt = { ._key = req->apoll_events }; if (unlikely(!io_assign_file(req, IO_URING_F_UNLOCKED))) req->result = -EBADF; else - req->result = vfs_poll(req->file, &pt) & req->cflags; + req->result = vfs_poll(req->file, &pt) & req->apoll_events; } /* multishot, just fill an CQE and proceed */ - if (req->result && !(req->cflags & EPOLLONESHOT)) { - __poll_t mask = mangle_poll(req->result & poll->events); + if (req->result && !(req->apoll_events & EPOLLONESHOT)) { + __poll_t mask = mangle_poll(req->result & req->apoll_events); bool filled; spin_lock(&ctx->completion_lock); @@ -5938,7 +5941,7 @@ static void __io_poll_execute(struct io_kiocb *req, int mask, int events) * CPU. We want to avoid pulling in req->apoll->events for that * case. */ - req->cflags = events; + req->apoll_events = events; if (req->opcode == IORING_OP_POLL_ADD) req->io_task_work.func = io_poll_task_func; else @@ -6330,7 +6333,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe return -EINVAL; io_req_set_refcount(req); - req->cflags = poll->events = io_poll_parse_events(sqe, flags); + req->apoll_events = poll->events = io_poll_parse_events(sqe, flags); return 0; } From 82733d168cbd3fe9dab603f05894316b99008924 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 10 Apr 2022 19:05:09 -0600 Subject: [PATCH 521/708] io_uring: stop using io_wq_work as an fd placeholder There are two reasons why this isn't the best idea: - It's an odd area to grab a bit of storage space, hence it's an odd area to grab storage from. - It puts the 3rd io_kiocb cacheline into the hot path, where normal hot path just needs the first two. Use 'cflags' for joint fd/cflags storage. We only need fd until we successfully issue, and we only need cflags once a request is done and is completed. Fixes: 6bf9c47a3989 ("io_uring: defer file assignment") Signed-off-by: Jens Axboe --- fs/io-wq.h | 1 - fs/io_uring.c | 12 ++++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/io-wq.h b/fs/io-wq.h index 04d374e65e54..dbecd27656c7 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -155,7 +155,6 @@ struct io_wq_work_node *wq_stack_extract(struct io_wq_work_node *stack) struct io_wq_work { struct io_wq_work_node list; unsigned flags; - int fd; }; static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) diff --git a/fs/io_uring.c b/fs/io_uring.c index 3a97535d0550..38e62b1c6297 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -907,7 +907,11 @@ struct io_kiocb { u64 user_data; u32 result; - u32 cflags; + /* fd initially, then cflags for completion */ + union { + u32 cflags; + int fd; + }; struct io_ring_ctx *ctx; struct task_struct *task; @@ -7090,9 +7094,9 @@ static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags) return true; if (req->flags & REQ_F_FIXED_FILE) - req->file = io_file_get_fixed(req, req->work.fd, issue_flags); + req->file = io_file_get_fixed(req, req->fd, issue_flags); else - req->file = io_file_get_normal(req, req->work.fd); + req->file = io_file_get_normal(req, req->fd); if (req->file) return true; @@ -7630,7 +7634,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (io_op_defs[opcode].needs_file) { struct io_submit_state *state = &ctx->submit_state; - req->work.fd = READ_ONCE(sqe->fd); + req->fd = READ_ONCE(sqe->fd); /* * Plug now if we have more than 2 IO left after this, and the From 5ad7f18cd82cee8e773d40cc7a1465a526f2615c Mon Sep 17 00:00:00 2001 From: Tomas Melin Date: Thu, 7 Apr 2022 19:16:59 +0300 Subject: [PATCH 522/708] net: macb: Restart tx only if queue pointer is lagging commit 4298388574da ("net: macb: restart tx after tx used bit read") added support for restarting transmission. Restarting tx does not work in case controller asserts TXUBR interrupt and TQBP is already at the end of the tx queue. In that situation, restarting tx will immediately cause assertion of another TXUBR interrupt. The driver will end up in an infinite interrupt loop which it cannot break out of. For cases where TQBP is at the end of the tx queue, instead only clear TX_USED interrupt. As more data gets pushed to the queue, transmission will resume. This issue was observed on a Xilinx Zynq-7000 based board. During stress test of the network interface, driver would get stuck on interrupt loop within seconds or minutes causing CPU to stall. Signed-off-by: Tomas Melin Tested-by: Claudiu Beznea Reviewed-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220407161659.14532-1-tomas.melin@vaisala.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 800d5ced5800..e475be29845c 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1658,6 +1658,7 @@ static void macb_tx_restart(struct macb_queue *queue) unsigned int head = queue->tx_head; unsigned int tail = queue->tx_tail; struct macb *bp = queue->bp; + unsigned int head_idx, tbqp; if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(queue, ISR, MACB_BIT(TXUBR)); @@ -1665,6 +1666,13 @@ static void macb_tx_restart(struct macb_queue *queue) if (head == tail) return; + tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp); + tbqp = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, tbqp)); + head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, head)); + + if (tbqp == head_idx) + return; + macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); } From 868e6139c5212e7d9de8332806aacfeafb349320 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Sun, 27 Mar 2022 11:33:16 -0600 Subject: [PATCH 523/708] block: move lower_48_bits() to block The function is not generally applicable enough to be included in the core kernel header. Move it to block since it's the only subsystem using it. Suggested-by: Linus Torvalds Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20220327173316.315-1-kbusch@kernel.org Signed-off-by: Jens Axboe --- include/linux/kernel.h | 9 --------- include/linux/t10-pi.h | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 08ba5995aa8b..a890428bcc1a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -63,15 +63,6 @@ } \ ) -/** - * lower_48_bits() - return bits 0-47 of a number - * @n: the number we're accessing - */ -static inline u64 lower_48_bits(u64 n) -{ - return n & ((1ull << 48) - 1); -} - /** * upper_32_bits - return bits 32-63 of a number * @n: the number we're accessing diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index a4b1af581f69..248f4ac95642 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -59,6 +59,15 @@ struct crc64_pi_tuple { __u8 ref_tag[6]; }; +/** + * lower_48_bits() - return bits 0-47 of a number + * @n: the number we're accessing + */ +static inline u64 lower_48_bits(u64 n) +{ + return n & ((1ull << 48) - 1); +} + static inline u64 ext_pi_ref_tag(struct request *rq) { unsigned int shift = ilog2(queue_logical_block_size(rq->q)); From b1871fd48efc567650dbdc974e5a2342a03fe0d2 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 8 Apr 2022 17:10:33 +0200 Subject: [PATCH 524/708] net/smc: use memcpy instead of snprintf to avoid out of bounds read Using snprintf() to convert not null-terminated strings to null terminated strings may cause out of bounds read in the source string. Therefore use memcpy() and terminate the target string with a null afterwards. Fixes: fa0866625543 ("net/smc: add support for user defined EIDs") Fixes: 3c572145c24e ("net/smc: add generic netlink support for system EID") Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_clc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index ce27399b38b1..f9f3f59c79de 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -191,7 +191,8 @@ static int smc_nl_ueid_dumpinfo(struct sk_buff *skb, u32 portid, u32 seq, flags, SMC_NETLINK_DUMP_UEID); if (!hdr) return -ENOMEM; - snprintf(ueid_str, sizeof(ueid_str), "%s", ueid); + memcpy(ueid_str, ueid, SMC_MAX_EID_LEN); + ueid_str[SMC_MAX_EID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_EID_TABLE_ENTRY, ueid_str)) { genlmsg_cancel(skb, hdr); return -EMSGSIZE; @@ -252,7 +253,8 @@ int smc_nl_dump_seid(struct sk_buff *skb, struct netlink_callback *cb) goto end; smc_ism_get_system_eid(&seid); - snprintf(seid_str, sizeof(seid_str), "%s", seid); + memcpy(seid_str, seid, SMC_MAX_EID_LEN); + seid_str[SMC_MAX_EID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_SEID_ENTRY, seid_str)) goto err; read_lock(&smc_clc_eid_table.lock); From d22f4f977236f97e01255a80bca2ea93a8094fc8 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 8 Apr 2022 17:10:34 +0200 Subject: [PATCH 525/708] net/smc: Fix NULL pointer dereference in smc_pnet_find_ib() dev_name() was called with dev.parent as argument but without to NULL-check it before. Solve this by checking the pointer before the call to dev_name(). Fixes: af5f60c7e3d5 ("net/smc: allow PCI IDs as ib device names in the pnet table") Reported-by: syzbot+03e3e228510223dabd34@syzkaller.appspotmail.com Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_pnet.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 7984f8883472..7055ed10e316 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -311,8 +311,9 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (!strncmp(ibdev->ibdev->name, ib_name, sizeof(ibdev->ibdev->name)) || - !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, - IB_DEVICE_NAME_MAX - 1)) { + (ibdev->ibdev->dev.parent && + !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, + IB_DEVICE_NAME_MAX - 1))) { goto out; } } From 49b7d376abe54a49e8bd5e64824032b7c97c62d4 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Fri, 8 Apr 2022 17:10:35 +0200 Subject: [PATCH 526/708] net/smc: Fix af_ops of child socket pointing to released memory Child sockets may inherit the af_ops from the parent listen socket. When the listen socket is released then the af_ops of the child socket points to released memory. Solve that by restoring the original af_ops for child sockets which inherited the parent af_ops. And clear any inherited user_data of the parent socket. Fixes: 8270d9c21041 ("net/smc: Limit backlog connections") Reviewed-by: Wenjia Zhang Signed-off-by: Karsten Graul Reviewed-by: D. Wythe Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index f0d118e9f155..14ddc40149e8 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -121,6 +121,7 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk, bool *own_req) { struct smc_sock *smc; + struct sock *child; smc = smc_clcsock_user_data(sk); @@ -134,8 +135,17 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk, } /* passthrough to original syn recv sock fct */ - return smc->ori_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash, - own_req); + child = smc->ori_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash, + own_req); + /* child must not inherit smc or its ops */ + if (child) { + rcu_assign_sk_user_data(child, NULL); + + /* v4-mapped sockets don't inherit parent ops. Don't restore. */ + if (inet_csk(child)->icsk_af_ops == inet_csk(sk)->icsk_af_ops) + inet_csk(child)->icsk_af_ops = smc->ori_af_ops; + } + return child; drop: dst_release(dst); From 390d645877ffd6dcb55f162d618045b2779217b3 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 11 Apr 2022 11:12:50 -0700 Subject: [PATCH 527/708] drm/msm/gpu: Avoid -Wunused-function with !CONFIG_PM_SLEEP When building with CONFIG_PM=y and CONFIG_PM_SLEEP=n (such as ARCH=riscv allmodconfig), the following warnings/errors occur: drivers/gpu/drm/msm/adreno/adreno_device.c:679:12: error: 'adreno_system_resume' defined but not used [-Werror=unused-function] 679 | static int adreno_system_resume(struct device *dev) | ^~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/msm/adreno/adreno_device.c:655:12: error: 'adreno_system_suspend' defined but not used [-Werror=unused-function] 655 | static int adreno_system_suspend(struct device *dev) | ^~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors These functions are only used in SET_SYSTEM_SLEEP_PM_OPS(), which evaluates to empty when CONFIG_PM_SLEEP is not set, making these functions unused. To resolve this, use the SYSTEM_SLEEP_PM_OPS() and RUNTIME_PM_OPS() macros, which were introduced in commit 1a3c7bb08826 ("PM: core: Add new *_PM_OPS macros, deprecate old ones"). They are designed to avoid these compiler warnings while still guarding their use on CONFIG_PM{,_SLEEP}=y. Fixes: 7e4167c9e021 ("drm/msm/gpu: Park scheduler threads for system suspend") Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20220411181249.2758344-1-nathan@kernel.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_device.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 661dfa7681fb..8706bcdd1472 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -599,7 +599,6 @@ static const struct of_device_id dt_match[] = { {} }; -#ifdef CONFIG_PM static int adreno_runtime_resume(struct device *dev) { struct msm_gpu *gpu = dev_to_gpu(dev); @@ -682,11 +681,9 @@ static int adreno_system_resume(struct device *dev) return pm_runtime_force_resume(dev); } -#endif - static const struct dev_pm_ops adreno_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(adreno_system_suspend, adreno_system_resume) - SET_RUNTIME_PM_OPS(adreno_runtime_suspend, adreno_runtime_resume, NULL) + SYSTEM_SLEEP_PM_OPS(adreno_system_suspend, adreno_system_resume) + RUNTIME_PM_OPS(adreno_runtime_suspend, adreno_runtime_resume, NULL) }; static struct platform_driver adreno_driver = { From f19fe8f354a6e7c2b9588f83af4876e34f0ce83e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 8 Apr 2022 21:37:03 -0700 Subject: [PATCH 528/708] Revert "scsi: scsi_debug: Address races following module load" Revert the patch mentioned in the subject since it blocks I/O after module unload has started while this is a legitimate use case. For e.g. blktests test case srp/001 that patch causes a command timeout to be triggered for the following call stack: __schedule+0x4c3/0xd20 schedule+0x82/0x110 schedule_timeout+0x122/0x200 io_schedule_timeout+0x7b/0xc0 __wait_for_common+0x2bc/0x380 wait_for_completion_io_timeout+0x1d/0x20 blk_execute_rq+0x1db/0x200 __scsi_execute+0x1fb/0x310 sd_sync_cache+0x155/0x2c0 [sd_mod] sd_shutdown+0xbb/0x190 [sd_mod] sd_remove+0x5b/0x80 [sd_mod] device_remove+0x9a/0xb0 device_release_driver_internal+0x2c5/0x360 device_release_driver+0x12/0x20 bus_remove_device+0x1aa/0x270 device_del+0x2d4/0x640 __scsi_remove_device+0x168/0x1a0 scsi_forget_host+0xa8/0xb0 scsi_remove_host+0x9b/0x150 sdebug_driver_remove+0x3d/0x140 [scsi_debug] device_remove+0x6f/0xb0 device_release_driver_internal+0x2c5/0x360 device_release_driver+0x12/0x20 bus_remove_device+0x1aa/0x270 device_del+0x2d4/0x640 device_unregister+0x18/0x70 sdebug_do_remove_host+0x138/0x180 [scsi_debug] scsi_debug_exit+0x45/0xd5 [scsi_debug] __do_sys_delete_module.constprop.0+0x210/0x320 __x64_sys_delete_module+0x1f/0x30 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Link: https://lore.kernel.org/r/20220409043704.28573-1-bvanassche@acm.org Fixes: 2aad3cd85370 ("scsi: scsi_debug: Address races following module load") Cc: Douglas Gilbert Cc: Yi Zhang Cc: Bob Pearson Reported-by: Yi Zhang Tested-by: Yi Zhang Acked-by: Douglas Gilbert Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 197 ++++++++++---------------------------- 1 file changed, 51 insertions(+), 146 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index ff78ef702f22..592a290e6cfa 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -732,9 +731,7 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEM_P1 + 1] = { {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, }; -static atomic_t sdebug_num_hosts; -static DEFINE_MUTEX(add_host_mutex); - +static int sdebug_num_hosts; static int sdebug_add_host = DEF_NUM_HOST; /* in sysfs this is relative */ static int sdebug_ato = DEF_ATO; static int sdebug_cdb_len = DEF_CDB_LEN; @@ -781,7 +778,6 @@ static int sdebug_uuid_ctl = DEF_UUID_CTL; static bool sdebug_random = DEF_RANDOM; static bool sdebug_per_host_store = DEF_PER_HOST_STORE; static bool sdebug_removable = DEF_REMOVABLE; -static bool sdebug_deflect_incoming; static bool sdebug_clustering; static bool sdebug_host_lock = DEF_HOST_LOCK; static bool sdebug_strict = DEF_STRICT; @@ -5122,10 +5118,6 @@ static int scsi_debug_slave_configure(struct scsi_device *sdp) sdp->host->host_no, sdp->channel, sdp->id, sdp->lun); if (sdp->host->max_cmd_len != SDEBUG_MAX_CMD_LEN) sdp->host->max_cmd_len = SDEBUG_MAX_CMD_LEN; - if (smp_load_acquire(&sdebug_deflect_incoming)) { - pr_info("Exit early due to deflect_incoming\n"); - return 1; - } if (devip == NULL) { devip = find_build_dev_info(sdp); if (devip == NULL) @@ -5211,7 +5203,7 @@ static bool stop_queued_cmnd(struct scsi_cmnd *cmnd) } /* Deletes (stops) timers or work queues of all queued commands */ -static void stop_all_queued(bool done_with_no_conn) +static void stop_all_queued(void) { unsigned long iflags; int j, k; @@ -5220,15 +5212,13 @@ static void stop_all_queued(bool done_with_no_conn) struct sdebug_queued_cmd *sqcp; struct sdebug_dev_info *devip; struct sdebug_defer *sd_dp; - struct scsi_cmnd *scp; for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) { spin_lock_irqsave(&sqp->qc_lock, iflags); for (k = 0; k < SDEBUG_CANQUEUE; ++k) { if (test_bit(k, sqp->in_use_bm)) { sqcp = &sqp->qc_arr[k]; - scp = sqcp->a_cmnd; - if (!scp) + if (sqcp->a_cmnd == NULL) continue; devip = (struct sdebug_dev_info *) sqcp->a_cmnd->device->hostdata; @@ -5243,10 +5233,6 @@ static void stop_all_queued(bool done_with_no_conn) l_defer_t = SDEB_DEFER_NONE; spin_unlock_irqrestore(&sqp->qc_lock, iflags); stop_qc_helper(sd_dp, l_defer_t); - if (done_with_no_conn && l_defer_t != SDEB_DEFER_NONE) { - scp->result = DID_NO_CONNECT << 16; - scsi_done(scp); - } clear_bit(k, sqp->in_use_bm); spin_lock_irqsave(&sqp->qc_lock, iflags); } @@ -5389,7 +5375,7 @@ static int scsi_debug_host_reset(struct scsi_cmnd *SCpnt) } } spin_unlock(&sdebug_host_list_lock); - stop_all_queued(false); + stop_all_queued(); if (SDEBUG_OPT_RESET_NOISE & sdebug_opts) sdev_printk(KERN_INFO, SCpnt->device, "%s: %d device(s) found\n", __func__, k); @@ -5449,50 +5435,13 @@ static void sdebug_build_parts(unsigned char *ramp, unsigned long store_size) } } -static void sdeb_block_all_queues(void) +static void block_unblock_all_queues(bool block) { int j; struct sdebug_queue *sqp; for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) - atomic_set(&sqp->blocked, (int)true); -} - -static void sdeb_unblock_all_queues(void) -{ - int j; - struct sdebug_queue *sqp; - - for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) - atomic_set(&sqp->blocked, (int)false); -} - -static void -sdeb_add_n_hosts(int num_hosts) -{ - if (num_hosts < 1) - return; - do { - bool found; - unsigned long idx; - struct sdeb_store_info *sip; - bool want_phs = (sdebug_fake_rw == 0) && sdebug_per_host_store; - - found = false; - if (want_phs) { - xa_for_each_marked(per_store_ap, idx, sip, SDEB_XA_NOT_IN_USE) { - sdeb_most_recent_idx = (int)idx; - found = true; - break; - } - if (found) /* re-use case */ - sdebug_add_host_helper((int)idx); - else - sdebug_do_add_host(true /* make new store */); - } else { - sdebug_do_add_host(false); - } - } while (--num_hosts); + atomic_set(&sqp->blocked, (int)block); } /* Adjust (by rounding down) the sdebug_cmnd_count so abs(every_nth)-1 @@ -5505,10 +5454,10 @@ static void tweak_cmnd_count(void) modulo = abs(sdebug_every_nth); if (modulo < 2) return; - sdeb_block_all_queues(); + block_unblock_all_queues(true); count = atomic_read(&sdebug_cmnd_count); atomic_set(&sdebug_cmnd_count, (count / modulo) * modulo); - sdeb_unblock_all_queues(); + block_unblock_all_queues(false); } static void clear_queue_stats(void) @@ -5526,15 +5475,6 @@ static bool inject_on_this_cmd(void) return (atomic_read(&sdebug_cmnd_count) % abs(sdebug_every_nth)) == 0; } -static int process_deflect_incoming(struct scsi_cmnd *scp) -{ - u8 opcode = scp->cmnd[0]; - - if (opcode == SYNCHRONIZE_CACHE || opcode == SYNCHRONIZE_CACHE_16) - return 0; - return DID_NO_CONNECT << 16; -} - #define INCLUSIVE_TIMING_MAX_NS 1000000 /* 1 millisecond */ /* Complete the processing of the thread that queued a SCSI command to this @@ -5544,7 +5484,8 @@ static int process_deflect_incoming(struct scsi_cmnd *scp) */ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, int scsi_result, - int (*pfp)(struct scsi_cmnd *, struct sdebug_dev_info *), + int (*pfp)(struct scsi_cmnd *, + struct sdebug_dev_info *), int delta_jiff, int ndelay) { bool new_sd_dp; @@ -5565,27 +5506,13 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, } sdp = cmnd->device; - if (delta_jiff == 0) { - sqp = get_queue(cmnd); - if (atomic_read(&sqp->blocked)) { - if (smp_load_acquire(&sdebug_deflect_incoming)) - return process_deflect_incoming(cmnd); - else - return SCSI_MLQUEUE_HOST_BUSY; - } + if (delta_jiff == 0) goto respond_in_thread; - } sqp = get_queue(cmnd); spin_lock_irqsave(&sqp->qc_lock, iflags); if (unlikely(atomic_read(&sqp->blocked))) { spin_unlock_irqrestore(&sqp->qc_lock, iflags); - if (smp_load_acquire(&sdebug_deflect_incoming)) { - scsi_result = process_deflect_incoming(cmnd); - goto respond_in_thread; - } - if (sdebug_verbose) - pr_info("blocked --> SCSI_MLQUEUE_HOST_BUSY\n"); return SCSI_MLQUEUE_HOST_BUSY; } num_in_q = atomic_read(&devip->num_in_q); @@ -5774,12 +5701,8 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, respond_in_thread: /* call back to mid-layer using invocation thread */ cmnd->result = pfp != NULL ? pfp(cmnd, devip) : 0; cmnd->result &= ~SDEG_RES_IMMED_MASK; - if (cmnd->result == 0 && scsi_result != 0) { + if (cmnd->result == 0 && scsi_result != 0) cmnd->result = scsi_result; - if (sdebug_verbose) - pr_info("respond_in_thread: tag=0x%x, scp->result=0x%x\n", - blk_mq_unique_tag(scsi_cmd_to_rq(cmnd)), scsi_result); - } scsi_done(cmnd); return 0; } @@ -6064,7 +5987,7 @@ static ssize_t delay_store(struct device_driver *ddp, const char *buf, int j, k; struct sdebug_queue *sqp; - sdeb_block_all_queues(); + block_unblock_all_queues(true); for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) { k = find_first_bit(sqp->in_use_bm, @@ -6078,7 +6001,7 @@ static ssize_t delay_store(struct device_driver *ddp, const char *buf, sdebug_jdelay = jdelay; sdebug_ndelay = 0; } - sdeb_unblock_all_queues(); + block_unblock_all_queues(false); } return res; } @@ -6104,7 +6027,7 @@ static ssize_t ndelay_store(struct device_driver *ddp, const char *buf, int j, k; struct sdebug_queue *sqp; - sdeb_block_all_queues(); + block_unblock_all_queues(true); for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) { k = find_first_bit(sqp->in_use_bm, @@ -6119,7 +6042,7 @@ static ssize_t ndelay_store(struct device_driver *ddp, const char *buf, sdebug_jdelay = ndelay ? JDELAY_OVERRIDDEN : DEF_JDELAY; } - sdeb_unblock_all_queues(); + block_unblock_all_queues(false); } return res; } @@ -6433,7 +6356,7 @@ static ssize_t max_queue_store(struct device_driver *ddp, const char *buf, if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n > 0) && (n <= SDEBUG_CANQUEUE) && (sdebug_host_max_queue == 0)) { - sdeb_block_all_queues(); + block_unblock_all_queues(true); k = 0; for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) { @@ -6448,7 +6371,7 @@ static ssize_t max_queue_store(struct device_driver *ddp, const char *buf, atomic_set(&retired_max_queue, k + 1); else atomic_set(&retired_max_queue, 0); - sdeb_unblock_all_queues(); + block_unblock_all_queues(false); return count; } return -EINVAL; @@ -6537,48 +6460,43 @@ static DRIVER_ATTR_RW(virtual_gb); static ssize_t add_host_show(struct device_driver *ddp, char *buf) { /* absolute number of hosts currently active is what is shown */ - return scnprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&sdebug_num_hosts)); + return scnprintf(buf, PAGE_SIZE, "%d\n", sdebug_num_hosts); } -/* - * Accept positive and negative values. Hex values (only positive) may be prefixed by '0x'. - * To remove all hosts use a large negative number (e.g. -9999). The value 0 does nothing. - * Returns -EBUSY if another add_host sysfs invocation is active. - */ static ssize_t add_host_store(struct device_driver *ddp, const char *buf, size_t count) { + bool found; + unsigned long idx; + struct sdeb_store_info *sip; + bool want_phs = (sdebug_fake_rw == 0) && sdebug_per_host_store; int delta_hosts; - if (count == 0 || kstrtoint(buf, 0, &delta_hosts)) + if (sscanf(buf, "%d", &delta_hosts) != 1) return -EINVAL; - if (sdebug_verbose) - pr_info("prior num_hosts=%d, num_to_add=%d\n", - atomic_read(&sdebug_num_hosts), delta_hosts); - if (delta_hosts == 0) - return count; - if (mutex_trylock(&add_host_mutex) == 0) - return -EBUSY; if (delta_hosts > 0) { - sdeb_add_n_hosts(delta_hosts); - } else if (delta_hosts < 0) { - smp_store_release(&sdebug_deflect_incoming, true); - sdeb_block_all_queues(); - if (delta_hosts >= atomic_read(&sdebug_num_hosts)) - stop_all_queued(true); do { - if (atomic_read(&sdebug_num_hosts) < 1) { - free_all_queued(); - break; + found = false; + if (want_phs) { + xa_for_each_marked(per_store_ap, idx, sip, + SDEB_XA_NOT_IN_USE) { + sdeb_most_recent_idx = (int)idx; + found = true; + break; + } + if (found) /* re-use case */ + sdebug_add_host_helper((int)idx); + else + sdebug_do_add_host(true); + } else { + sdebug_do_add_host(false); } + } while (--delta_hosts); + } else if (delta_hosts < 0) { + do { sdebug_do_remove_host(false); } while (++delta_hosts); - sdeb_unblock_all_queues(); - smp_store_release(&sdebug_deflect_incoming, false); } - mutex_unlock(&add_host_mutex); - if (sdebug_verbose) - pr_info("post num_hosts=%d\n", atomic_read(&sdebug_num_hosts)); return count; } static DRIVER_ATTR_RW(add_host); @@ -7089,10 +7007,6 @@ static int __init scsi_debug_init(void) sdebug_add_host = 0; for (k = 0; k < hosts_to_add; k++) { - if (smp_load_acquire(&sdebug_deflect_incoming)) { - pr_info("exit early as sdebug_deflect_incoming is set\n"); - return 0; - } if (want_store && k == 0) { ret = sdebug_add_host_helper(idx); if (ret < 0) { @@ -7110,12 +7024,8 @@ static int __init scsi_debug_init(void) } } if (sdebug_verbose) - pr_info("built %d host(s)\n", atomic_read(&sdebug_num_hosts)); + pr_info("built %d host(s)\n", sdebug_num_hosts); - /* - * Even though all the hosts have been established, due to async device (LU) scanning - * by the scsi mid-level, there may still be devices (LUs) being set up. - */ return 0; bus_unreg: @@ -7131,17 +7041,12 @@ free_q_arr: static void __exit scsi_debug_exit(void) { - int k; + int k = sdebug_num_hosts; - /* Possible race with LUs still being set up; stop them asap */ - sdeb_block_all_queues(); - smp_store_release(&sdebug_deflect_incoming, true); - stop_all_queued(false); - for (k = 0; atomic_read(&sdebug_num_hosts) > 0; k++) + stop_all_queued(); + for (; k; k--) sdebug_do_remove_host(true); free_all_queued(); - if (sdebug_verbose) - pr_info("removed %d hosts\n", k); driver_unregister(&sdebug_driverfs_driver); bus_unregister(&pseudo_lld_bus); root_device_unregister(pseudo_primary); @@ -7311,13 +7216,13 @@ static int sdebug_add_host_helper(int per_host_idx) sdbg_host->dev.bus = &pseudo_lld_bus; sdbg_host->dev.parent = pseudo_primary; sdbg_host->dev.release = &sdebug_release_adapter; - dev_set_name(&sdbg_host->dev, "adapter%d", atomic_read(&sdebug_num_hosts)); + dev_set_name(&sdbg_host->dev, "adapter%d", sdebug_num_hosts); error = device_register(&sdbg_host->dev); if (error) goto clean; - atomic_inc(&sdebug_num_hosts); + ++sdebug_num_hosts; return 0; clean: @@ -7381,7 +7286,7 @@ static void sdebug_do_remove_host(bool the_end) return; device_unregister(&sdbg_host->dev); - atomic_dec(&sdebug_num_hosts); + --sdebug_num_hosts; } static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth) @@ -7389,10 +7294,10 @@ static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth) int num_in_q = 0; struct sdebug_dev_info *devip; - sdeb_block_all_queues(); + block_unblock_all_queues(true); devip = (struct sdebug_dev_info *)sdev->hostdata; if (NULL == devip) { - sdeb_unblock_all_queues(); + block_unblock_all_queues(false); return -ENODEV; } num_in_q = atomic_read(&devip->num_in_q); @@ -7411,7 +7316,7 @@ static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth) sdev_printk(KERN_INFO, sdev, "%s: qdepth=%d, num_in_q=%d\n", __func__, qdepth, num_in_q); } - sdeb_unblock_all_queues(); + block_unblock_all_queues(false); return sdev->queue_depth; } From 294080eacf92a0781e6d43663448a55001ec8c64 Mon Sep 17 00:00:00 2001 From: Ajish Koshy Date: Mon, 11 Apr 2022 12:16:02 +0530 Subject: [PATCH 529/708] scsi: pm80xx: Mask and unmask upper interrupt vectors 32-63 When upper inbound and outbound queues 32-63 are enabled, we see upper vectors 32-63 in interrupt service routine. We need corresponding registers to handle masking and unmasking of these upper interrupts. To achieve this, we use registers MSGU_ODMR_U(0x34) to mask and MSGU_ODMR_CLR_U(0x3C) to unmask the interrupts. In these registers bit 0-31 represents interrupt vectors 32-63. Link: https://lore.kernel.org/r/20220411064603.668448-2-Ajish.Koshy@microchip.com Fixes: 05c6c029a44d ("scsi: pm80xx: Increase number of supported queues") Reviewed-by: John Garry Acked-by: Jack Wang Signed-off-by: Ajish Koshy Signed-off-by: Viswas G Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm80xx_hwi.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index f90b707c190b..f13e2ed90699 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -1727,10 +1727,11 @@ static void pm80xx_chip_interrupt_enable(struct pm8001_hba_info *pm8001_ha, u8 vec) { #ifdef PM8001_USE_MSIX - u32 mask; - mask = (u32)(1 << vec); - - pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR, (u32)(mask & 0xFFFFFFFF)); + if (vec < 32) + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR, 1U << vec); + else + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR_U, + 1U << (vec - 32)); return; #endif pm80xx_chip_intx_interrupt_enable(pm8001_ha); @@ -1746,12 +1747,15 @@ static void pm80xx_chip_interrupt_disable(struct pm8001_hba_info *pm8001_ha, u8 vec) { #ifdef PM8001_USE_MSIX - u32 mask; - if (vec == 0xFF) - mask = 0xFFFFFFFF; + if (vec == 0xFF) { + /* disable all vectors 0-31, 32-63 */ + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, 0xFFFFFFFF); + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_U, 0xFFFFFFFF); + } else if (vec < 32) + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, 1U << vec); else - mask = (u32)(1 << vec); - pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, (u32)(mask & 0xFFFFFFFF)); + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_U, + 1U << (vec - 32)); return; #endif pm80xx_chip_intx_interrupt_disable(pm8001_ha); From bcd8a45223470e00b5f254018174d64a75db4bbe Mon Sep 17 00:00:00 2001 From: Ajish Koshy Date: Mon, 11 Apr 2022 12:16:03 +0530 Subject: [PATCH 530/708] scsi: pm80xx: Enable upper inbound, outbound queues Executing driver on servers with more than 32 CPUs were faced with command timeouts. This is because we were not geting completions for commands submitted on IQ32 - IQ63. Set E64Q bit to enable upper inbound and outbound queues 32 to 63 in the MPI main configuration table. Added 500ms delay after successful MPI initialization as mentioned in controller datasheet. Link: https://lore.kernel.org/r/20220411064603.668448-3-Ajish.Koshy@microchip.com Fixes: 05c6c029a44d ("scsi: pm80xx: Increase number of supported queues") Reviewed-by: Damien Le Moal Acked-by: Jack Wang Signed-off-by: Ajish Koshy Signed-off-by: Viswas G Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm80xx_hwi.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index f13e2ed90699..01c5e8ff4cc5 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -766,6 +766,10 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha) pm8001_ha->main_cfg_tbl.pm80xx_tbl.pcs_event_log_severity = 0x01; pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt = 0x01; + /* Enable higher IQs and OQs, 32 to 63, bit 16 */ + if (pm8001_ha->max_q_num > 32) + pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt |= + 1 << 16; /* Disable end to end CRC checking */ pm8001_ha->main_cfg_tbl.pm80xx_tbl.crc_core_dump = (0x1 << 16); @@ -1027,6 +1031,13 @@ static int mpi_init_check(struct pm8001_hba_info *pm8001_ha) if (0x0000 != gst_len_mpistate) return -EBUSY; + /* + * As per controller datasheet, after successful MPI + * initialization minimum 500ms delay is required before + * issuing commands. + */ + msleep(500); + return 0; } From c34f95e98d8fb750eefd4f3fe58b4f8b5e89253b Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 7 Apr 2022 19:13:05 -0500 Subject: [PATCH 531/708] scsi: iscsi: Move iscsi_ep_disconnect() This patch moves iscsi_ep_disconnect() so it can be called earlier in the next patch. Link: https://lore.kernel.org/r/20220408001314.5014-2-michael.christie@oracle.com Tested-by: Manish Rangankar Reviewed-by: Lee Duncan Reviewed-by: Chris Leech Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 38 ++++++++++++++--------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 27951ea05dd4..4e10457e3ab9 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2217,6 +2217,25 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag) ISCSI_DBG_TRANS_CONN(conn, "Stopping conn done.\n"); } +static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) +{ + struct iscsi_cls_session *session = iscsi_conn_to_session(conn); + struct iscsi_endpoint *ep; + + ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n"); + conn->state = ISCSI_CONN_FAILED; + + if (!conn->ep || !session->transport->ep_disconnect) + return; + + ep = conn->ep; + conn->ep = NULL; + + session->transport->unbind_conn(conn, is_active); + session->transport->ep_disconnect(ep); + ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n"); +} + static int iscsi_if_stop_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev) { @@ -2257,25 +2276,6 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, return 0; } -static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) -{ - struct iscsi_cls_session *session = iscsi_conn_to_session(conn); - struct iscsi_endpoint *ep; - - ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n"); - conn->state = ISCSI_CONN_FAILED; - - if (!conn->ep || !session->transport->ep_disconnect) - return; - - ep = conn->ep; - conn->ep = NULL; - - session->transport->unbind_conn(conn, is_active); - session->transport->ep_disconnect(ep); - ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n"); -} - static void iscsi_cleanup_conn_work_fn(struct work_struct *work) { struct iscsi_cls_conn *conn = container_of(work, struct iscsi_cls_conn, From cbd2283aaf47fef4ded4b29124b1ef3beb515f3a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 7 Apr 2022 19:13:06 -0500 Subject: [PATCH 532/708] scsi: iscsi: Fix offload conn cleanup when iscsid restarts When userspace restarts during boot or upgrades it won't know about the offload driver's endpoint and connection mappings. iscsid will start by cleaning up the old session by doing a stop_conn call. Later, if we are able to create a new connection, we clean up the old endpoint during the binding stage. The problem is that if we do stop_conn before doing the ep_disconnect call offload, drivers can still be executing I/O. We then might free tasks from the under the card/driver. This moves the ep_disconnect call to before we do the stop_conn call for this case. It will then work and look like a normal recovery/cleanup procedure from the driver's point of view. Link: https://lore.kernel.org/r/20220408001314.5014-3-michael.christie@oracle.com Tested-by: Manish Rangankar Reviewed-by: Lee Duncan Reviewed-by: Chris Leech Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 48 +++++++++++++++++------------ 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 4e10457e3ab9..bf39fb5569b6 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2236,6 +2236,23 @@ static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n"); } +static void iscsi_if_disconnect_bound_ep(struct iscsi_cls_conn *conn, + struct iscsi_endpoint *ep, + bool is_active) +{ + /* Check if this was a conn error and the kernel took ownership */ + if (!test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + iscsi_ep_disconnect(conn, is_active); + } else { + ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); + mutex_unlock(&conn->ep_mutex); + + flush_work(&conn->cleanup_work); + + mutex_lock(&conn->ep_mutex); + } +} + static int iscsi_if_stop_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev) { @@ -2256,6 +2273,16 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, cancel_work_sync(&conn->cleanup_work); iscsi_stop_conn(conn, flag); } else { + /* + * For offload, when iscsid is restarted it won't know about + * existing endpoints so it can't do a ep_disconnect. We clean + * it up here for userspace. + */ + mutex_lock(&conn->ep_mutex); + if (conn->ep) + iscsi_if_disconnect_bound_ep(conn, conn->ep, true); + mutex_unlock(&conn->ep_mutex); + /* * Figure out if it was the kernel or userspace initiating this. */ @@ -2984,16 +3011,7 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport, } mutex_lock(&conn->ep_mutex); - /* Check if this was a conn error and the kernel took ownership */ - if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { - ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); - mutex_unlock(&conn->ep_mutex); - - flush_work(&conn->cleanup_work); - goto put_ep; - } - - iscsi_ep_disconnect(conn, false); + iscsi_if_disconnect_bound_ep(conn, ep, false); mutex_unlock(&conn->ep_mutex); put_ep: iscsi_put_endpoint(ep); @@ -3704,16 +3722,6 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, switch (nlh->nlmsg_type) { case ISCSI_UEVENT_BIND_CONN: - if (conn->ep) { - /* - * For offload boot support where iscsid is restarted - * during the pivot root stage, the ep will be intact - * here when the new iscsid instance starts up and - * reconnects. - */ - iscsi_ep_disconnect(conn, true); - } - session = iscsi_session_lookup(ev->u.b_conn.sid); if (!session) { err = -EINVAL; From 3c6ae371b8a1ffba1fc415989fd581ebf841ed0a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 7 Apr 2022 19:13:07 -0500 Subject: [PATCH 533/708] scsi: iscsi: Release endpoint ID when its freed We can't release the endpoint ID until all references to the endpoint have been dropped or it could be allocated while in use. This has us use an idr instead of looping over all conns to find a free ID and then free the ID when all references have been dropped instead of when the device is only deleted. Link: https://lore.kernel.org/r/20220408001314.5014-4-michael.christie@oracle.com Tested-by: Manish Rangankar Reviewed-by: Lee Duncan Reviewed-by: Chris Leech Reviewed-by: Wu Bo Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 71 ++++++++++++++--------------- include/scsi/scsi_transport_iscsi.h | 2 +- 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index bf39fb5569b6..1fc7c6bfbd67 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -86,6 +86,9 @@ struct iscsi_internal { struct transport_container session_cont; }; +static DEFINE_IDR(iscsi_ep_idr); +static DEFINE_MUTEX(iscsi_ep_idr_mutex); + static atomic_t iscsi_session_nr; /* sysfs session id for next new session */ static struct workqueue_struct *iscsi_conn_cleanup_workq; @@ -168,6 +171,11 @@ struct device_attribute dev_attr_##_prefix##_##_name = \ static void iscsi_endpoint_release(struct device *dev) { struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev); + + mutex_lock(&iscsi_ep_idr_mutex); + idr_remove(&iscsi_ep_idr, ep->id); + mutex_unlock(&iscsi_ep_idr_mutex); + kfree(ep); } @@ -180,7 +188,7 @@ static ssize_t show_ep_handle(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev); - return sysfs_emit(buf, "%llu\n", (unsigned long long) ep->id); + return sysfs_emit(buf, "%d\n", ep->id); } static ISCSI_ATTR(ep, handle, S_IRUGO, show_ep_handle, NULL); @@ -193,48 +201,32 @@ static struct attribute_group iscsi_endpoint_group = { .attrs = iscsi_endpoint_attrs, }; -#define ISCSI_MAX_EPID -1 - -static int iscsi_match_epid(struct device *dev, const void *data) -{ - struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev); - const uint64_t *epid = data; - - return *epid == ep->id; -} - struct iscsi_endpoint * iscsi_create_endpoint(int dd_size) { - struct device *dev; struct iscsi_endpoint *ep; - uint64_t id; - int err; - - for (id = 1; id < ISCSI_MAX_EPID; id++) { - dev = class_find_device(&iscsi_endpoint_class, NULL, &id, - iscsi_match_epid); - if (!dev) - break; - else - put_device(dev); - } - if (id == ISCSI_MAX_EPID) { - printk(KERN_ERR "Too many connections. Max supported %u\n", - ISCSI_MAX_EPID - 1); - return NULL; - } + int err, id; ep = kzalloc(sizeof(*ep) + dd_size, GFP_KERNEL); if (!ep) return NULL; + mutex_lock(&iscsi_ep_idr_mutex); + id = idr_alloc(&iscsi_ep_idr, ep, 0, -1, GFP_NOIO); + if (id < 0) { + mutex_unlock(&iscsi_ep_idr_mutex); + printk(KERN_ERR "Could not allocate endpoint ID. Error %d.\n", + id); + goto free_ep; + } + mutex_unlock(&iscsi_ep_idr_mutex); + ep->id = id; ep->dev.class = &iscsi_endpoint_class; - dev_set_name(&ep->dev, "ep-%llu", (unsigned long long) id); + dev_set_name(&ep->dev, "ep-%d", id); err = device_register(&ep->dev); if (err) - goto free_ep; + goto free_id; err = sysfs_create_group(&ep->dev.kobj, &iscsi_endpoint_group); if (err) @@ -248,6 +240,10 @@ unregister_dev: device_unregister(&ep->dev); return NULL; +free_id: + mutex_lock(&iscsi_ep_idr_mutex); + idr_remove(&iscsi_ep_idr, id); + mutex_unlock(&iscsi_ep_idr_mutex); free_ep: kfree(ep); return NULL; @@ -275,14 +271,17 @@ EXPORT_SYMBOL_GPL(iscsi_put_endpoint); */ struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle) { - struct device *dev; + struct iscsi_endpoint *ep; - dev = class_find_device(&iscsi_endpoint_class, NULL, &handle, - iscsi_match_epid); - if (!dev) - return NULL; + mutex_lock(&iscsi_ep_idr_mutex); + ep = idr_find(&iscsi_ep_idr, handle); + if (!ep) + goto unlock; - return iscsi_dev_to_endpoint(dev); + get_device(&ep->dev); +unlock: + mutex_unlock(&iscsi_ep_idr_mutex); + return ep; } EXPORT_SYMBOL_GPL(iscsi_lookup_endpoint); diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 38e4a67f5922..fdd486047404 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -295,7 +295,7 @@ extern void iscsi_host_for_each_session(struct Scsi_Host *shost, struct iscsi_endpoint { void *dd_data; /* LLD private data */ struct device dev; - uint64_t id; + int id; struct iscsi_cls_conn *conn; }; From 0aadafb5c34403a7cced1a8d61877048dc059f70 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 7 Apr 2022 19:13:08 -0500 Subject: [PATCH 534/708] scsi: iscsi: Fix endpoint reuse regression This patch fixes a bug where when using iSCSI offload we can free an endpoint while userspace still thinks it's active. That then causes the endpoint ID to be reused for a new connection's endpoint while userspace still thinks the ID is for the original connection. Userspace will then end up disconnecting a running connection's endpoint or trying to bind to another connection's endpoint. This bug is a regression added in: Commit 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling") where we added a in kernel ep_disconnect call to fix a bug in: Commit 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in kernel space") where we would call stop_conn without having done ep_disconnect. This early ep_disconnect call will then free the endpoint and it's ID while userspace still thinks the ID is valid. Fix the early release of the ID by having the in kernel recovery code keep a reference to the endpoint until userspace has called into the kernel to finish cleaning up the endpoint/connection. It requires the previous commit "scsi: iscsi: Release endpoint ID when its freed" which moved the freeing of the ID until when the endpoint is released. Link: https://lore.kernel.org/r/20220408001314.5014-5-michael.christie@oracle.com Fixes: 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling") Tested-by: Manish Rangankar Reviewed-by: Lee Duncan Reviewed-by: Chris Leech Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 1fc7c6bfbd67..f200da049f3b 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2247,7 +2247,11 @@ static void iscsi_if_disconnect_bound_ep(struct iscsi_cls_conn *conn, mutex_unlock(&conn->ep_mutex); flush_work(&conn->cleanup_work); - + /* + * Userspace is now done with the EP so we can release the ref + * iscsi_cleanup_conn_work_fn took. + */ + iscsi_put_endpoint(ep); mutex_lock(&conn->ep_mutex); } } @@ -2322,6 +2326,12 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work) return; } + /* + * Get a ref to the ep, so we don't release its ID until after + * userspace is done referencing it in iscsi_if_disconnect_bound_ep. + */ + if (conn->ep) + get_device(&conn->ep->dev); iscsi_ep_disconnect(conn, false); if (system_state != SYSTEM_RUNNING) { From 7c6e99c18167ed89729bf167ccb4a7e3ab3115ba Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 7 Apr 2022 19:13:09 -0500 Subject: [PATCH 535/708] scsi: iscsi: Fix conn cleanup and stop race during iscsid restart If iscsid is doing a stop_conn at the same time the kernel is starting error recovery we can hit a race that allows the cleanup work to run on a valid connection. In the race, iscsi_if_stop_conn sees the cleanup bit set, but it calls flush_work on the clean_work before iscsi_conn_error_event has queued it. The flush then returns before the queueing and so the cleanup_work can run later and disconnect/stop a conn while it's in a connected state. The patch: Commit 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in kernel space") added the late stop_conn call bug originally, and the patch: Commit 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling") attempted to fix it but only fixed the normal EH case and left the above race for the iscsid restart case. For the normal EH case we don't hit the race because we only signal userspace to start recovery after we have done the queueing, so the flush will always catch the queued work or see it completed. For iscsid restart cases like boot, we can hit the race because iscsid will call down to the kernel before the kernel has signaled any error, so both code paths can be running at the same time. This adds a lock around the setting of the cleanup bit and queueing so they happen together. Link: https://lore.kernel.org/r/20220408001314.5014-6-michael.christie@oracle.com Fixes: 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in kernel space") Tested-by: Manish Rangankar Reviewed-by: Lee Duncan Reviewed-by: Chris Leech Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 17 +++++++++++++++++ include/scsi/scsi_transport_iscsi.h | 2 ++ 2 files changed, 19 insertions(+) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index f200da049f3b..63a4f0c022fd 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2240,9 +2240,12 @@ static void iscsi_if_disconnect_bound_ep(struct iscsi_cls_conn *conn, bool is_active) { /* Check if this was a conn error and the kernel took ownership */ + spin_lock_irq(&conn->lock); if (!test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + spin_unlock_irq(&conn->lock); iscsi_ep_disconnect(conn, is_active); } else { + spin_unlock_irq(&conn->lock); ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); mutex_unlock(&conn->ep_mutex); @@ -2289,9 +2292,12 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, /* * Figure out if it was the kernel or userspace initiating this. */ + spin_lock_irq(&conn->lock); if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + spin_unlock_irq(&conn->lock); iscsi_stop_conn(conn, flag); } else { + spin_unlock_irq(&conn->lock); ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); flush_work(&conn->cleanup_work); @@ -2300,7 +2306,9 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, * Only clear for recovery to avoid extra cleanup runs during * termination. */ + spin_lock_irq(&conn->lock); clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags); + spin_unlock_irq(&conn->lock); } ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop done.\n"); return 0; @@ -2321,7 +2329,9 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work) */ if (conn->state != ISCSI_CONN_BOUND && conn->state != ISCSI_CONN_UP) { ISCSI_DBG_TRANS_CONN(conn, "Got error while conn is already failed. Ignoring.\n"); + spin_lock_irq(&conn->lock); clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags); + spin_unlock_irq(&conn->lock); mutex_unlock(&conn->ep_mutex); return; } @@ -2376,6 +2386,7 @@ iscsi_alloc_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid) conn->dd_data = &conn[1]; mutex_init(&conn->ep_mutex); + spin_lock_init(&conn->lock); INIT_LIST_HEAD(&conn->conn_list); INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn); conn->transport = transport; @@ -2578,9 +2589,12 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) struct iscsi_uevent *ev; struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev)); + unsigned long flags; + spin_lock_irqsave(&conn->lock, flags); if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work); + spin_unlock_irqrestore(&conn->lock, flags); priv = iscsi_if_transport_lookup(conn->transport); if (!priv) @@ -3723,11 +3737,14 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, return -EINVAL; mutex_lock(&conn->ep_mutex); + spin_lock_irq(&conn->lock); if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + spin_unlock_irq(&conn->lock); mutex_unlock(&conn->ep_mutex); ev->r.retcode = -ENOTCONN; return 0; } + spin_unlock_irq(&conn->lock); switch (nlh->nlmsg_type) { case ISCSI_UEVENT_BIND_CONN: diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index fdd486047404..9acb8422f680 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -211,6 +211,8 @@ struct iscsi_cls_conn { struct mutex ep_mutex; struct iscsi_endpoint *ep; + /* Used when accessing flags and queueing work. */ + spinlock_t lock; unsigned long flags; struct work_struct cleanup_work; From 03690d81974535f228e892a14f0d2d44404fe555 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 7 Apr 2022 19:13:10 -0500 Subject: [PATCH 536/708] scsi: iscsi: Fix unbound endpoint error handling If a driver raises a connection error before the connection is bound, we can leave a cleanup_work queued that can later run and disconnect/stop a connection that is logged in. The problem is that drivers can call iscsi_conn_error_event for endpoints that are connected but not yet bound when something like the network port they are using is brought down. iscsi_cleanup_conn_work_fn will check for this and exit early, but if the cleanup_work is stuck behind other works, it might not get run until after userspace has done ep_disconnect. Because the endpoint is not yet bound there was no way for ep_disconnect to flush the work. The bug of leaving stop_conns queued was added in: Commit 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling") and: Commit 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in kernel space") was supposed to fix it, but left this case. This patch moves the conn state check to before we even queue the work so we can avoid queueing. Link: https://lore.kernel.org/r/20220408001314.5014-7-michael.christie@oracle.com Fixes: 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in kernel space") Tested-by: Manish Rangankar Reviewed-by: Lee Duncan Reviewed-by: Chris Leech Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 65 ++++++++++++++++------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 63a4f0c022fd..2c0dd64159b0 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2201,10 +2201,10 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag) switch (flag) { case STOP_CONN_RECOVER: - conn->state = ISCSI_CONN_FAILED; + WRITE_ONCE(conn->state, ISCSI_CONN_FAILED); break; case STOP_CONN_TERM: - conn->state = ISCSI_CONN_DOWN; + WRITE_ONCE(conn->state, ISCSI_CONN_DOWN); break; default: iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n", @@ -2222,7 +2222,7 @@ static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) struct iscsi_endpoint *ep; ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n"); - conn->state = ISCSI_CONN_FAILED; + WRITE_ONCE(conn->state, ISCSI_CONN_FAILED); if (!conn->ep || !session->transport->ep_disconnect) return; @@ -2321,21 +2321,6 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work) struct iscsi_cls_session *session = iscsi_conn_to_session(conn); mutex_lock(&conn->ep_mutex); - /* - * If we are not at least bound there is nothing for us to do. Userspace - * will do a ep_disconnect call if offload is used, but will not be - * doing a stop since there is nothing to clean up, so we have to clear - * the cleanup bit here. - */ - if (conn->state != ISCSI_CONN_BOUND && conn->state != ISCSI_CONN_UP) { - ISCSI_DBG_TRANS_CONN(conn, "Got error while conn is already failed. Ignoring.\n"); - spin_lock_irq(&conn->lock); - clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags); - spin_unlock_irq(&conn->lock); - mutex_unlock(&conn->ep_mutex); - return; - } - /* * Get a ref to the ep, so we don't release its ID until after * userspace is done referencing it in iscsi_if_disconnect_bound_ep. @@ -2391,7 +2376,7 @@ iscsi_alloc_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid) INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn); conn->transport = transport; conn->cid = cid; - conn->state = ISCSI_CONN_DOWN; + WRITE_ONCE(conn->state, ISCSI_CONN_DOWN); /* this is released in the dev's release function */ if (!get_device(&session->dev)) @@ -2590,10 +2575,30 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev)); unsigned long flags; + int state; spin_lock_irqsave(&conn->lock, flags); - if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) - queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work); + /* + * Userspace will only do a stop call if we are at least bound. And, we + * only need to do the in kernel cleanup if in the UP state so cmds can + * be released to upper layers. If in other states just wait for + * userspace to avoid races that can leave the cleanup_work queued. + */ + state = READ_ONCE(conn->state); + switch (state) { + case ISCSI_CONN_BOUND: + case ISCSI_CONN_UP: + if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, + &conn->flags)) { + queue_work(iscsi_conn_cleanup_workq, + &conn->cleanup_work); + } + break; + default: + ISCSI_DBG_TRANS_CONN(conn, "Got conn error in state %d\n", + state); + break; + } spin_unlock_irqrestore(&conn->lock, flags); priv = iscsi_if_transport_lookup(conn->transport); @@ -2944,7 +2949,7 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) char *data = (char*)ev + sizeof(*ev); struct iscsi_cls_conn *conn; struct iscsi_cls_session *session; - int err = 0, value = 0; + int err = 0, value = 0, state; if (ev->u.set_param.len > PAGE_SIZE) return -EINVAL; @@ -2961,8 +2966,8 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) session->recovery_tmo = value; break; default: - if ((conn->state == ISCSI_CONN_BOUND) || - (conn->state == ISCSI_CONN_UP)) { + state = READ_ONCE(conn->state); + if (state == ISCSI_CONN_BOUND || state == ISCSI_CONN_UP) { err = transport->set_param(conn, ev->u.set_param.param, data, ev->u.set_param.len); } else { @@ -3758,7 +3763,7 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, ev->u.b_conn.transport_eph, ev->u.b_conn.is_leading); if (!ev->r.retcode) - conn->state = ISCSI_CONN_BOUND; + WRITE_ONCE(conn->state, ISCSI_CONN_BOUND); if (ev->r.retcode || !transport->ep_connect) break; @@ -3777,7 +3782,8 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, case ISCSI_UEVENT_START_CONN: ev->r.retcode = transport->start_conn(conn); if (!ev->r.retcode) - conn->state = ISCSI_CONN_UP; + WRITE_ONCE(conn->state, ISCSI_CONN_UP); + break; case ISCSI_UEVENT_SEND_PDU: pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev); @@ -4084,10 +4090,11 @@ static ssize_t show_conn_state(struct device *dev, { struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent); const char *state = "unknown"; + int conn_state = READ_ONCE(conn->state); - if (conn->state >= 0 && - conn->state < ARRAY_SIZE(connection_state_names)) - state = connection_state_names[conn->state]; + if (conn_state >= 0 && + conn_state < ARRAY_SIZE(connection_state_names)) + state = connection_state_names[conn_state]; return sysfs_emit(buf, "%s\n", state); } From 5bd856256f8c03e329f8ff36d8c8efcb111fe6df Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 7 Apr 2022 19:13:11 -0500 Subject: [PATCH 537/708] scsi: iscsi: Merge suspend fields Move the tx and rx suspend fields into one flags field. Link: https://lore.kernel.org/r/20220408001314.5014-8-michael.christie@oracle.com Tested-by: Manish Rangankar Reviewed-by: Lee Duncan Reviewed-by: Chris Leech Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2i/bnx2i_hwi.c | 2 +- drivers/scsi/bnx2i/bnx2i_iscsi.c | 2 +- drivers/scsi/cxgbi/libcxgbi.c | 6 +++--- drivers/scsi/libiscsi.c | 20 ++++++++++---------- drivers/scsi/libiscsi_tcp.c | 2 +- include/scsi/libiscsi.h | 9 +++++---- 6 files changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index 7fe7f53a41c0..6c864b093ac9 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -1977,7 +1977,7 @@ static int bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn) if (nopin->cq_req_sn != qp->cqe_exp_seq_sn) break; - if (unlikely(test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx))) { + if (unlikely(test_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags))) { if (nopin->op_code == ISCSI_OP_NOOP_IN && nopin->itt == (u16) RESERVED_ITT) { printk(KERN_ALERT "bnx2i: Unsolicited " diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c index fe86fd61a995..15fbd09baa94 100644 --- a/drivers/scsi/bnx2i/bnx2i_iscsi.c +++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c @@ -1721,7 +1721,7 @@ static int bnx2i_tear_down_conn(struct bnx2i_hba *hba, struct iscsi_conn *conn = ep->conn->cls_conn->dd_data; /* Must suspend all rx queue activity for this ep */ - set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx); + set_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags); } /* CONN_DISCONNECT timeout may or may not be an issue depending * on what transcribed in TCP layer, different targets behave diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index 8c7d4dda4cf2..4365d52c6430 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -1634,11 +1634,11 @@ void cxgbi_conn_pdu_ready(struct cxgbi_sock *csk) log_debug(1 << CXGBI_DBG_PDU_RX, "csk 0x%p, conn 0x%p.\n", csk, conn); - if (unlikely(!conn || conn->suspend_rx)) { + if (unlikely(!conn || test_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags))) { log_debug(1 << CXGBI_DBG_PDU_RX, - "csk 0x%p, conn 0x%p, id %d, suspend_rx %lu!\n", + "csk 0x%p, conn 0x%p, id %d, conn flags 0x%lx!\n", csk, conn, conn ? conn->id : 0xFF, - conn ? conn->suspend_rx : 0xFF); + conn ? conn->flags : 0xFF); return; } diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index cf4211c6500d..fbf029147f5f 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1392,8 +1392,8 @@ static bool iscsi_set_conn_failed(struct iscsi_conn *conn) if (conn->stop_stage == 0) session->state = ISCSI_STATE_FAILED; - set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); - set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx); + set_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags); + set_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags); return true; } @@ -1454,7 +1454,7 @@ static int iscsi_xmit_task(struct iscsi_conn *conn, struct iscsi_task *task, * Do this after dropping the extra ref because if this was a requeue * it's removed from that list and cleanup_queued_task would miss it. */ - if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { + if (test_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags)) { /* * Save the task and ref in case we weren't cleaning up this * task and get woken up again. @@ -1532,7 +1532,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) int rc = 0; spin_lock_bh(&conn->session->frwd_lock); - if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { + if (test_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags)) { ISCSI_DBG_SESSION(conn->session, "Tx suspended!\n"); spin_unlock_bh(&conn->session->frwd_lock); return -ENODATA; @@ -1746,7 +1746,7 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) goto fault; } - if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { + if (test_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags)) { reason = FAILURE_SESSION_IN_RECOVERY; sc->result = DID_REQUEUE << 16; goto fault; @@ -1935,7 +1935,7 @@ static void fail_scsi_tasks(struct iscsi_conn *conn, u64 lun, int error) void iscsi_suspend_queue(struct iscsi_conn *conn) { spin_lock_bh(&conn->session->frwd_lock); - set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); + set_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags); spin_unlock_bh(&conn->session->frwd_lock); } EXPORT_SYMBOL_GPL(iscsi_suspend_queue); @@ -1953,7 +1953,7 @@ void iscsi_suspend_tx(struct iscsi_conn *conn) struct Scsi_Host *shost = conn->session->host; struct iscsi_host *ihost = shost_priv(shost); - set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); + set_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags); if (ihost->workq) flush_workqueue(ihost->workq); } @@ -1961,7 +1961,7 @@ EXPORT_SYMBOL_GPL(iscsi_suspend_tx); static void iscsi_start_tx(struct iscsi_conn *conn) { - clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); + clear_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags); iscsi_conn_queue_work(conn); } @@ -3329,8 +3329,8 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session, /* * Unblock xmitworker(), Login Phase will pass through. */ - clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx); - clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); + clear_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags); + clear_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags); return 0; } EXPORT_SYMBOL_GPL(iscsi_conn_bind); diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index 2e9ffe3d1a55..883005757ddb 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -927,7 +927,7 @@ int iscsi_tcp_recv_skb(struct iscsi_conn *conn, struct sk_buff *skb, */ conn->last_recv = jiffies; - if (unlikely(conn->suspend_rx)) { + if (unlikely(test_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags))) { ISCSI_DBG_TCP(conn, "Rx suspended!\n"); *status = ISCSI_TCP_SUSPENDED; return 0; diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index e76c94697c1b..84086c240228 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -53,8 +53,10 @@ enum { #define ISID_SIZE 6 -/* Connection suspend "bit" */ -#define ISCSI_SUSPEND_BIT 1 +/* Connection flags */ +#define ISCSI_CONN_FLAG_SUSPEND_TX BIT(0) +#define ISCSI_CONN_FLAG_SUSPEND_RX BIT(1) + #define ISCSI_ITT_MASK 0x1fff #define ISCSI_TOTAL_CMDS_MAX 4096 @@ -211,8 +213,7 @@ struct iscsi_conn { struct list_head cmdqueue; /* data-path cmd queue */ struct list_head requeue; /* tasks needing another run */ struct work_struct xmitwork; /* per-conn. xmit workqueue */ - unsigned long suspend_tx; /* suspend Tx */ - unsigned long suspend_rx; /* suspend Rx */ + unsigned long flags; /* ISCSI_CONN_FLAGs */ /* negotiated params */ unsigned max_recv_dlength; /* initiator_max_recv_dsl*/ From 44ac97109e42f87b1a34954704b81b6c8eca80c4 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 7 Apr 2022 19:13:12 -0500 Subject: [PATCH 538/708] scsi: iscsi: Fix NOP handling during conn recovery If a offload driver doesn't use the xmit workqueue, then when we are doing ep_disconnect libiscsi can still inject PDUs to the driver. This adds a check for if the connection is bound before trying to inject PDUs. Link: https://lore.kernel.org/r/20220408001314.5014-9-michael.christie@oracle.com Tested-by: Manish Rangankar Reviewed-by: Lee Duncan Reviewed-by: Chris Leech Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 7 ++++++- include/scsi/libiscsi.h | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index fbf029147f5f..797abf4f5399 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -678,7 +678,8 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, struct iscsi_task *task; itt_t itt; - if (session->state == ISCSI_STATE_TERMINATE) + if (session->state == ISCSI_STATE_TERMINATE || + !test_bit(ISCSI_CONN_FLAG_BOUND, &conn->flags)) return NULL; if (opcode == ISCSI_OP_LOGIN || opcode == ISCSI_OP_TEXT) { @@ -2214,6 +2215,8 @@ void iscsi_conn_unbind(struct iscsi_cls_conn *cls_conn, bool is_active) iscsi_suspend_tx(conn); spin_lock_bh(&session->frwd_lock); + clear_bit(ISCSI_CONN_FLAG_BOUND, &conn->flags); + if (!is_active) { /* * if logout timed out before userspace could even send a PDU @@ -3317,6 +3320,8 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session, spin_lock_bh(&session->frwd_lock); if (is_leading) session->leadconn = conn; + + set_bit(ISCSI_CONN_FLAG_BOUND, &conn->flags); spin_unlock_bh(&session->frwd_lock); /* diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 84086c240228..d0a24779c52d 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -56,7 +56,7 @@ enum { /* Connection flags */ #define ISCSI_CONN_FLAG_SUSPEND_TX BIT(0) #define ISCSI_CONN_FLAG_SUSPEND_RX BIT(1) - +#define ISCSI_CONN_FLAG_BOUND BIT(2) #define ISCSI_ITT_MASK 0x1fff #define ISCSI_TOTAL_CMDS_MAX 4096 From 857b06527f707f5df634b854898a191b5c1d0272 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 7 Apr 2022 19:13:13 -0500 Subject: [PATCH 539/708] scsi: qedi: Fix failed disconnect handling We set the qedi_ep state to EP_STATE_OFLDCONN_START when the ep is created. Then in qedi_set_path we kick off the offload work. If userspace times out the connection and calls ep_disconnect, qedi will only flush the offload work if the qedi_ep state has transitioned away from EP_STATE_OFLDCONN_START. If we can't connect we will not have transitioned state and will leave the offload work running, and we will free the qedi_ep from under it. This patch just has us init the work when we create the ep, then always flush it. Link: https://lore.kernel.org/r/20220408001314.5014-10-michael.christie@oracle.com Tested-by: Manish Rangankar Reviewed-by: Lee Duncan Reviewed-by: Chris Leech Acked-by: Manish Rangankar Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_iscsi.c | 69 +++++++++++++++++----------------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index 8196f89f404e..31ec429104e2 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -860,6 +860,37 @@ static int qedi_task_xmit(struct iscsi_task *task) return qedi_iscsi_send_ioreq(task); } +static void qedi_offload_work(struct work_struct *work) +{ + struct qedi_endpoint *qedi_ep = + container_of(work, struct qedi_endpoint, offload_work); + struct qedi_ctx *qedi; + int wait_delay = 5 * HZ; + int ret; + + qedi = qedi_ep->qedi; + + ret = qedi_iscsi_offload_conn(qedi_ep); + if (ret) { + QEDI_ERR(&qedi->dbg_ctx, + "offload error: iscsi_cid=%u, qedi_ep=%p, ret=%d\n", + qedi_ep->iscsi_cid, qedi_ep, ret); + qedi_ep->state = EP_STATE_OFLDCONN_FAILED; + return; + } + + ret = wait_event_interruptible_timeout(qedi_ep->tcp_ofld_wait, + (qedi_ep->state == + EP_STATE_OFLDCONN_COMPL), + wait_delay); + if (ret <= 0 || qedi_ep->state != EP_STATE_OFLDCONN_COMPL) { + qedi_ep->state = EP_STATE_OFLDCONN_FAILED; + QEDI_ERR(&qedi->dbg_ctx, + "Offload conn TIMEOUT iscsi_cid=%u, qedi_ep=%p\n", + qedi_ep->iscsi_cid, qedi_ep); + } +} + static struct iscsi_endpoint * qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, int non_blocking) @@ -908,6 +939,7 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, } qedi_ep = ep->dd_data; memset(qedi_ep, 0, sizeof(struct qedi_endpoint)); + INIT_WORK(&qedi_ep->offload_work, qedi_offload_work); qedi_ep->state = EP_STATE_IDLE; qedi_ep->iscsi_cid = (u32)-1; qedi_ep->qedi = qedi; @@ -1056,12 +1088,11 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) qedi_ep = ep->dd_data; qedi = qedi_ep->qedi; + flush_work(&qedi_ep->offload_work); + if (qedi_ep->state == EP_STATE_OFLDCONN_START) goto ep_exit_recover; - if (qedi_ep->state != EP_STATE_OFLDCONN_NONE) - flush_work(&qedi_ep->offload_work); - if (qedi_ep->conn) { qedi_conn = qedi_ep->conn; abrt_conn = qedi_conn->abrt_conn; @@ -1235,37 +1266,6 @@ static int qedi_data_avail(struct qedi_ctx *qedi, u16 vlanid) return rc; } -static void qedi_offload_work(struct work_struct *work) -{ - struct qedi_endpoint *qedi_ep = - container_of(work, struct qedi_endpoint, offload_work); - struct qedi_ctx *qedi; - int wait_delay = 5 * HZ; - int ret; - - qedi = qedi_ep->qedi; - - ret = qedi_iscsi_offload_conn(qedi_ep); - if (ret) { - QEDI_ERR(&qedi->dbg_ctx, - "offload error: iscsi_cid=%u, qedi_ep=%p, ret=%d\n", - qedi_ep->iscsi_cid, qedi_ep, ret); - qedi_ep->state = EP_STATE_OFLDCONN_FAILED; - return; - } - - ret = wait_event_interruptible_timeout(qedi_ep->tcp_ofld_wait, - (qedi_ep->state == - EP_STATE_OFLDCONN_COMPL), - wait_delay); - if ((ret <= 0) || (qedi_ep->state != EP_STATE_OFLDCONN_COMPL)) { - qedi_ep->state = EP_STATE_OFLDCONN_FAILED; - QEDI_ERR(&qedi->dbg_ctx, - "Offload conn TIMEOUT iscsi_cid=%u, qedi_ep=%p\n", - qedi_ep->iscsi_cid, qedi_ep); - } -} - static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data) { struct qedi_ctx *qedi; @@ -1381,7 +1381,6 @@ static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data) qedi_ep->dst_addr, qedi_ep->dst_port); } - INIT_WORK(&qedi_ep->offload_work, qedi_offload_work); queue_work(qedi->offload_thread, &qedi_ep->offload_work); ret = 0; From 70a3baeec4e89736be932a60d682d7ae27556f5c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 7 Apr 2022 19:13:14 -0500 Subject: [PATCH 540/708] scsi: iscsi: MAINTAINERS: Add Mike Christie as co-maintainer I've been doing a lot of iscsi patches because Oracle is paying me to work on iSCSI again. It was supposed to be temp assignment, but my co-worker that was working on iscsi moved to a new group so it looks like I'm back on this code again. After talking to Chris and Lee this patch adds me back as co-maintainer, so I can help them and people remember to cc me on issues. Link: https://lore.kernel.org/r/20220408001314.5014-11-michael.christie@oracle.com Tested-by: Manish Rangankar Acked-by: Lee Duncan Acked-by: Chris Leech Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index fd768d43e048..ca9d56121974 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10369,6 +10369,7 @@ F: include/linux/isapnp.h ISCSI M: Lee Duncan M: Chris Leech +M: Mike Christie L: open-iscsi@googlegroups.com L: linux-scsi@vger.kernel.org S: Maintained From 8467dda0c26583547731e7f3ea73fc3856bae3bf Mon Sep 17 00:00:00 2001 From: Petr Malat Date: Sat, 9 Apr 2022 08:36:11 +0200 Subject: [PATCH 541/708] sctp: Initialize daddr on peeled off socket Function sctp_do_peeloff() wrongly initializes daddr of the original socket instead of the peeled off socket, which makes getpeername() return zeroes instead of the primary address. Initialize the new socket instead. Fixes: d570ee490fb1 ("[SCTP]: Correctly set daddr for IPv6 sockets during peeloff") Signed-off-by: Petr Malat Acked-by: Marcelo Ricardo Leitner Link: https://lore.kernel.org/r/20220409063611.673193-1-oss@malat.biz Signed-off-by: Jakub Kicinski --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3e1a9600be5e..7b0427658056 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5636,7 +5636,7 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) * Set the daddr and initialize id to something more random and also * copy over any ip options. */ - sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk); + sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sock->sk); sp->pf->copy_ip_options(sk, sock->sk); /* Populate the fields of the newsk from the oldsk and migrate the From eb9c0d671e9432901b8a453e7915416f22f7f919 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sat, 9 Apr 2022 20:41:40 +0200 Subject: [PATCH 542/708] net: lan966x: Update lan966x_ptp_get_nominal_value The clk_per_cfg register represents the value added to the system clock for each clock cycle. The issue is that the default value is wrong, meaning that in case the DUT was a grandmaster then everone in the network was too slow. In case there was a grandmaster, then there is no issue because the DUT will configure clk_per_cfg register based on the master frequency. Fixes: d096459494a887 ("net: lan966x: Add support for ptp clocks") Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c index ae782778d6dd..0a1041da4384 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c @@ -29,10 +29,10 @@ enum { static u64 lan966x_ptp_get_nominal_value(void) { - u64 res = 0x304d2df1; - - res <<= 32; - return res; + /* This is the default value that for each system clock, the time of day + * is increased. It has the format 5.59 nanosecond. + */ + return 0x304d4873ecade305; } int lan966x_ptp_hwtstamp_set(struct lan966x_port *port, struct ifreq *ifr) From 6476f90aefaf119c47ceccde52327464e813fe26 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sat, 9 Apr 2022 20:41:41 +0200 Subject: [PATCH 543/708] net: lan966x: Fix IGMP snooping when frames have vlan tag In case an IGMP frame has a vlan tag, then the function lan966x_hw_offload couldn't figure out that is a IGMP frame. Therefore the SW thinks that the frame was already forward by the HW which is not true. Extend lan966x_hw_offload to pop the vlan tag if are any and then check for IGMP frames. Fixes: 47aeea0d57e80c ("net: lan966x: Implement the callback SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED ") Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 1f8c67f0261b..958e55596b82 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -446,6 +446,12 @@ static bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, ANA_CPU_FWD_CFG_MLD_REDIR_ENA))) return true; + if (eth_type_vlan(skb->protocol)) { + skb = skb_vlan_untag(skb); + if (unlikely(!skb)) + return false; + } + if (skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->protocol == IPPROTO_IGMP) return false; From d7a947d289dc205fc717c004dcebe33b15305afd Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sat, 9 Apr 2022 20:41:42 +0200 Subject: [PATCH 544/708] net: lan966x: Fix when a port's upper is changed. On lan966x it is not allowed to have foreign interfaces under a bridge which already contains lan966x ports. So when a port leaves the bridge it would call switchdev_bridge_port_unoffload which eventually will notify the other ports that bridge left the vlan group but that is not true because the bridge is still part of the vlan group. Therefore when a port leaves the bridge, stop generating replays because already the HW cleared after itself and the other ports don't need to do anything else. Fixes: cf2f60897e921e ("net: lan966x: Add support to offload the forwarding.") Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c index e3555c94294d..df2bee678559 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c @@ -322,8 +322,7 @@ static int lan966x_port_prechangeupper(struct net_device *dev, if (netif_is_bridge_master(info->upper_dev) && !info->linking) switchdev_bridge_port_unoffload(port->dev, port, - &lan966x_switchdev_nb, - &lan966x_switchdev_blocking_nb); + NULL, NULL); return NOTIFY_DONE; } From 269219321eb7d7645a3122cf40a420c5dc655eb9 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sat, 9 Apr 2022 20:41:43 +0200 Subject: [PATCH 545/708] net: lan966x: Stop processing the MAC entry is port is wrong. Currently when getting a new MAC is learn, the HW generates an interrupt. So then the SW will check the new entry and checks if it arrived on a correct port. If it didn't just generate a warning. But this could still crash the system. Therefore stop processing that entry when an issue is seen. Fixes: 5ccd66e01cbef8 ("net: lan966x: add support for interrupts from analyzer") Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_mac.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c index ce5970bdcc6a..2679111ef669 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c @@ -346,7 +346,8 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, lan966x_mac_process_raw_entry(&raw_entries[column], mac, &vid, &dest_idx); - WARN_ON(dest_idx > lan966x->num_phys_ports); + if (WARN_ON(dest_idx > lan966x->num_phys_ports)) + continue; /* If the entry in SW is found, then there is nothing * to do @@ -392,7 +393,8 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, lan966x_mac_process_raw_entry(&raw_entries[column], mac, &vid, &dest_idx); - WARN_ON(dest_idx > lan966x->num_phys_ports); + if (WARN_ON(dest_idx > lan966x->num_phys_ports)) + continue; mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx); if (!mac_entry) From 6c6f9f31ecd47dce1d0dafca4bec8805f9bc97cd Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 12 Apr 2022 10:14:59 +0200 Subject: [PATCH 546/708] netfilter: nf_tables: nft_parse_register can return a negative value Since commit 6e1acfa387b9 ("netfilter: nf_tables: validate registers coming from userspace.") nft_parse_register can return a negative value, but the function prototype is still returning an unsigned int. Fixes: 6e1acfa387b9 ("netfilter: nf_tables: validate registers coming from userspace.") Signed-off-by: Antoine Tenart Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 128ee3b300d6..16c3a39689f4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9363,7 +9363,7 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) } EXPORT_SYMBOL_GPL(nft_parse_u32_check); -static unsigned int nft_parse_register(const struct nlattr *attr, u32 *preg) +static int nft_parse_register(const struct nlattr *attr, u32 *preg) { unsigned int reg; From fee2b871d8d6389c9b4bdf9346a99ccc1c98c9b8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 11:31:40 +0200 Subject: [PATCH 547/708] ALSA: core: Add snd_card_free_on_error() helper This is a small helper function to handle the error path more easily when an error happens during the probe for the device with the device-managed card. Since devres releases in the reverser order of the creations, usually snd_card_free() gets called at the last in the probe error path unless it already reached snd_card_register() calls. Due to this nature, when a driver expects the resource releases in card->private_free, this might be called too lately. As a workaround, one should call the probe like: static int __some_probe(...) { // do real probe.... } static int some_probe(...) { return snd_card_free_on_error(dev, __some_probe(dev, ...)); } so that the snd_card_free() is called explicitly at the beginning of the error path from the probe. This function will be used in the upcoming fixes to address the regressions by devres usages. Fixes: e8ad415b7a55 ("ALSA: core: Add managed card creation") Cc: Link: https://lore.kernel.org/r/20220412093141.8008-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/core.h | 1 + sound/core/init.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/include/sound/core.h b/include/sound/core.h index b7e9b58d3c78..6d4cc49584c6 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -284,6 +284,7 @@ int snd_card_disconnect(struct snd_card *card); void snd_card_disconnect_sync(struct snd_card *card); int snd_card_free(struct snd_card *card); int snd_card_free_when_closed(struct snd_card *card); +int snd_card_free_on_error(struct device *dev, int ret); void snd_card_set_id(struct snd_card *card, const char *id); int snd_card_register(struct snd_card *card); int snd_card_info_init(void); diff --git a/sound/core/init.c b/sound/core/init.c index 31ba7024e3ad..726a8353201f 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -209,6 +209,12 @@ static void __snd_card_release(struct device *dev, void *data) * snd_card_register(), the very first devres action to call snd_card_free() * is added automatically. In that way, the resource disconnection is assured * at first, then released in the expected order. + * + * If an error happens at the probe before snd_card_register() is called and + * there have been other devres resources, you'd need to free the card manually + * via snd_card_free() call in the error; otherwise it may lead to UAF due to + * devres call orders. You can use snd_card_free_on_error() helper for + * handling it more easily. */ int snd_devm_card_new(struct device *parent, int idx, const char *xid, struct module *module, size_t extra_size, @@ -235,6 +241,28 @@ int snd_devm_card_new(struct device *parent, int idx, const char *xid, } EXPORT_SYMBOL_GPL(snd_devm_card_new); +/** + * snd_card_free_on_error - a small helper for handling devm probe errors + * @dev: the managed device object + * @ret: the return code from the probe callback + * + * This function handles the explicit snd_card_free() call at the error from + * the probe callback. It's just a small helper for simplifying the error + * handling with the managed devices. + */ +int snd_card_free_on_error(struct device *dev, int ret) +{ + struct snd_card *card; + + if (!ret) + return 0; + card = devres_find(dev, __snd_card_release, NULL, NULL); + if (card) + snd_card_free(card); + return ret; +} +EXPORT_SYMBOL_GPL(snd_card_free_on_error); + static int snd_card_init(struct snd_card *card, struct device *parent, int idx, const char *xid, struct module *module, size_t extra_size) From 313c7e57035125cb7533b53ddd0bc7aa562b433c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 11:31:41 +0200 Subject: [PATCH 548/708] ALSA: echoaudio: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 9c211bf392bb ("ALSA: echoaudio: Allocate resources with device-managed APIs") Reported-and-tested-by: Zheyu Ma Cc: Link: https://lore.kernel.org/r/CAMhUBjm2AdyEZ_-EgexdNDN7SvY4f89=4=FwAL+c0Mg0O+X50A@mail.gmail.com Link: https://lore.kernel.org/r/20220412093141.8008-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/echoaudio/echoaudio.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index 25b012ef5c3e..c70c3ac4e99a 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -1970,8 +1970,8 @@ static int snd_echo_create(struct snd_card *card, } /* constructor */ -static int snd_echo_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_echo_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2139,6 +2139,11 @@ static int snd_echo_probe(struct pci_dev *pci, return 0; } +static int snd_echo_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_echo_probe(pci, pci_id)); +} #if defined(CONFIG_PM_SLEEP) From 30de14b1884ba609fc1acfba5b40309e3a6ccefe Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 8 Apr 2022 14:51:26 +0200 Subject: [PATCH 549/708] s390: current_stack_pointer shouldn't be a function s390 defines current_stack_pointer as function while all other architectures use 'register unsigned long asm(""). This make codes like the following from check_stack_object() fail: if (IS_ENABLED(CONFIG_STACK_GROWSUP)) { if ((void *)current_stack_pointer < obj + len) return BAD_STACK; } else { if (obj < (void *)current_stack_pointer) return BAD_STACK; } because this would compare the address of current_stack_pointer() and not the stackpointer value. Reported-by: Karsten Graul Fixes: 2792d84e6da5 ("usercopy: Check valid lifetime via stack depth") Cc: Kees Cook Cc: Vasily Gorbik Cc: Alexander Gordeev Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/include/asm/entry-common.h | 2 +- arch/s390/include/asm/processor.h | 8 +------- arch/s390/include/asm/stacktrace.h | 2 +- arch/s390/lib/test_unwind.c | 2 +- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h index eabab24b71dd..2f0a1cacdf85 100644 --- a/arch/s390/include/asm/entry-common.h +++ b/arch/s390/include/asm/entry-common.h @@ -58,7 +58,7 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, static inline bool on_thread_stack(void) { - return !(((unsigned long)(current->stack) ^ current_stack_pointer()) & ~(THREAD_SIZE - 1)); + return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1)); } #endif diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index eee8d96fb38e..ff1e25d515a8 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -200,13 +200,7 @@ unsigned long __get_wchan(struct task_struct *p); /* Has task runtime instrumentation enabled ? */ #define is_ri_task(tsk) (!!(tsk)->thread.ri_cb) -static __always_inline unsigned long current_stack_pointer(void) -{ - unsigned long sp; - - asm volatile("la %0,0(15)" : "=a" (sp)); - return sp; -} +register unsigned long current_stack_pointer asm("r15"); static __always_inline unsigned short stap(void) { diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 275f4258fbd5..f8500191993d 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -46,7 +46,7 @@ struct stack_frame { }; /* - * Unlike current_stack_pointer() which simply returns current value of %r15 + * Unlike current_stack_pointer which simply contains the current value of %r15 * current_frame_address() returns function stack frame address, which matches * %r15 upon function invocation. It may differ from %r15 later if function * allocates stack for local variables or new stack frame to call other diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 9bb067321ab4..5a053b393d5c 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -147,7 +147,7 @@ static __always_inline struct pt_regs fake_pt_regs(void) struct pt_regs regs; memset(®s, 0, sizeof(regs)); - regs.gprs[15] = current_stack_pointer(); + regs.gprs[15] = current_stack_pointer; asm volatile( "basr %[psw_addr],0\n" From c68c63429319a923a3f23db64810ba608f5d20f7 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 8 Apr 2022 16:21:43 +0200 Subject: [PATCH 550/708] s390: enable CONFIG_HARDENED_USERCOPY in debug_defconfig Signed-off-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index e18006971e36..f6dfde577ce8 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -692,6 +692,7 @@ CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y +CONFIG_HARDENED_USERCOPY=y CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y From 0f8da75b51ac863b9435368bd50691718cc454b0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Apr 2022 15:24:43 +0100 Subject: [PATCH 551/708] io_uring: fix assign file locking issue io-wq work cancellation path can't take uring_lock as how it's done on file assignment, we have to handle IO_WQ_WORK_CANCEL first, this fixes encountered hangs. Fixes: 6bf9c47a3989 ("io_uring: defer file assignment") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0d9b9f37841645518503f6a207e509d14a286aba.1649773463.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 38e62b1c6297..8a931eb8a3a6 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7277,16 +7277,18 @@ static void io_wq_submit_work(struct io_wq_work *work) if (timeout) io_queue_linked_timeout(timeout); - if (!io_assign_file(req, issue_flags)) { - err = -EBADF; - work->flags |= IO_WQ_WORK_CANCEL; - } /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ if (work->flags & IO_WQ_WORK_CANCEL) { +fail: io_req_task_queue_fail(req, err); return; } + if (!io_assign_file(req, issue_flags)) { + err = -EBADF; + work->flags |= IO_WQ_WORK_CANCEL; + goto fail; + } if (req->flags & REQ_F_FORCE_ASYNC) { bool opcode_poll = def->pollin || def->pollout; From 10b1881a97be240126891cb384bd3bc1869f52d8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:25:58 +0200 Subject: [PATCH 552/708] ALSA: galaxy: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 35a245ec0619 ("ALSA: galaxy: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/galaxy/galaxy.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/isa/galaxy/galaxy.c b/sound/isa/galaxy/galaxy.c index ea001c80149d..3164eb8510fa 100644 --- a/sound/isa/galaxy/galaxy.c +++ b/sound/isa/galaxy/galaxy.c @@ -478,7 +478,7 @@ static void snd_galaxy_free(struct snd_card *card) galaxy_set_config(galaxy, galaxy->config); } -static int snd_galaxy_probe(struct device *dev, unsigned int n) +static int __snd_galaxy_probe(struct device *dev, unsigned int n) { struct snd_galaxy *galaxy; struct snd_wss *chip; @@ -598,6 +598,11 @@ static int snd_galaxy_probe(struct device *dev, unsigned int n) return 0; } +static int snd_galaxy_probe(struct device *dev, unsigned int n) +{ + return snd_card_free_on_error(dev, __snd_galaxy_probe(dev, n)); +} + static struct isa_driver snd_galaxy_driver = { .match = snd_galaxy_match, .probe = snd_galaxy_probe, From d72458071150b802940204950d0d462ea3c913b1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:25:59 +0200 Subject: [PATCH 553/708] ALSA: sc6000: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 111601ff76e9 ("ALSA: sc6000: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/sc6000.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c index 26ab7ff80768..60398fced046 100644 --- a/sound/isa/sc6000.c +++ b/sound/isa/sc6000.c @@ -537,7 +537,7 @@ static void snd_sc6000_free(struct snd_card *card) sc6000_setup_board(vport, 0); } -static int snd_sc6000_probe(struct device *devptr, unsigned int dev) +static int __snd_sc6000_probe(struct device *devptr, unsigned int dev) { static const int possible_irqs[] = { 5, 7, 9, 10, 11, -1 }; static const int possible_dmas[] = { 1, 3, 0, -1 }; @@ -662,6 +662,11 @@ static int snd_sc6000_probe(struct device *devptr, unsigned int dev) return 0; } +static int snd_sc6000_probe(struct device *devptr, unsigned int dev) +{ + return snd_card_free_on_error(devptr, __snd_sc6000_probe(devptr, dev)); +} + static struct isa_driver snd_sc6000_driver = { .match = snd_sc6000_match, .probe = snd_sc6000_probe, From a8e84a5da18e6d786540aa4ceb6f969d5f1a441d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:00 +0200 Subject: [PATCH 554/708] ALSA: ad1889: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 567f58754109 ("ALSA: ad1889: Allocate resources with device-managed APIs") Link: https://lore.kernel.org/r/20220412102636.16000-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/ad1889.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/ad1889.c b/sound/pci/ad1889.c index bba4dae8dcc7..50e30704bf6f 100644 --- a/sound/pci/ad1889.c +++ b/sound/pci/ad1889.c @@ -844,8 +844,8 @@ snd_ad1889_create(struct snd_card *card, struct pci_dev *pci) } static int -snd_ad1889_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +__snd_ad1889_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { int err; static int devno; @@ -904,6 +904,12 @@ snd_ad1889_probe(struct pci_dev *pci, return 0; } +static int snd_ad1889_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_ad1889_probe(pci, pci_id)); +} + static const struct pci_device_id snd_ad1889_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_ANALOG_DEVICES, PCI_DEVICE_ID_AD1889JS) }, { 0, }, From 19401a9441236cfbbbeb1bef4ef4c8668db45dfc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:01 +0200 Subject: [PATCH 555/708] ALSA: ali5451: Fix the missing snd_card_free() call at probe error The recent cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 1f0819979248 ("ALSA: ali5451: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-5-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/ali5451/ali5451.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/ali5451/ali5451.c b/sound/pci/ali5451/ali5451.c index 92eb59db106d..2378a39abaeb 100644 --- a/sound/pci/ali5451/ali5451.c +++ b/sound/pci/ali5451/ali5451.c @@ -2124,8 +2124,8 @@ static int snd_ali_create(struct snd_card *card, return 0; } -static int snd_ali_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_ali_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct snd_ali *codec; @@ -2170,6 +2170,12 @@ static int snd_ali_probe(struct pci_dev *pci, return 0; } +static int snd_ali_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_ali_probe(pci, pci_id)); +} + static struct pci_driver ali5451_driver = { .name = KBUILD_MODNAME, .id_table = snd_ali_ids, From d616a0246da88d811f9f4c3aa83003c05efd3af0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:02 +0200 Subject: [PATCH 556/708] ALSA: als4000: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 0e175f665960 ("ALSA: als4000: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-6-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/als4000.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/als4000.c b/sound/pci/als4000.c index 535eccd124be..f33aeb692a11 100644 --- a/sound/pci/als4000.c +++ b/sound/pci/als4000.c @@ -806,8 +806,8 @@ static void snd_card_als4000_free( struct snd_card *card ) snd_als4000_free_gameport(acard); } -static int snd_card_als4000_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_card_als4000_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -930,6 +930,12 @@ static int snd_card_als4000_probe(struct pci_dev *pci, return 0; } +static int snd_card_als4000_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_card_als4000_probe(pci, pci_id)); +} + #ifdef CONFIG_PM_SLEEP static int snd_als4000_suspend(struct device *dev) { From 48e8adde8d1c586c799dab123fc1ebc8b8db620f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:03 +0200 Subject: [PATCH 557/708] ALSA: atiixp: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 86bde74dbf09 ("ALSA: atiixp: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-7-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/atiixp.c | 10 ++++++++-- sound/pci/atiixp_modem.c | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sound/pci/atiixp.c b/sound/pci/atiixp.c index b8e035d5930d..43d01f1847ed 100644 --- a/sound/pci/atiixp.c +++ b/sound/pci/atiixp.c @@ -1572,8 +1572,8 @@ static int snd_atiixp_init(struct snd_card *card, struct pci_dev *pci) } -static int snd_atiixp_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_atiixp_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct atiixp *chip; @@ -1623,6 +1623,12 @@ static int snd_atiixp_probe(struct pci_dev *pci, return 0; } +static int snd_atiixp_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_atiixp_probe(pci, pci_id)); +} + static struct pci_driver atiixp_driver = { .name = KBUILD_MODNAME, .id_table = snd_atiixp_ids, diff --git a/sound/pci/atiixp_modem.c b/sound/pci/atiixp_modem.c index 178dce8ef1e9..8864c4c3c7e1 100644 --- a/sound/pci/atiixp_modem.c +++ b/sound/pci/atiixp_modem.c @@ -1201,8 +1201,8 @@ static int snd_atiixp_init(struct snd_card *card, struct pci_dev *pci) } -static int snd_atiixp_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_atiixp_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct atiixp_modem *chip; @@ -1247,6 +1247,12 @@ static int snd_atiixp_probe(struct pci_dev *pci, return 0; } +static int snd_atiixp_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_atiixp_probe(pci, pci_id)); +} + static struct pci_driver atiixp_modem_driver = { .name = KBUILD_MODNAME, .id_table = snd_atiixp_ids, From b093de145bc8769c6e9207947afad9efe102f4f6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:04 +0200 Subject: [PATCH 558/708] ALSA: au88x0: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: e44b5b440609 ("ALSA: au88x0: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-8-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/au88x0/au88x0.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/pci/au88x0/au88x0.c b/sound/pci/au88x0/au88x0.c index 342ef2a6655e..eb234153691b 100644 --- a/sound/pci/au88x0/au88x0.c +++ b/sound/pci/au88x0/au88x0.c @@ -193,7 +193,7 @@ snd_vortex_create(struct snd_card *card, struct pci_dev *pci) // constructor -- see "Constructor" sub-section static int -snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +__snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -310,6 +310,12 @@ snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return 0; } +static int +snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_vortex_probe(pci, pci_id)); +} + // pci_driver definition static struct pci_driver vortex_driver = { .name = KBUILD_MODNAME, From 49fe36e1c02cb06f66689c888e4e767c31cd259d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:05 +0200 Subject: [PATCH 559/708] ALSA: azt3328: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 8c5823ef31e1 ("ALSA: azt3328: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-9-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/azt3328.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/pci/azt3328.c b/sound/pci/azt3328.c index 089050470ff2..7f329dfc5404 100644 --- a/sound/pci/azt3328.c +++ b/sound/pci/azt3328.c @@ -2427,7 +2427,7 @@ snd_azf3328_create(struct snd_card *card, } static int -snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +__snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2520,6 +2520,12 @@ snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return 0; } +static int +snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_azf3328_probe(pci, pci_id)); +} + #ifdef CONFIG_PM_SLEEP static inline void snd_azf3328_suspend_regs(const struct snd_azf3328 *chip, From c79442cc5a38e46597bc647128c8f1de62d80020 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:06 +0200 Subject: [PATCH 560/708] ALSA: ca0106: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 1656fa6ea258 ("ALSA: ca0106: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-10-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/ca0106/ca0106_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/ca0106/ca0106_main.c b/sound/pci/ca0106/ca0106_main.c index 8577f9fa5ea6..cf1bac7a435f 100644 --- a/sound/pci/ca0106/ca0106_main.c +++ b/sound/pci/ca0106/ca0106_main.c @@ -1725,8 +1725,8 @@ static int snd_ca0106_midi(struct snd_ca0106 *chip, unsigned int channel) } -static int snd_ca0106_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_ca0106_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1786,6 +1786,12 @@ static int snd_ca0106_probe(struct pci_dev *pci, return 0; } +static int snd_ca0106_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_ca0106_probe(pci, pci_id)); +} + #ifdef CONFIG_PM_SLEEP static int snd_ca0106_suspend(struct device *dev) { From 9bf5ed9a4e623583f15202d99f4521bc39050f61 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:07 +0200 Subject: [PATCH 561/708] ALSA: cs4281: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 99041fea70d0 ("ALSA: cs4281: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-11-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/cs4281.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/cs4281.c b/sound/pci/cs4281.c index e7367402b84a..0c9cadf7b3b8 100644 --- a/sound/pci/cs4281.c +++ b/sound/pci/cs4281.c @@ -1827,8 +1827,8 @@ static void snd_cs4281_opl3_command(struct snd_opl3 *opl3, unsigned short cmd, spin_unlock_irqrestore(&opl3->reg_lock, flags); } -static int snd_cs4281_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_cs4281_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1888,6 +1888,12 @@ static int snd_cs4281_probe(struct pci_dev *pci, return 0; } +static int snd_cs4281_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_cs4281_probe(pci, pci_id)); +} + /* * Power Management */ From 2a56314798e0227cf51e3d1d184a419dc07bc173 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:08 +0200 Subject: [PATCH 562/708] ALSA: cs5535audio: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). Fixes: 5eba4c646dfe ("ALSA: cs5535audio: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-12-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/cs5535audio/cs5535audio.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/cs5535audio/cs5535audio.c b/sound/pci/cs5535audio/cs5535audio.c index 499fa0148f9a..440b8f9b40c9 100644 --- a/sound/pci/cs5535audio/cs5535audio.c +++ b/sound/pci/cs5535audio/cs5535audio.c @@ -281,8 +281,8 @@ static int snd_cs5535audio_create(struct snd_card *card, return 0; } -static int snd_cs5535audio_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_cs5535audio_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -331,6 +331,12 @@ static int snd_cs5535audio_probe(struct pci_dev *pci, return 0; } +static int snd_cs5535audio_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_cs5535audio_probe(pci, pci_id)); +} + static struct pci_driver cs5535audio_driver = { .name = KBUILD_MODNAME, .id_table = snd_cs5535audio_ids, From f37019b6bfe2e13cc536af0e6a42ed62005392ae Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:09 +0200 Subject: [PATCH 563/708] ALSA: emu10k1x: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 2b377c6b6012 ("ALSA: emu10k1x: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-13-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/emu10k1/emu10k1x.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c index c49c44dc1082..89043392f3ec 100644 --- a/sound/pci/emu10k1/emu10k1x.c +++ b/sound/pci/emu10k1/emu10k1x.c @@ -1491,8 +1491,8 @@ static int snd_emu10k1x_midi(struct emu10k1x *emu) return 0; } -static int snd_emu10k1x_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_emu10k1x_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1554,6 +1554,12 @@ static int snd_emu10k1x_probe(struct pci_dev *pci, return 0; } +static int snd_emu10k1x_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_emu10k1x_probe(pci, pci_id)); +} + // PCI IDs static const struct pci_device_id snd_emu10k1x_ids[] = { { PCI_VDEVICE(CREATIVE, 0x0006), 0 }, /* Dell OEM version (EMU10K1) */ From c2dc46932d117a1505f589ad1db3095aa6789058 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:10 +0200 Subject: [PATCH 564/708] ALSA: ens137x: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 10ed6eaf9d72 ("ALSA: ens137x: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-14-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/ens1370.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/ens1370.c b/sound/pci/ens1370.c index 2651f0c64c06..94efe347a97a 100644 --- a/sound/pci/ens1370.c +++ b/sound/pci/ens1370.c @@ -2304,8 +2304,8 @@ static irqreturn_t snd_audiopci_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static int snd_audiopci_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_audiopci_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2369,6 +2369,12 @@ static int snd_audiopci_probe(struct pci_dev *pci, return 0; } +static int snd_audiopci_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_audiopci_probe(pci, pci_id)); +} + static struct pci_driver ens137x_driver = { .name = KBUILD_MODNAME, .id_table = snd_audiopci_ids, From bc22628591e5913e67edb3c2a89b97849e30a8f8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:11 +0200 Subject: [PATCH 565/708] ALSA: es1938: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 08e9d3ab4cc1 ("ALSA: es1938: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-15-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/es1938.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/es1938.c b/sound/pci/es1938.c index 00b976f42a3d..e34ec6f89e7e 100644 --- a/sound/pci/es1938.c +++ b/sound/pci/es1938.c @@ -1716,8 +1716,8 @@ static int snd_es1938_mixer(struct es1938 *chip) } -static int snd_es1938_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_es1938_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1796,6 +1796,12 @@ static int snd_es1938_probe(struct pci_dev *pci, return 0; } +static int snd_es1938_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_es1938_probe(pci, pci_id)); +} + static struct pci_driver es1938_driver = { .name = KBUILD_MODNAME, .id_table = snd_es1938_ids, From de9a01bc95a9e5e36d0659521bb04579053d8566 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:12 +0200 Subject: [PATCH 566/708] ALSA: es1968: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: a7b4cbfdc701 ("ALSA: es1968: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-16-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/es1968.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/es1968.c b/sound/pci/es1968.c index 6a8a02a9ecf4..4a7e20bb11bc 100644 --- a/sound/pci/es1968.c +++ b/sound/pci/es1968.c @@ -2741,8 +2741,8 @@ static int snd_es1968_create(struct snd_card *card, /* */ -static int snd_es1968_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_es1968_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2848,6 +2848,12 @@ static int snd_es1968_probe(struct pci_dev *pci, return 0; } +static int snd_es1968_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_es1968_probe(pci, pci_id)); +} + static struct pci_driver es1968_driver = { .name = KBUILD_MODNAME, .id_table = snd_es1968_ids, From 7f611274a3d1657a67b3fa8cd0cec1dee00e02b4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:13 +0200 Subject: [PATCH 567/708] ALSA: fm801: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 47c413395376 ("ALSA: fm801: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-17-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/fm801.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c index 9c22ff19e56d..62b3cb126c6d 100644 --- a/sound/pci/fm801.c +++ b/sound/pci/fm801.c @@ -1268,8 +1268,8 @@ static int snd_fm801_create(struct snd_card *card, return 0; } -static int snd_card_fm801_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_card_fm801_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1333,6 +1333,12 @@ static int snd_card_fm801_probe(struct pci_dev *pci, return 0; } +static int snd_card_fm801_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_card_fm801_probe(pci, pci_id)); +} + #ifdef CONFIG_PM_SLEEP static const unsigned char saved_regs[] = { FM801_PCM_VOL, FM801_I2S_VOL, FM801_FM_VOL, FM801_REC_SRC, From 4a850a0079ce601c0c4016f4edb7d618e811ed7d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:14 +0200 Subject: [PATCH 568/708] ALSA: ice1724: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 314f6dbb1f33 ("ALSA: ice1724: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-18-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/ice1712/ice1724.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c index f6275868877a..6fab2ad85bbe 100644 --- a/sound/pci/ice1712/ice1724.c +++ b/sound/pci/ice1712/ice1724.c @@ -2519,8 +2519,8 @@ static int snd_vt1724_create(struct snd_card *card, * */ -static int snd_vt1724_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_vt1724_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2662,6 +2662,12 @@ static int snd_vt1724_probe(struct pci_dev *pci, return 0; } +static int snd_vt1724_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_vt1724_probe(pci, pci_id)); +} + #ifdef CONFIG_PM_SLEEP static int snd_vt1724_suspend(struct device *dev) { From 71b21f5f8970a87f034138454ebeff0608d24875 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:15 +0200 Subject: [PATCH 569/708] ALSA: intel8x0: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 7835e0901e24 ("ALSA: intel8x0: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-19-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/intel8x0.c | 10 ++++++++-- sound/pci/intel8x0m.c | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index a51032b3ac4d..ae285c0a629c 100644 --- a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -3109,8 +3109,8 @@ static int check_default_spdif_aclink(struct pci_dev *pci) return 0; } -static int snd_intel8x0_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_intel8x0_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct intel8x0 *chip; @@ -3189,6 +3189,12 @@ static int snd_intel8x0_probe(struct pci_dev *pci, return 0; } +static int snd_intel8x0_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_intel8x0_probe(pci, pci_id)); +} + static struct pci_driver intel8x0_driver = { .name = KBUILD_MODNAME, .id_table = snd_intel8x0_ids, diff --git a/sound/pci/intel8x0m.c b/sound/pci/intel8x0m.c index 7de3cb2f17b5..2845cc006d0c 100644 --- a/sound/pci/intel8x0m.c +++ b/sound/pci/intel8x0m.c @@ -1178,8 +1178,8 @@ static struct shortname_table { { 0 }, }; -static int snd_intel8x0m_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_intel8x0m_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct intel8x0m *chip; @@ -1225,6 +1225,12 @@ static int snd_intel8x0m_probe(struct pci_dev *pci, return 0; } +static int snd_intel8x0m_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_intel8x0m_probe(pci, pci_id)); +} + static struct pci_driver intel8x0m_driver = { .name = KBUILD_MODNAME, .id_table = snd_intel8x0m_ids, From c01b723a56ce18ae66ff18c5803942badc15fbcd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:16 +0200 Subject: [PATCH 570/708] ALSA: korg1212: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: b5cde369b618 ("ALSA: korg1212: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-20-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/korg1212/korg1212.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/korg1212/korg1212.c b/sound/pci/korg1212/korg1212.c index 5c9e240ff6a9..33b4f95d65b3 100644 --- a/sound/pci/korg1212/korg1212.c +++ b/sound/pci/korg1212/korg1212.c @@ -2355,7 +2355,7 @@ snd_korg1212_probe(struct pci_dev *pci, err = snd_korg1212_create(card, pci); if (err < 0) - return err; + goto error; strcpy(card->driver, "korg1212"); strcpy(card->shortname, "korg1212"); @@ -2366,10 +2366,14 @@ snd_korg1212_probe(struct pci_dev *pci, err = snd_card_register(card); if (err < 0) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver korg1212_driver = { From ae86bf5c2a8d81418eadf1c31dd9253b609e3093 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:17 +0200 Subject: [PATCH 571/708] ALSA: maestro3: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 5c0939253c3c ("ALSA: maestro3: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-21-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/maestro3.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/pci/maestro3.c b/sound/pci/maestro3.c index 056838ead21d..261850775c80 100644 --- a/sound/pci/maestro3.c +++ b/sound/pci/maestro3.c @@ -2637,7 +2637,7 @@ snd_m3_create(struct snd_card *card, struct pci_dev *pci, /* */ static int -snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +__snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2702,6 +2702,12 @@ snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return 0; } +static int +snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_m3_probe(pci, pci_id)); +} + static struct pci_driver m3_driver = { .name = KBUILD_MODNAME, .id_table = snd_m3_ids, From 348f08de55b149e41a05111d1a713c4484e5a426 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:18 +0200 Subject: [PATCH 572/708] ALSA: riptide: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 546c201a891e ("ALSA: riptide: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-22-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/riptide/riptide.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c index 5a987c683c41..b37c877c2c16 100644 --- a/sound/pci/riptide/riptide.c +++ b/sound/pci/riptide/riptide.c @@ -2023,7 +2023,7 @@ static void snd_riptide_joystick_remove(struct pci_dev *pci) #endif static int -snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +__snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2124,6 +2124,12 @@ snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return 0; } +static int +snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_card_riptide_probe(pci, pci_id)); +} + static struct pci_driver driver = { .name = KBUILD_MODNAME, .id_table = snd_riptide_ids, From 55d2d046b23b9bcb907f6b3e38e52113d55085eb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:19 +0200 Subject: [PATCH 573/708] ALSA: rme32: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 102e6156ded2 ("ALSA: rme32: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-23-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/rme32.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/pci/rme32.c b/sound/pci/rme32.c index 5b6bd9f0b2f7..9c0ac025e143 100644 --- a/sound/pci/rme32.c +++ b/sound/pci/rme32.c @@ -1875,7 +1875,7 @@ static void snd_rme32_card_free(struct snd_card *card) } static int -snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +__snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { static int dev; struct rme32 *rme32; @@ -1927,6 +1927,12 @@ snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return 0; } +static int +snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_rme32_probe(pci, pci_id)); +} + static struct pci_driver rme32_driver = { .name = KBUILD_MODNAME, .id_table = snd_rme32_ids, From 93b884f8d82f08c7af542703a724cc23cd2d5bfc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:20 +0200 Subject: [PATCH 574/708] ALSA: rme96: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: df06df7cc997 ("ALSA: rme96: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-24-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/rme96.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/rme96.c b/sound/pci/rme96.c index 8fc811504920..bccb7e0d3d11 100644 --- a/sound/pci/rme96.c +++ b/sound/pci/rme96.c @@ -2430,8 +2430,8 @@ static void snd_rme96_card_free(struct snd_card *card) } static int -snd_rme96_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +__snd_rme96_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct rme96 *rme96; @@ -2498,6 +2498,12 @@ snd_rme96_probe(struct pci_dev *pci, return 0; } +static int snd_rme96_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_rme96_probe(pci, pci_id)); +} + static struct pci_driver rme96_driver = { .name = KBUILD_MODNAME, .id_table = snd_rme96_ids, From b087a381d7386ec95803222d0d9b1ac499550713 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:21 +0200 Subject: [PATCH 575/708] ALSA: sonicvibes: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 2ca6cbde6ad7 ("ALSA: sonicvibes: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-25-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/sonicvibes.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/sonicvibes.c b/sound/pci/sonicvibes.c index c8c49881008f..f91cbf6eeca0 100644 --- a/sound/pci/sonicvibes.c +++ b/sound/pci/sonicvibes.c @@ -1387,8 +1387,8 @@ static int snd_sonicvibes_midi(struct sonicvibes *sonic, return 0; } -static int snd_sonic_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_sonic_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1459,6 +1459,12 @@ static int snd_sonic_probe(struct pci_dev *pci, return 0; } +static int snd_sonic_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_sonic_probe(pci, pci_id)); +} + static struct pci_driver sonicvibes_driver = { .name = KBUILD_MODNAME, .id_table = snd_sonic_ids, From 27a0963f9cea5be3c68281f07fe82cdf712ef333 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:22 +0200 Subject: [PATCH 576/708] ALSA: via82xx: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: afaf99751d0c ("ALSA: via82xx: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-26-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/via82xx.c | 10 ++++++++-- sound/pci/via82xx_modem.c | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c index 65514f7e42d7..361b83fd721e 100644 --- a/sound/pci/via82xx.c +++ b/sound/pci/via82xx.c @@ -2458,8 +2458,8 @@ static int check_dxs_list(struct pci_dev *pci, int revision) return VIA_DXS_48K; }; -static int snd_via82xx_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_via82xx_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct via82xx *chip; @@ -2569,6 +2569,12 @@ static int snd_via82xx_probe(struct pci_dev *pci, return 0; } +static int snd_via82xx_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_via82xx_probe(pci, pci_id)); +} + static struct pci_driver via82xx_driver = { .name = KBUILD_MODNAME, .id_table = snd_via82xx_ids, diff --git a/sound/pci/via82xx_modem.c b/sound/pci/via82xx_modem.c index 234f7fbed236..ca7f024bf8ec 100644 --- a/sound/pci/via82xx_modem.c +++ b/sound/pci/via82xx_modem.c @@ -1103,8 +1103,8 @@ static int snd_via82xx_create(struct snd_card *card, } -static int snd_via82xx_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_via82xx_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct via82xx_modem *chip; @@ -1157,6 +1157,12 @@ static int snd_via82xx_probe(struct pci_dev *pci, return 0; } +static int snd_via82xx_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_via82xx_probe(pci, pci_id)); +} + static struct pci_driver via82xx_modem_driver = { .name = KBUILD_MODNAME, .id_table = snd_via82xx_modem_ids, From 5e154dfb4f9995096aa6d342df75040ae802c17e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:23 +0200 Subject: [PATCH 577/708] ALSA: intel_hdmi: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 854577ac2aea ("ALSA: x86: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-27-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/x86/intel_hdmi_audio.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index b00634663346..0d828e35b401 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1652,7 +1652,7 @@ static void hdmi_lpe_audio_free(struct snd_card *card) * This function is called when the i915 driver creates the * hdmi-lpe-audio platform device. */ -static int hdmi_lpe_audio_probe(struct platform_device *pdev) +static int __hdmi_lpe_audio_probe(struct platform_device *pdev) { struct snd_card *card; struct snd_intelhad_card *card_ctx; @@ -1815,6 +1815,11 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) return 0; } +static int hdmi_lpe_audio_probe(struct platform_device *pdev) +{ + return snd_card_free_on_error(&pdev->dev, __hdmi_lpe_audio_probe(pdev)); +} + static const struct dev_pm_ops hdmi_lpe_audio_pm = { SET_SYSTEM_SLEEP_PM_OPS(hdmi_lpe_audio_suspend, hdmi_lpe_audio_resume) }; From 2236a3243ff8291e97c70097dd11a0fdb8904380 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:24 +0200 Subject: [PATCH 578/708] ALSA: sis7019: Fix the missing error handling The previous cleanup with devres forgot to replace the snd_card_free() call with the devm version. Moreover, it still needs the manual call of snd_card_free() at the probe error path, otherwise the reverse order of the releases may happen. This patch addresses those issues. Fixes: 499ddc16394c ("ALSA: sis7019: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-28-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/sis7019.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sound/pci/sis7019.c b/sound/pci/sis7019.c index 0b722b0e0604..fabe393607f8 100644 --- a/sound/pci/sis7019.c +++ b/sound/pci/sis7019.c @@ -1331,8 +1331,8 @@ static int sis_chip_create(struct snd_card *card, return 0; } -static int snd_sis7019_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_sis7019_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct sis7019 *sis; @@ -1352,8 +1352,8 @@ static int snd_sis7019_probe(struct pci_dev *pci, if (!codecs) codecs = SIS_PRIMARY_CODEC_PRESENT; - rc = snd_card_new(&pci->dev, index, id, THIS_MODULE, - sizeof(*sis), &card); + rc = snd_devm_card_new(&pci->dev, index, id, THIS_MODULE, + sizeof(*sis), &card); if (rc < 0) return rc; @@ -1386,6 +1386,12 @@ static int snd_sis7019_probe(struct pci_dev *pci, return 0; } +static int snd_sis7019_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_sis7019_probe(pci, pci_id)); +} + static struct pci_driver sis7019_driver = { .name = KBUILD_MODNAME, .id_table = snd_sis7019_ids, From f0438155273f057fec9818bc9d1b782ba35cf6a1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:25 +0200 Subject: [PATCH 579/708] ALSA: bt87x: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 9e80ed64a006 ("ALSA: bt87x: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-29-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/bt87x.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/bt87x.c b/sound/pci/bt87x.c index d23f93163841..621985bfee5d 100644 --- a/sound/pci/bt87x.c +++ b/sound/pci/bt87x.c @@ -805,8 +805,8 @@ static int snd_bt87x_detect_card(struct pci_dev *pci) return SND_BT87X_BOARD_UNKNOWN; } -static int snd_bt87x_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_bt87x_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -889,6 +889,12 @@ static int snd_bt87x_probe(struct pci_dev *pci, return 0; } +static int snd_bt87x_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_bt87x_probe(pci, pci_id)); +} + /* default entries for all Bt87x cards - it's not exported */ /* driver_data is set to 0 to call detection */ static const struct pci_device_id snd_bt87x_default_ids[] = { From d04e84b9817c652002f0ee9b42059d41493e9118 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:26 +0200 Subject: [PATCH 580/708] ALSA: lola: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 098fe3d6e775 ("ALSA: lola: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-30-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/lola/lola.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/lola/lola.c b/sound/pci/lola/lola.c index 5269a1d396a5..1aa30e90b86a 100644 --- a/sound/pci/lola/lola.c +++ b/sound/pci/lola/lola.c @@ -637,8 +637,8 @@ static int lola_create(struct snd_card *card, struct pci_dev *pci, int dev) return 0; } -static int lola_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __lola_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -687,6 +687,12 @@ static int lola_probe(struct pci_dev *pci, return 0; } +static int lola_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __lola_probe(pci, pci_id)); +} + /* PCI IDs */ static const struct pci_device_id lola_ids[] = { { PCI_VDEVICE(DIGIGRAM, 0x0001) }, From ab8bce9da6102c575c473c053672547589bc4c59 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:27 +0200 Subject: [PATCH 581/708] ALSA: als300: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() manually on the error from the probe callback. Fixes: 21a9314cf93b ("ALSA: als300: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-31-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/als300.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/als300.c b/sound/pci/als300.c index b86565dcdbe4..c70aff060120 100644 --- a/sound/pci/als300.c +++ b/sound/pci/als300.c @@ -708,7 +708,7 @@ static int snd_als300_probe(struct pci_dev *pci, err = snd_als300_create(card, pci, chip_type); if (err < 0) - return err; + goto error; strcpy(card->driver, "ALS300"); if (chip->chip_type == DEVICE_ALS300_PLUS) @@ -723,11 +723,15 @@ static int snd_als300_probe(struct pci_dev *pci, err = snd_card_register(card); if (err < 0) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver als300_driver = { From bf4067e8a19eae67c45659a956c361d59251ba57 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:28 +0200 Subject: [PATCH 582/708] ALSA: aw2: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() manually on the error from the probe callback. Fixes: 33631012cd06 ("ALSA: aw2: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-32-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/aw2/aw2-alsa.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/aw2/aw2-alsa.c b/sound/pci/aw2/aw2-alsa.c index d56f126d6fdd..29a4bcdec237 100644 --- a/sound/pci/aw2/aw2-alsa.c +++ b/sound/pci/aw2/aw2-alsa.c @@ -275,7 +275,7 @@ static int snd_aw2_probe(struct pci_dev *pci, /* (3) Create main component */ err = snd_aw2_create(card, pci); if (err < 0) - return err; + goto error; /* initialize mutex */ mutex_init(&chip->mtx); @@ -294,13 +294,17 @@ static int snd_aw2_probe(struct pci_dev *pci, /* (6) Register card instance */ err = snd_card_register(card); if (err < 0) - return err; + goto error; /* (7) Set PCI driver data */ pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } /* open callback */ From a59396b1c11823c69c31621198c04def17f3a869 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:29 +0200 Subject: [PATCH 583/708] ALSA: cmipci: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() manually on the error from the probe callback. Fixes: 87e082ad84a7 ("ALSA: cmipci: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-33-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/cmipci.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c index dab801d9d3b4..727db6d43391 100644 --- a/sound/pci/cmipci.c +++ b/sound/pci/cmipci.c @@ -3247,15 +3247,19 @@ static int snd_cmipci_probe(struct pci_dev *pci, err = snd_cmipci_create(card, pci, dev); if (err < 0) - return err; + goto error; err = snd_card_register(card); if (err < 0) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } #ifdef CONFIG_PM_SLEEP From 60797a21dd8360a99ba797f8ca587087c07bb54c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:30 +0200 Subject: [PATCH 584/708] ALSA: lx6464es: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() manually on the error from the probe callback. Fixes: 6f16c19b115e ("ALSA: lx6464es: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-34-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/lx6464es/lx6464es.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c index 168a1084f730..bd9b6148dd6f 100644 --- a/sound/pci/lx6464es/lx6464es.c +++ b/sound/pci/lx6464es/lx6464es.c @@ -1019,7 +1019,7 @@ static int snd_lx6464es_probe(struct pci_dev *pci, err = snd_lx6464es_create(card, pci); if (err < 0) { dev_err(card->dev, "error during snd_lx6464es_create\n"); - return err; + goto error; } strcpy(card->driver, "LX6464ES"); @@ -1036,12 +1036,16 @@ static int snd_lx6464es_probe(struct pci_dev *pci, err = snd_card_register(card); if (err < 0) - return err; + goto error; dev_dbg(chip->card->dev, "initialization successful\n"); pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver lx6464es_driver = { From 6ebc16e206aa82ddb0450c907865c55bcb7c0f43 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:31 +0200 Subject: [PATCH 585/708] ALSA: oxygen: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() on the error from the probe callback using a new helper function. Fixes: 596ae97ab0ce ("ALSA: oxygen: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-35-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/oxygen/oxygen_lib.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sound/pci/oxygen/oxygen_lib.c b/sound/pci/oxygen/oxygen_lib.c index 4fb3f2484fdb..92ffe9dc20c5 100644 --- a/sound/pci/oxygen/oxygen_lib.c +++ b/sound/pci/oxygen/oxygen_lib.c @@ -576,7 +576,7 @@ static void oxygen_card_free(struct snd_card *card) mutex_destroy(&chip->mutex); } -int oxygen_pci_probe(struct pci_dev *pci, int index, char *id, +static int __oxygen_pci_probe(struct pci_dev *pci, int index, char *id, struct module *owner, const struct pci_device_id *ids, int (*get_model)(struct oxygen *chip, @@ -701,6 +701,16 @@ int oxygen_pci_probe(struct pci_dev *pci, int index, char *id, pci_set_drvdata(pci, card); return 0; } + +int oxygen_pci_probe(struct pci_dev *pci, int index, char *id, + struct module *owner, + const struct pci_device_id *ids, + int (*get_model)(struct oxygen *chip, + const struct pci_device_id *id)) +{ + return snd_card_free_on_error(&pci->dev, + __oxygen_pci_probe(pci, index, id, owner, ids, get_model)); +} EXPORT_SYMBOL(oxygen_pci_probe); #ifdef CONFIG_PM_SLEEP From e2263f0bf7443a200a5c1c418baefd92f1674600 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:32 +0200 Subject: [PATCH 586/708] ALSA: hdsp: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() manually on the error from the probe callback. Fixes: d136b8e54f92 ("ALSA: hdsp: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-36-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/rme9652/hdsp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index 96c12dfb24cf..3db641318d3a 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -5444,17 +5444,21 @@ static int snd_hdsp_probe(struct pci_dev *pci, hdsp->pci = pci; err = snd_hdsp_create(card, hdsp); if (err) - return err; + goto error; strcpy(card->shortname, "Hammerfall DSP"); sprintf(card->longname, "%s at 0x%lx, irq %d", hdsp->card_name, hdsp->port, hdsp->irq); err = snd_card_register(card); if (err) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver hdsp_driver = { From eab521aebcdeb1c801009503e3a7f8989e3c6b36 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:33 +0200 Subject: [PATCH 587/708] ALSA: hdspm: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() manually on the error from the probe callback. Fixes: 0195ca5fd1f4 ("ALSA: hdspm: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-37-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/rme9652/hdspm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index ff06ee82607c..fa1812e7a49d 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -6895,7 +6895,7 @@ static int snd_hdspm_probe(struct pci_dev *pci, err = snd_hdspm_create(card, hdspm); if (err < 0) - return err; + goto error; if (hdspm->io_type != MADIface) { snprintf(card->shortname, sizeof(card->shortname), "%s_%x", @@ -6914,12 +6914,16 @@ static int snd_hdspm_probe(struct pci_dev *pci, err = snd_card_register(card); if (err < 0) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver hdspm_driver = { From b2aa4f80693b7841e5ac4eadbd2d8cec56b10a51 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:34 +0200 Subject: [PATCH 588/708] ALSA: rme9652: Fix the missing snd_card_free() call at probe error The previous cleanup with devres may lead to the incorrect release orders at the probe error handling due to the devres's nature. Until we register the card, snd_card_free() has to be called at first for releasing the stuff properly when the driver tries to manage and release the stuff via card->private_free(). This patch fixes it by calling snd_card_free() manually on the error from the probe callback. Fixes: b1002b2d41c5 ("ALSA: rme9652: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-38-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/rme9652/rme9652.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/rme9652/rme9652.c b/sound/pci/rme9652/rme9652.c index 7755e19aa776..1d614fe89a6a 100644 --- a/sound/pci/rme9652/rme9652.c +++ b/sound/pci/rme9652/rme9652.c @@ -2572,7 +2572,7 @@ static int snd_rme9652_probe(struct pci_dev *pci, rme9652->pci = pci; err = snd_rme9652_create(card, rme9652, precise_ptr[dev]); if (err) - return err; + goto error; strcpy(card->shortname, rme9652->card_name); @@ -2580,10 +2580,14 @@ static int snd_rme9652_probe(struct pci_dev *pci, card->shortname, rme9652->port, rme9652->irq); err = snd_card_register(card); if (err) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver rme9652_driver = { From 4fb27190879b82e48ce89a56e9d6c04437dbc065 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:35 +0200 Subject: [PATCH 589/708] ALSA: mtpav: Don't call card private_free at probe error path The card destructor of nm256 driver does merely stopping the running timer, and it's superfluous for the probe error handling. Moreover, calling this via the previous devres change would lead to another problem due to the reverse call order. This patch moves the setup of the private_free callback after the card registration, so that it can be used only after fully set up. Fixes: aa92050f10f0 ("ALSA: mtpav: Allocate resources with device-managed APIs") Link: https://lore.kernel.org/r/20220412102636.16000-39-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/drivers/mtpav.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/drivers/mtpav.c b/sound/drivers/mtpav.c index 11235baaf6fa..f212f233ea61 100644 --- a/sound/drivers/mtpav.c +++ b/sound/drivers/mtpav.c @@ -693,8 +693,6 @@ static int snd_mtpav_probe(struct platform_device *dev) mtp_card->outmidihwport = 0xffffffff; timer_setup(&mtp_card->timer, snd_mtpav_output_timer, 0); - card->private_free = snd_mtpav_free; - err = snd_mtpav_get_RAWMIDI(mtp_card); if (err < 0) return err; @@ -716,6 +714,8 @@ static int snd_mtpav_probe(struct platform_device *dev) if (err < 0) return err; + card->private_free = snd_mtpav_free; + platform_set_drvdata(dev, card); printk(KERN_INFO "Motu MidiTimePiece on parallel port irq: %d ioport: 0x%lx\n", irq, port); return 0; From f20ae5074dfb38f23b0c07c62bdf8e7254a0acf8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 12:26:36 +0200 Subject: [PATCH 590/708] ALSA: nm256: Don't call card private_free at probe error path The card destructor of nm256 driver does merely stopping the running streams, and it's superfluous for the probe error handling. Moreover, calling this via the previous devres change would lead to another problem due to the reverse call order. This patch moves the setup of the private_free callback after the card registration, so that it can be used only after fully set up. Fixes: c19935f04784 ("ALSA: nm256: Allocate resources with device-managed APIs") Cc: Link: https://lore.kernel.org/r/20220412102636.16000-40-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/nm256/nm256.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/nm256/nm256.c b/sound/pci/nm256/nm256.c index c9c178504959..f99a1e96e923 100644 --- a/sound/pci/nm256/nm256.c +++ b/sound/pci/nm256/nm256.c @@ -1573,7 +1573,6 @@ snd_nm256_create(struct snd_card *card, struct pci_dev *pci) chip->coeffs_current = 0; snd_nm256_init_chip(chip); - card->private_free = snd_nm256_free; // pci_set_master(pci); /* needed? */ return 0; @@ -1680,6 +1679,7 @@ static int snd_nm256_probe(struct pci_dev *pci, err = snd_card_register(card); if (err < 0) return err; + card->private_free = snd_nm256_free; pci_set_drvdata(pci, card); return 0; From 565c5e616e8061b40a2e1d786c418a7ac3503a8d Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Tue, 12 Apr 2022 09:30:39 -0700 Subject: [PATCH 591/708] io_uring: move io_uring_rsrc_update2 validation Move validation to be more consistently straight after copy_from_user. This is already done in io_register_rsrc_update and so this removes that redundant check. Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220412163042.2788062-2-dylany@fb.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 8a931eb8a3a6..58bfa71fe3b6 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -11398,8 +11398,6 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, __u32 tmp; int err; - if (up->resv) - return -EINVAL; if (check_add_overflow(up->offset, nr_args, &tmp)) return -EOVERFLOW; err = io_rsrc_node_switch_start(ctx); @@ -11425,6 +11423,8 @@ static int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, memset(&up, 0, sizeof(up)); if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update))) return -EFAULT; + if (up.resv) + return -EINVAL; return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args); } From d8a3ba9c143bf89c032deced8a686ffa53b46098 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Tue, 12 Apr 2022 09:30:40 -0700 Subject: [PATCH 592/708] io_uring: verify that resv2 is 0 in io_uring_rsrc_update2 Verify that the user does not pass in anything but 0 for this field. Fixes: 992da01aa932 ("io_uring: change registration/upd/rsrc tagging ABI") Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220412163042.2788062-3-dylany@fb.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 58bfa71fe3b6..e899192ffb77 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6839,6 +6839,7 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) up.nr = 0; up.tags = 0; up.resv = 0; + up.resv2 = 0; io_ring_submit_lock(ctx, needs_lock); ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, @@ -11423,7 +11424,7 @@ static int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, memset(&up, 0, sizeof(up)); if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update))) return -EFAULT; - if (up.resv) + if (up.resv || up.resv2) return -EINVAL; return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args); } @@ -11437,7 +11438,7 @@ static int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, return -EINVAL; if (copy_from_user(&up, arg, sizeof(up))) return -EFAULT; - if (!up.nr || up.resv) + if (!up.nr || up.resv || up.resv2) return -EINVAL; return __io_register_rsrc_update(ctx, type, &up, up.nr); } From 6fb53cf8ff2c4713247df523404d24f466b98f52 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Tue, 12 Apr 2022 09:30:41 -0700 Subject: [PATCH 593/708] io_uring: verify resv is 0 in ringfd register/unregister Only allow resv field to be 0 in struct io_uring_rsrc_update user arguments. Fixes: e7a6c00dc77a ("io_uring: add support for registering ring file descriptors") Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220412163042.2788062-4-dylany@fb.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e899192ffb77..a84bfec97d0d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -10533,6 +10533,11 @@ static int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg, break; } + if (reg.resv) { + ret = -EINVAL; + break; + } + if (reg.offset == -1U) { start = 0; end = IO_RINGFD_REG_MAX; @@ -10579,7 +10584,7 @@ static int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg, ret = -EFAULT; break; } - if (reg.offset >= IO_RINGFD_REG_MAX) { + if (reg.resv || reg.offset >= IO_RINGFD_REG_MAX) { ret = -EINVAL; break; } From d2347b9695dafe5c388a5f9aeb70e27a7a4d29cf Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Tue, 12 Apr 2022 09:30:42 -0700 Subject: [PATCH 594/708] io_uring: verify pad field is 0 in io_get_ext_arg Ensure that only 0 is passed for pad here. Fixes: c73ebb685fb6 ("io_uring: add timeout support for io_uring_enter()") Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220412163042.2788062-5-dylany@fb.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index a84bfec97d0d..6b1a98697dcf 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -10711,6 +10711,8 @@ static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz return -EINVAL; if (copy_from_user(&arg, argp, sizeof(arg))) return -EFAULT; + if (arg.pad) + return -EINVAL; *sig = u64_to_user_ptr(arg.sigmask); *argsz = arg.sigmask_sz; *ts = u64_to_user_ptr(arg.ts); From c40160f2998c897231f8454bf797558d30a20375 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 6 Apr 2022 00:28:15 +0200 Subject: [PATCH 595/708] gcc-plugins: latent_entropy: use /dev/urandom While the latent entropy plugin mostly doesn't derive entropy from get_random_const() for measuring the call graph, when __latent_entropy is applied to a constant, then it's initialized statically to output from get_random_const(). In that case, this data is derived from a 64-bit seed, which means a buffer of 512 bits doesn't really have that amount of compile-time entropy. This patch fixes that shortcoming by just buffering chunks of /dev/urandom output and doling it out as requested. At the same time, it's important that we don't break the use of -frandom-seed, for people who want the runtime benefits of the latent entropy plugin, while still having compile-time determinism. In that case, we detect whether gcc's set_random_seed() has been called by making a call to get_random_seed(noinit=true) in the plugin init function, which is called after set_random_seed() is called but before anything that calls get_random_seed(noinit=false), and seeing if it's zero or not. If it's not zero, we're in deterministic mode, and so we just generate numbers with a basic xorshift prng. Note that we don't detect if -frandom-seed is being used using the documented local_tick variable, because it's assigned via: local_tick = (unsigned) tv.tv_sec * 1000 + tv.tv_usec / 1000; which may well overflow and become -1 on its own, and so isn't reliable: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105171 [kees: The 256 byte rnd_buf size was chosen based on average (250), median (64), and std deviation (575) bytes of used entropy for a defconfig x86_64 build] Fixes: 38addce8b600 ("gcc-plugins: Add latent_entropy plugin") Cc: stable@vger.kernel.org Cc: PaX Team Signed-off-by: Jason A. Donenfeld Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220405222815.21155-1-Jason@zx2c4.com --- scripts/gcc-plugins/latent_entropy_plugin.c | 44 +++++++++++++-------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index 589454bce930..8425da41de0d 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -86,25 +86,31 @@ static struct plugin_info latent_entropy_plugin_info = { .help = "disable\tturn off latent entropy instrumentation\n", }; -static unsigned HOST_WIDE_INT seed; -/* - * get_random_seed() (this is a GCC function) generates the seed. - * This is a simple random generator without any cryptographic security because - * the entropy doesn't come from here. - */ +static unsigned HOST_WIDE_INT deterministic_seed; +static unsigned HOST_WIDE_INT rnd_buf[32]; +static size_t rnd_idx = ARRAY_SIZE(rnd_buf); +static int urandom_fd = -1; + static unsigned HOST_WIDE_INT get_random_const(void) { - unsigned int i; - unsigned HOST_WIDE_INT ret = 0; - - for (i = 0; i < 8 * sizeof(ret); i++) { - ret = (ret << 1) | (seed & 1); - seed >>= 1; - if (ret & 1) - seed ^= 0xD800000000000000ULL; + if (deterministic_seed) { + unsigned HOST_WIDE_INT w = deterministic_seed; + w ^= w << 13; + w ^= w >> 7; + w ^= w << 17; + deterministic_seed = w; + return deterministic_seed; } - return ret; + if (urandom_fd < 0) { + urandom_fd = open("/dev/urandom", O_RDONLY); + gcc_assert(urandom_fd >= 0); + } + if (rnd_idx >= ARRAY_SIZE(rnd_buf)) { + gcc_assert(read(urandom_fd, rnd_buf, sizeof(rnd_buf)) == sizeof(rnd_buf)); + rnd_idx = 0; + } + return rnd_buf[rnd_idx++]; } static tree tree_get_random_const(tree type) @@ -537,8 +543,6 @@ static void latent_entropy_start_unit(void *gcc_data __unused, tree type, id; int quals; - seed = get_random_seed(false); - if (in_lto_p) return; @@ -573,6 +577,12 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, const struct plugin_argument * const argv = plugin_info->argv; int i; + /* + * Call get_random_seed() with noinit=true, so that this returns + * 0 in the case where no seed has been passed via -frandom-seed. + */ + deterministic_seed = get_random_seed(true); + static const struct ggc_root_tab gt_ggc_r_gt_latent_entropy[] = { { .base = &latent_entropy_decl, From ce64763c63854b4079f2e036638aa881a1fb3fbc Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 8 Apr 2022 12:54:31 +0530 Subject: [PATCH 596/708] testing/selftests/mqueue: Fix mq_perf_tests to free the allocated cpu set The selftest "mqueue/mq_perf_tests.c" use CPU_ALLOC to allocate CPU set. This cpu set is used further in pthread_attr_setaffinity_np and by pthread_create in the code. But in current code, allocated cpu set is not freed. Fix this issue by adding CPU_FREE in the "shutdown" function which is called in most of the error/exit path for the cleanup. There are few error paths which exit without using shutdown. Add a common goto error path with CPU_FREE for these cases. Fixes: 7820b0715b6f ("tools/selftests: add mq_perf_tests") Signed-off-by: Athira Rajeev Signed-off-by: Shuah Khan --- .../testing/selftests/mqueue/mq_perf_tests.c | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c index b019e0b8221c..84fda3b49073 100644 --- a/tools/testing/selftests/mqueue/mq_perf_tests.c +++ b/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -180,6 +180,9 @@ void shutdown(int exit_val, char *err_cause, int line_no) if (in_shutdown++) return; + /* Free the cpu_set allocated using CPU_ALLOC in main function */ + CPU_FREE(cpu_set); + for (i = 0; i < num_cpus_to_pin; i++) if (cpu_threads[i]) { pthread_kill(cpu_threads[i], SIGUSR1); @@ -551,6 +554,12 @@ int main(int argc, char *argv[]) perror("sysconf(_SC_NPROCESSORS_ONLN)"); exit(1); } + + if (getuid() != 0) + ksft_exit_skip("Not running as root, but almost all tests " + "require root in order to modify\nsystem settings. " + "Exiting.\n"); + cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN)); cpu_set = CPU_ALLOC(cpus_online); if (cpu_set == NULL) { @@ -589,7 +598,7 @@ int main(int argc, char *argv[]) cpu_set)) { fprintf(stderr, "Any given CPU may " "only be given once.\n"); - exit(1); + goto err_code; } else CPU_SET_S(cpus_to_pin[cpu], cpu_set_size, cpu_set); @@ -607,7 +616,7 @@ int main(int argc, char *argv[]) queue_path = malloc(strlen(option) + 2); if (!queue_path) { perror("malloc()"); - exit(1); + goto err_code; } queue_path[0] = '/'; queue_path[1] = 0; @@ -622,17 +631,12 @@ int main(int argc, char *argv[]) fprintf(stderr, "Must pass at least one CPU to continuous " "mode.\n"); poptPrintUsage(popt_context, stderr, 0); - exit(1); + goto err_code; } else if (!continuous_mode) { num_cpus_to_pin = 1; cpus_to_pin[0] = cpus_online - 1; } - if (getuid() != 0) - ksft_exit_skip("Not running as root, but almost all tests " - "require root in order to modify\nsystem settings. " - "Exiting.\n"); - max_msgs = fopen(MAX_MSGS, "r+"); max_msgsize = fopen(MAX_MSGSIZE, "r+"); if (!max_msgs) @@ -740,4 +744,9 @@ int main(int argc, char *argv[]) sleep(1); } shutdown(0, "", 0); + +err_code: + CPU_FREE(cpu_set); + exit(1); + } From b97687527be85a55e12804c98745c5619eadcc32 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 12 Apr 2022 21:59:16 +0000 Subject: [PATCH 597/708] asm-generic: fix __get_unaligned_be48() on 32 bit platforms While testing the new macros for working with 48 bit containers, I faced a weird problem: 32 + 16: 0x2ef6e8da 0x79e60000 48: 0xffffe8da + 0x79e60000 All the bits starting from the 32nd were getting 1d in 9/10 cases. The debug showed: p[0]: 0x00002e0000000000 p[1]: 0x00002ef600000000 p[2]: 0xffffffffe8000000 p[3]: 0xffffffffe8da0000 p[4]: 0xffffffffe8da7900 p[5]: 0xffffffffe8da79e6 that the value becomes a garbage after the third OR, i.e. on `p[2] << 24`. When the 31st bit is 1 and there's no explicit cast to an unsigned, it's being considered as a signed int and getting sign-extended on OR, so `e8000000` becomes `ffffffffe8000000` and messes up the result. Cast the @p[2] to u64 as well to avoid this. Now: 32 + 16: 0x7ef6a490 0xddc10000 48: 0x7ef6a490 + 0xddc10000 p[0]: 0x00007e0000000000 p[1]: 0x00007ef600000000 p[2]: 0x00007ef6a4000000 p[3]: 0x00007ef6a4900000 p[4]: 0x00007ef6a490dd00 p[5]: 0x00007ef6a490ddc1 Fixes: c2ea5fcf53d5 ("asm-generic: introduce be48 unaligned accessors") Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20220412215220.75677-1-alobakin@pm.me Signed-off-by: Jens Axboe --- include/asm-generic/unaligned.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h index 8fc637379899..df30f11b4a46 100644 --- a/include/asm-generic/unaligned.h +++ b/include/asm-generic/unaligned.h @@ -143,7 +143,7 @@ static inline void put_unaligned_be48(const u64 val, void *p) static inline u64 __get_unaligned_be48(const u8 *p) { - return (u64)p[0] << 40 | (u64)p[1] << 32 | p[2] << 24 | + return (u64)p[0] << 40 | (u64)p[1] << 32 | (u64)p[2] << 24 | p[3] << 16 | p[4] << 8 | p[5]; } From 932aba1e169090357a77af18850a10c256b50819 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 12 Apr 2022 05:41:00 -0400 Subject: [PATCH 598/708] stat: fix inconsistency between struct stat and struct compat_stat struct stat (defined in arch/x86/include/uapi/asm/stat.h) has 32-bit st_dev and st_rdev; struct compat_stat (defined in arch/x86/include/asm/compat.h) has 16-bit st_dev and st_rdev followed by a 16-bit padding. This patch fixes struct compat_stat to match struct stat. [ Historical note: the old x86 'struct stat' did have that 16-bit field that the compat layer had kept around, but it was changes back in 2003 by "struct stat - support larger dev_t": https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git/commit/?id=e95b2065677fe32512a597a79db94b77b90c968d and back in those days, the x86_64 port was still new, and separate from the i386 code, and had already picked up the old version with a 16-bit st_dev field ] Note that we can't change compat_dev_t because it is used by compat_loop_info. Also, if the st_dev and st_rdev values are 32-bit, we don't have to use old_valid_dev to test if the value fits into them. This fixes -EOVERFLOW on filesystems that are on NVMe because NVMe uses the major number 259. Signed-off-by: Mikulas Patocka Cc: Andreas Schwab Cc: Matthew Wilcox Cc: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/x86/include/asm/compat.h | 6 ++---- fs/stat.c | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 7516e4199b3c..20fd0acd7d80 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -28,15 +28,13 @@ typedef u16 compat_ipc_pid_t; typedef __kernel_fsid_t compat_fsid_t; struct compat_stat { - compat_dev_t st_dev; - u16 __pad1; + u32 st_dev; compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; __compat_uid_t st_uid; __compat_gid_t st_gid; - compat_dev_t st_rdev; - u16 __pad2; + u32 st_rdev; u32 st_size; u32 st_blksize; u32 st_blocks; diff --git a/fs/stat.c b/fs/stat.c index 7f734be0e57e..5c2c94464e8b 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -348,9 +348,6 @@ SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, stat # define choose_32_64(a,b) b #endif -#define valid_dev(x) choose_32_64(old_valid_dev(x),true) -#define encode_dev(x) choose_32_64(old_encode_dev,new_encode_dev)(x) - #ifndef INIT_STRUCT_STAT_PADDING # define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st)) #endif @@ -359,7 +356,9 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) { struct stat tmp; - if (!valid_dev(stat->dev) || !valid_dev(stat->rdev)) + if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) + return -EOVERFLOW; + if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) return -EOVERFLOW; #if BITS_PER_LONG == 32 if (stat->size > MAX_NON_LFS) @@ -367,7 +366,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) #endif INIT_STRUCT_STAT_PADDING(tmp); - tmp.st_dev = encode_dev(stat->dev); + tmp.st_dev = new_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; @@ -377,7 +376,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); - tmp.st_rdev = encode_dev(stat->rdev); + tmp.st_rdev = new_encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_mtime = stat->mtime.tv_sec; @@ -665,11 +664,13 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) { struct compat_stat tmp; - if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) + if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) + return -EOVERFLOW; + if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) return -EOVERFLOW; memset(&tmp, 0, sizeof(tmp)); - tmp.st_dev = old_encode_dev(stat->dev); + tmp.st_dev = new_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; @@ -679,7 +680,7 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); - tmp.st_rdev = old_encode_dev(stat->rdev); + tmp.st_rdev = new_encode_dev(stat->rdev); if ((u64) stat->size > MAX_NON_LFS) return -EOVERFLOW; tmp.st_size = stat->size; From 925ca893b4a65177394581737b95d03fea2660f2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 13 Apr 2022 07:48:08 +0200 Subject: [PATCH 599/708] ALSA: memalloc: Add fallback SG-buffer allocations for x86 The recent change for memory allocator replaced the SG-buffer handling helper for x86 with the standard non-contiguous page handler. This works for most cases, but there is a corner case I obviously overlooked, namely, the fallback of non-contiguous handler without IOMMU. When the system runs without IOMMU, the core handler tries to use the continuous pages with a single SGL entry. It works nicely for most cases, but when the system memory gets fragmented, the large allocation may fail frequently. Ideally the non-contig handler could deal with the proper SG pages, it's cumbersome to extend for now. As a workaround, here we add new types for (minimalistic) SG allocations, instead, so that the allocator falls back to those types automatically when the allocation with the standard API failed. BTW, one better (but pretty minor) improvement from the previous SG-buffer code is that this provides the proper mmap support without the PCM's page fault handling. Fixes: 2c95b92ecd92 ("ALSA: memalloc: Unify x86 SG-buffer handling (take#3)") BugLink: https://gitlab.freedesktop.org/pipewire/pipewire/-/issues/2272 BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1198248 Cc: Link: https://lore.kernel.org/r/20220413054808.7547-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/memalloc.h | 5 ++ sound/core/memalloc.c | 111 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 115 insertions(+), 1 deletion(-) diff --git a/include/sound/memalloc.h b/include/sound/memalloc.h index 653dfffb3ac8..8d79cebf95f3 100644 --- a/include/sound/memalloc.h +++ b/include/sound/memalloc.h @@ -51,6 +51,11 @@ struct snd_dma_device { #define SNDRV_DMA_TYPE_DEV_SG SNDRV_DMA_TYPE_DEV /* no SG-buf support */ #define SNDRV_DMA_TYPE_DEV_WC_SG SNDRV_DMA_TYPE_DEV_WC #endif +/* fallback types, don't use those directly */ +#ifdef CONFIG_SND_DMA_SGBUF +#define SNDRV_DMA_TYPE_DEV_SG_FALLBACK 10 +#define SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK 11 +#endif /* * info for buffer allocation diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 6fd763d4d15b..15dc7160ba34 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -499,6 +499,10 @@ static const struct snd_malloc_ops snd_dma_wc_ops = { }; #endif /* CONFIG_X86 */ +#ifdef CONFIG_SND_DMA_SGBUF +static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size); +#endif + /* * Non-contiguous pages allocator */ @@ -509,8 +513,18 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size) sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir, DEFAULT_GFP, 0); - if (!sgt) + if (!sgt) { +#ifdef CONFIG_SND_DMA_SGBUF + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG) + dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; + else + dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK; + return snd_dma_sg_fallback_alloc(dmab, size); +#else return NULL; +#endif + } + dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, sg_dma_address(sgt->sgl)); p = dma_vmap_noncontiguous(dmab->dev.dev, size, sgt); @@ -633,6 +647,8 @@ static void *snd_dma_sg_wc_alloc(struct snd_dma_buffer *dmab, size_t size) if (!p) return NULL; + if (dmab->dev.type != SNDRV_DMA_TYPE_DEV_WC_SG) + return p; for_each_sgtable_page(sgt, &iter, 0) set_memory_wc(sg_wc_address(&iter), 1); return p; @@ -665,6 +681,95 @@ static const struct snd_malloc_ops snd_dma_sg_wc_ops = { .get_page = snd_dma_noncontig_get_page, .get_chunk_size = snd_dma_noncontig_get_chunk_size, }; + +/* Fallback SG-buffer allocations for x86 */ +struct snd_dma_sg_fallback { + size_t count; + struct page **pages; + dma_addr_t *addrs; +}; + +static void __snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab, + struct snd_dma_sg_fallback *sgbuf) +{ + size_t i; + + if (sgbuf->count && dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) + set_pages_array_wb(sgbuf->pages, sgbuf->count); + for (i = 0; i < sgbuf->count && sgbuf->pages[i]; i++) + dma_free_coherent(dmab->dev.dev, PAGE_SIZE, + page_address(sgbuf->pages[i]), + sgbuf->addrs[i]); + kvfree(sgbuf->pages); + kvfree(sgbuf->addrs); + kfree(sgbuf); +} + +static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) +{ + struct snd_dma_sg_fallback *sgbuf; + struct page **pages; + size_t i, count; + void *p; + + sgbuf = kzalloc(sizeof(*sgbuf), GFP_KERNEL); + if (!sgbuf) + return NULL; + count = PAGE_ALIGN(size) >> PAGE_SHIFT; + pages = kvcalloc(count, sizeof(*pages), GFP_KERNEL); + if (!pages) + goto error; + sgbuf->pages = pages; + sgbuf->addrs = kvcalloc(count, sizeof(*sgbuf->addrs), GFP_KERNEL); + if (!sgbuf->addrs) + goto error; + + for (i = 0; i < count; sgbuf->count++, i++) { + p = dma_alloc_coherent(dmab->dev.dev, PAGE_SIZE, + &sgbuf->addrs[i], DEFAULT_GFP); + if (!p) + goto error; + sgbuf->pages[i] = virt_to_page(p); + } + + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) + set_pages_array_wc(pages, count); + p = vmap(pages, count, VM_MAP, PAGE_KERNEL); + if (!p) + goto error; + dmab->private_data = sgbuf; + return p; + + error: + __snd_dma_sg_fallback_free(dmab, sgbuf); + return NULL; +} + +static void snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab) +{ + vunmap(dmab->area); + __snd_dma_sg_fallback_free(dmab, dmab->private_data); +} + +static int snd_dma_sg_fallback_mmap(struct snd_dma_buffer *dmab, + struct vm_area_struct *area) +{ + struct snd_dma_sg_fallback *sgbuf = dmab->private_data; + + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) + area->vm_page_prot = pgprot_writecombine(area->vm_page_prot); + return vm_map_pages(area, sgbuf->pages, sgbuf->count); +} + +static const struct snd_malloc_ops snd_dma_sg_fallback_ops = { + .alloc = snd_dma_sg_fallback_alloc, + .free = snd_dma_sg_fallback_free, + .mmap = snd_dma_sg_fallback_mmap, + /* reuse vmalloc helpers */ + .get_addr = snd_dma_vmalloc_get_addr, + .get_page = snd_dma_vmalloc_get_page, + .get_chunk_size = snd_dma_vmalloc_get_chunk_size, +}; #endif /* CONFIG_SND_DMA_SGBUF */ /* @@ -736,6 +841,10 @@ static const struct snd_malloc_ops *dma_ops[] = { #ifdef CONFIG_GENERIC_ALLOCATOR [SNDRV_DMA_TYPE_DEV_IRAM] = &snd_dma_iram_ops, #endif /* CONFIG_GENERIC_ALLOCATOR */ +#ifdef CONFIG_SND_DMA_SGBUF + [SNDRV_DMA_TYPE_DEV_SG_FALLBACK] = &snd_dma_sg_fallback_ops, + [SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK] = &snd_dma_sg_fallback_ops, +#endif #endif /* CONFIG_HAS_DMA */ }; From 24d0c9f0e7de95fe3e3e0067cbea1cd5d413244b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Apr 2022 15:07:40 +0200 Subject: [PATCH 600/708] ALSA: usb-audio: Limit max buffer and period sizes per time In the previous fix, we increased the max buffer bytes from 1MB to 4MB so that we can use bigger buffers for the modern HiFi devices with higher rates, more channels and wider formats. OTOH, extending this has a concern that too big buffer is allowed for the lower rates, less channels and narrower formats; when an application tries to allocate as big buffer as possible, it'll lead to unexpectedly too huge size. Also, we had a problem about the inconsistent max buffer and period bytes for the implicit feedback mode when both streams have different channels. This was fixed by the (relatively complex) patch to reduce the max buffer and period bytes accordingly. This is an alternative fix for those, a patch to kill two birds with one stone (*): instead of increasing the max buffer bytes blindly and applying the reduction per channels, we simply use the hw constraints for the buffer and period "time". Meanwhile the max buffer and period bytes are set unlimited instead. Since the inconsistency of buffer (and period) bytes comes from the difference of the channels in the tied streams, as long as we care only about the buffer (and period) time, it doesn't matter; the buffer time is same for different channels, although we still allow higher buffer size. Similarly, this will allow more buffer bytes for HiFi devices while it also keeps the reasonable size for the legacy devices, too. As of this patch, the max period and buffer time are set to 1 and 2 seconds, which should be large enough for all possible use cases. (*) No animals were harmed in the making of this patch. Fixes: 98c27add5d96 ("ALSA: usb-audio: Cap upper limits of buffer/period bytes for implicit fb") Fixes: fee2ec8cceb3 ("ALSA: usb-audio: Increase max buffer size") Link: https://lore.kernel.org/r/20220412130740.18933-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 101 +++++++----------------------------------------- 1 file changed, 14 insertions(+), 87 deletions(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 37ee6df8b15a..6d699065e81a 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -659,9 +659,6 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream) #define hwc_debug(fmt, args...) do { } while(0) #endif -#define MAX_BUFFER_BYTES (4 * 1024 * 1024) -#define MAX_PERIOD_BYTES (512 * 1024) - static const struct snd_pcm_hardware snd_usb_hardware = { .info = SNDRV_PCM_INFO_MMAP | @@ -672,9 +669,9 @@ static const struct snd_pcm_hardware snd_usb_hardware = SNDRV_PCM_INFO_PAUSE, .channels_min = 1, .channels_max = 256, - .buffer_bytes_max = MAX_BUFFER_BYTES, + .buffer_bytes_max = INT_MAX, /* limited by BUFFER_TIME later */ .period_bytes_min = 64, - .period_bytes_max = MAX_PERIOD_BYTES, + .period_bytes_max = INT_MAX, /* limited by PERIOD_TIME later */ .periods_min = 2, .periods_max = 1024, }; @@ -974,78 +971,6 @@ static int hw_rule_periods_implicit_fb(struct snd_pcm_hw_params *params, ep->cur_buffer_periods); } -/* get the adjusted max buffer (or period) bytes that can fit with the - * paired format for implicit fb - */ -static unsigned int -get_adjusted_max_bytes(struct snd_usb_substream *subs, - struct snd_usb_substream *pair, - struct snd_pcm_hw_params *params, - unsigned int max_bytes, - bool reverse_map) -{ - const struct audioformat *fp, *pp; - unsigned int rmax = 0, r; - - list_for_each_entry(fp, &subs->fmt_list, list) { - if (!fp->implicit_fb) - continue; - if (!reverse_map && - !hw_check_valid_format(subs, params, fp)) - continue; - list_for_each_entry(pp, &pair->fmt_list, list) { - if (pp->iface != fp->sync_iface || - pp->altsetting != fp->sync_altsetting || - pp->ep_idx != fp->sync_ep_idx) - continue; - if (reverse_map && - !hw_check_valid_format(pair, params, pp)) - break; - if (!reverse_map && pp->channels > fp->channels) - r = max_bytes * fp->channels / pp->channels; - else if (reverse_map && pp->channels < fp->channels) - r = max_bytes * pp->channels / fp->channels; - else - r = max_bytes; - rmax = max(rmax, r); - break; - } - } - return rmax; -} - -/* Reduce the period or buffer bytes depending on the paired substream; - * when a paired configuration for implicit fb has a higher number of channels, - * we need to reduce the max size accordingly, otherwise it may become unusable - */ -static int hw_rule_bytes_implicit_fb(struct snd_pcm_hw_params *params, - struct snd_pcm_hw_rule *rule) -{ - struct snd_usb_substream *subs = rule->private; - struct snd_usb_substream *pair; - struct snd_interval *it; - unsigned int max_bytes; - unsigned int rmax; - - pair = &subs->stream->substream[!subs->direction]; - if (!pair->ep_num) - return 0; - - if (rule->var == SNDRV_PCM_HW_PARAM_PERIOD_BYTES) - max_bytes = MAX_PERIOD_BYTES; - else - max_bytes = MAX_BUFFER_BYTES; - - rmax = get_adjusted_max_bytes(subs, pair, params, max_bytes, false); - if (!rmax) - rmax = get_adjusted_max_bytes(pair, subs, params, max_bytes, true); - if (!rmax) - return 0; - - it = hw_param_interval(params, rule->var); - return apply_hw_params_minmax(it, 0, rmax); -} - /* * set up the runtime hardware information. */ @@ -1139,6 +1064,18 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre return err; } + /* set max period and buffer sizes for 1 and 2 seconds, respectively */ + err = snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_PERIOD_TIME, + 0, 1000000); + if (err < 0) + return err; + err = snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_BUFFER_TIME, + 0, 2000000); + if (err < 0) + return err; + /* additional hw constraints for implicit fb */ err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT, hw_rule_format_implicit_fb, subs, @@ -1160,16 +1097,6 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre SNDRV_PCM_HW_PARAM_PERIODS, -1); if (err < 0) return err; - err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, - hw_rule_bytes_implicit_fb, subs, - SNDRV_PCM_HW_PARAM_BUFFER_BYTES, -1); - if (err < 0) - return err; - err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, - hw_rule_bytes_implicit_fb, subs, - SNDRV_PCM_HW_PARAM_PERIOD_BYTES, -1); - if (err < 0) - return err; list_for_each_entry(fp, &subs->fmt_list, list) { if (fp->implicit_fb) { From f034fc50d3c7d9385c20d505ab4cf56b8fd18ac7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 11 Apr 2022 09:17:58 +0300 Subject: [PATCH 601/708] perf tools: Fix misleading add event PMU debug message Fix incorrect debug message: Attempting to add event pmu 'intel_pt' with '' that may result in non-fatal errors which always appears with perf record -vv and intel_pt e.g. perf record -vv -e intel_pt//u uname The message is incorrect because there will never be non-fatal errors. Suppress the message if the PMU is 'selectable' i.e. meant to be selected directly as an event. Fixes: 4ac22b484d4c79e8 ("perf parse-events: Make add PMU verbose output clearer") Signed-off-by: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20220411061758.2458417-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 24997925ae00..dd84fed698a3 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1523,7 +1523,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, bool use_uncore_alias; LIST_HEAD(config_terms); - if (verbose > 1) { + pmu = parse_state->fake_pmu ?: perf_pmu__find(name); + + if (verbose > 1 && !(pmu && pmu->selectable)) { fprintf(stderr, "Attempting to add event pmu '%s' with '", name); if (head_config) { @@ -1536,7 +1538,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, fprintf(stderr, "' that may result in non-fatal errors\n"); } - pmu = parse_state->fake_pmu ?: perf_pmu__find(name); if (!pmu) { char *err_str; From 5c7d28c6f6d4e739bafb92f913ec8ff982239c0e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 6 Mar 2022 00:24:20 +0100 Subject: [PATCH 602/708] power: supply: samsung-sdi-battery: Add missing charge restart voltages Two of the batteries were missing charging restart voltages, meaning they can drain if the algorithm relies on restarting charging at this voltage. Fix it up. Fixes: c8aee3f41cb8 ("power: supply: Static data for Samsung batteries") Signed-off-by: Linus Walleij Signed-off-by: Sebastian Reichel --- drivers/power/supply/samsung-sdi-battery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/power/supply/samsung-sdi-battery.c b/drivers/power/supply/samsung-sdi-battery.c index 9d59f277f519..b33daab798b9 100644 --- a/drivers/power/supply/samsung-sdi-battery.c +++ b/drivers/power/supply/samsung-sdi-battery.c @@ -824,6 +824,7 @@ static struct samsung_sdi_battery samsung_sdi_batteries[] = { .constant_charge_current_max_ua = 900000, .constant_charge_voltage_max_uv = 4200000, .charge_term_current_ua = 200000, + .charge_restart_voltage_uv = 4170000, .maintenance_charge = samsung_maint_charge_table, .maintenance_charge_size = ARRAY_SIZE(samsung_maint_charge_table), .alert_low_temp_charge_current_ua = 300000, @@ -867,6 +868,7 @@ static struct samsung_sdi_battery samsung_sdi_batteries[] = { .constant_charge_current_max_ua = 1500000, .constant_charge_voltage_max_uv = 4350000, .charge_term_current_ua = 120000, + .charge_restart_voltage_uv = 4300000, .maintenance_charge = samsung_maint_charge_table, .maintenance_charge_size = ARRAY_SIZE(samsung_maint_charge_table), .alert_low_temp_charge_current_ua = 300000, From 581045ed5cfa42ed7f5364d6ccbcb6fcc077ffcf Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Tue, 29 Mar 2022 03:34:03 +0000 Subject: [PATCH 603/708] power: supply: Reset err after not finding static battery Otherwise power_supply_get_battery_info always returns -ENODEV on devices that do not have a static battery, even when a simple battery is found. Fixes: c8aee3f41cb8 ("power: supply: Static data for Samsung batteries") Signed-off-by: Yassine Oudjana Reviewed-by: Linus Walleij Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index ea02c8dcd748..d925cb137e12 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -604,6 +604,12 @@ int power_supply_get_battery_info(struct power_supply *psy, err = samsung_sdi_battery_get_info(&psy->dev, value, &info); if (!err) goto out_ret_pointer; + else if (err == -ENODEV) + /* + * Device does not have a static battery. + * Proceed to look for a simple battery. + */ + err = 0; if (strcmp("simple-battery", value)) { err = -ENODEV; From e16b859872b87650bb55b12cca5a5fcdc49c1442 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 12 Apr 2022 11:34:57 +0200 Subject: [PATCH 604/708] macvlan: Fix leaking skb in source mode with nodst option The MACVLAN receive handler clones skbs to all matching source MACVLAN interfaces, before it passes the packet along to match on destination based MACVLANs. When using the MACVLAN nodst mode, passing the packet to destination based MACVLANs is omitted and the handler returns with RX_HANDLER_CONSUMED. However, the passed skb is not freed, leaking for any packet processed with the nodst option. Properly free the skb when consuming packets to fix that leak. Fixes: 427f0c8c194b ("macvlan: Add nodst option to macvlan type source") Signed-off-by: Martin Willi Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 069e8824c264..b00bc8173abe 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -460,8 +460,10 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) return RX_HANDLER_CONSUMED; *pskb = skb; eth = eth_hdr(skb); - if (macvlan_forward_source(skb, port, eth->h_source)) + if (macvlan_forward_source(skb, port, eth->h_source)) { + kfree_skb(skb); return RX_HANDLER_CONSUMED; + } src = macvlan_hash_lookup(port, eth->h_source); if (src && src->mode != MACVLAN_MODE_VEPA && src->mode != MACVLAN_MODE_BRIDGE) { @@ -480,8 +482,10 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) return RX_HANDLER_PASS; } - if (macvlan_forward_source(skb, port, eth->h_source)) + if (macvlan_forward_source(skb, port, eth->h_source)) { + kfree_skb(skb); return RX_HANDLER_CONSUMED; + } if (macvlan_passthru(port)) vlan = list_first_or_null_rcu(&port->vlans, struct macvlan_dev, list); From 762c2998c9625f642f0d23da7d3f7e4f90665fdf Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Apr 2022 12:44:26 +0300 Subject: [PATCH 605/708] Revert "net: dsa: setup master before ports" This reverts commit 11fd667dac315ea3f2469961f6d2869271a46cae. dsa_slave_change_mtu() updates the MTU of the DSA master and of the associated CPU port, but only if it detects a change to the master MTU. The blamed commit in the Fixes: tag below addressed a regression where dsa_slave_change_mtu() would return early and not do anything due to ds->ops->port_change_mtu() not being implemented. However, that commit also had the effect that the master MTU got set up to the correct value by dsa_master_setup(), but the associated CPU port's MTU did not get updated. This causes breakage for drivers that rely on the ->port_change_mtu() DSA call to account for the tagging overhead on the CPU port, and don't set up the initial MTU during the setup phase. Things actually worked before because they were in a fragile equilibrium where dsa_slave_change_mtu() was called before dsa_master_setup() was. So dsa_slave_change_mtu() could actually detect a change and update the CPU port MTU too. Restore the code to the way things used to work by reverting the reorder of dsa_tree_setup_master() and dsa_tree_setup_ports(). That change did not have a concrete motivation going for it anyway, it just looked better. Fixes: 066dfc429040 ("Revert "net: dsa: stop updating master MTU from master.c"") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index ca6af86964bc..cf933225df32 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -562,7 +562,6 @@ static void dsa_port_teardown(struct dsa_port *dp) { struct devlink_port *dlp = &dp->devlink_port; struct dsa_switch *ds = dp->ds; - struct net_device *slave; if (!dp->setup) return; @@ -584,11 +583,9 @@ static void dsa_port_teardown(struct dsa_port *dp) dsa_port_link_unregister_of(dp); break; case DSA_PORT_TYPE_USER: - slave = dp->slave; - - if (slave) { + if (dp->slave) { + dsa_slave_destroy(dp->slave); dp->slave = NULL; - dsa_slave_destroy(slave); } break; } @@ -1147,17 +1144,17 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) if (err) goto teardown_cpu_ports; - err = dsa_tree_setup_master(dst); + err = dsa_tree_setup_ports(dst); if (err) goto teardown_switches; - err = dsa_tree_setup_ports(dst); + err = dsa_tree_setup_master(dst); if (err) - goto teardown_master; + goto teardown_ports; err = dsa_tree_setup_lags(dst); if (err) - goto teardown_ports; + goto teardown_master; dst->setup = true; @@ -1165,10 +1162,10 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) return 0; -teardown_ports: - dsa_tree_teardown_ports(dst); teardown_master: dsa_tree_teardown_master(dst); +teardown_ports: + dsa_tree_teardown_ports(dst); teardown_switches: dsa_tree_teardown_switches(dst); teardown_cpu_ports: @@ -1186,10 +1183,10 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_lags(dst); - dsa_tree_teardown_ports(dst); - dsa_tree_teardown_master(dst); + dsa_tree_teardown_ports(dst); + dsa_tree_teardown_switches(dst); dsa_tree_teardown_cpu_ports(dst); From 3d2504524531990b32a0629cc984db44f399d161 Mon Sep 17 00:00:00 2001 From: Dylan Hung Date: Tue, 12 Apr 2022 19:48:59 +0800 Subject: [PATCH 606/708] net: ftgmac100: access hardware register after clock ready AST2600 MAC register 0x58 is writable only when the MAC clock is enabled. Usually, the MAC clock is enabled by the bootloader so register 0x58 is set normally when the bootloader is involved. To make ast2600 ftgmac100 work without the bootloader, postpone the register write until the clock is ready. Fixes: 137d23cea1c0 ("net: ftgmac100: Fix Aspeed ast2600 TX hang issue") Signed-off-by: Dylan Hung Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index d5356db7539a..caf48023f8ea 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1835,11 +1835,6 @@ static int ftgmac100_probe(struct platform_device *pdev) priv->rxdes0_edorr_mask = BIT(30); priv->txdes0_edotr_mask = BIT(30); priv->is_aspeed = true; - /* Disable ast2600 problematic HW arbitration */ - if (of_device_is_compatible(np, "aspeed,ast2600-mac")) { - iowrite32(FTGMAC100_TM_DEFAULT, - priv->base + FTGMAC100_OFFSET_TM); - } } else { priv->rxdes0_edorr_mask = BIT(15); priv->txdes0_edotr_mask = BIT(15); @@ -1911,6 +1906,11 @@ static int ftgmac100_probe(struct platform_device *pdev) err = ftgmac100_setup_clk(priv); if (err) goto err_phy_connect; + + /* Disable ast2600 problematic HW arbitration */ + if (of_device_is_compatible(np, "aspeed,ast2600-mac")) + iowrite32(FTGMAC100_TM_DEFAULT, + priv->base + FTGMAC100_OFFSET_TM); } /* Default ring sizes */ From 5209aed5137880fa229746cb521f715e55596460 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 7 Apr 2022 21:23:08 +0200 Subject: [PATCH 607/708] random: allow partial reads if later user copies fail Rather than failing entirely if a copy_to_user() fails at some point, instead we should return a partial read for the amount that succeeded prior, unless none succeeded at all, in which case we return -EFAULT as before. This makes it consistent with other reader interfaces. For example, the following snippet for /dev/zero outputs "4" followed by "1": int fd; void *x = mmap(NULL, 4096, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); assert(x != MAP_FAILED); fd = open("/dev/zero", O_RDONLY); assert(fd >= 0); printf("%zd\n", read(fd, x, 4)); printf("%zd\n", read(fd, x + 4095, 4)); close(fd); This brings that same standard behavior to the various RNG reader interfaces. While we're at it, we can streamline the loop logic a little bit. Suggested-by: Linus Torvalds Cc: Jann Horn Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index e15063d61460..df43c5060f00 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -523,8 +523,7 @@ EXPORT_SYMBOL(get_random_bytes); static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) { - ssize_t ret = 0; - size_t len; + size_t len, left, ret = 0; u32 chacha_state[CHACHA_STATE_WORDS]; u8 output[CHACHA_BLOCK_SIZE]; @@ -543,37 +542,40 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) * the user directly. */ if (nbytes <= CHACHA_KEY_SIZE) { - ret = copy_to_user(buf, &chacha_state[4], nbytes) ? -EFAULT : nbytes; + ret = nbytes - copy_to_user(buf, &chacha_state[4], nbytes); goto out_zero_chacha; } - do { + for (;;) { chacha20_block(chacha_state, output); if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE); - if (copy_to_user(buf, output, len)) { - ret = -EFAULT; + left = copy_to_user(buf, output, len); + if (left) { + ret += len - left; break; } - nbytes -= len; buf += len; ret += len; + nbytes -= len; + if (!nbytes) + break; BUILD_BUG_ON(PAGE_SIZE % CHACHA_BLOCK_SIZE != 0); - if (!(ret % PAGE_SIZE) && nbytes) { + if (ret % PAGE_SIZE == 0) { if (signal_pending(current)) break; cond_resched(); } - } while (nbytes); + } memzero_explicit(output, sizeof(output)); out_zero_chacha: memzero_explicit(chacha_state, sizeof(chacha_state)); - return ret; + return ret ? ret : -EFAULT; } /* From b0c3e796f24b588b862b61ce235d3c9417dc8983 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 8 Apr 2022 18:14:57 +0200 Subject: [PATCH 608/708] random: make random_get_entropy() return an unsigned long Some implementations were returning type `unsigned long`, while others that fell back to get_cycles() were implicitly returning a `cycles_t` or an untyped constant int literal. That makes for weird and confusing code, and basically all code in the kernel already handled it like it was an `unsigned long`. I recently tried to handle it as the largest type it could be, a `cycles_t`, but doing so doesn't really help with much. Instead let's just make random_get_entropy() return an unsigned long all the time. This also matches the commonly used `arch_get_random_long()` function, so now RDRAND and RDTSC return the same sized integer, which means one can fallback to the other more gracefully. Cc: Dominik Brodowski Cc: Theodore Ts'o Acked-by: Thomas Gleixner Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 20 +++++++------------- include/linux/timex.h | 2 +- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index df43c5060f00..6b01b2be9dd4 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1018,7 +1018,7 @@ int __init rand_initialize(void) */ void add_device_randomness(const void *buf, size_t size) { - cycles_t cycles = random_get_entropy(); + unsigned long cycles = random_get_entropy(); unsigned long flags, now = jiffies; if (crng_init == 0 && size) @@ -1049,8 +1049,7 @@ struct timer_rand_state { */ static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) { - cycles_t cycles = random_get_entropy(); - unsigned long flags, now = jiffies; + unsigned long cycles = random_get_entropy(), now = jiffies, flags; long delta, delta2, delta3; spin_lock_irqsave(&input_pool.lock, flags); @@ -1339,8 +1338,7 @@ static void mix_interrupt_randomness(struct work_struct *work) void add_interrupt_randomness(int irq) { enum { MIX_INFLIGHT = 1U << 31 }; - cycles_t cycles = random_get_entropy(); - unsigned long now = jiffies; + unsigned long cycles = random_get_entropy(), now = jiffies; struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned int new_count; @@ -1353,16 +1351,12 @@ void add_interrupt_randomness(int irq) if (cycles == 0) cycles = get_reg(fast_pool, regs); - if (sizeof(cycles) == 8) + if (sizeof(unsigned long) == 8) { irq_data.u64[0] = cycles ^ rol64(now, 32) ^ irq; - else { + irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_; + } else { irq_data.u32[0] = cycles ^ irq; irq_data.u32[1] = now; - } - - if (sizeof(unsigned long) == 8) - irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_; - else { irq_data.u32[2] = regs ? instruction_pointer(regs) : _RET_IP_; irq_data.u32[3] = get_reg(fast_pool, regs); } @@ -1409,7 +1403,7 @@ static void entropy_timer(struct timer_list *t) static void try_to_generate_entropy(void) { struct { - cycles_t cycles; + unsigned long cycles; struct timer_list timer; } stack; diff --git a/include/linux/timex.h b/include/linux/timex.h index 059b18eb1f1f..5745c90c8800 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -75,7 +75,7 @@ * By default we use get_cycles() for this purpose, but individual * architectures may override this in their asm/timex.h header file. */ -#define random_get_entropy() get_cycles() +#define random_get_entropy() ((unsigned long)get_cycles()) #endif /* From 2511e0c87786f333c4665508f421ac99e378c719 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Tue, 12 Apr 2022 17:55:27 +0200 Subject: [PATCH 609/708] net: dsa: realtek: fix Kconfig to assure consistent driver linkage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kernel test robot reported a build failure: or1k-linux-ld: drivers/net/dsa/realtek/realtek-smi.o:(.rodata+0x16c): undefined reference to `rtl8366rb_variant' ... with the following build configuration: CONFIG_NET_DSA_REALTEK=y CONFIG_NET_DSA_REALTEK_SMI=y CONFIG_NET_DSA_REALTEK_RTL8365MB=y CONFIG_NET_DSA_REALTEK_RTL8366RB=m The problem here is that the realtek-smi interface driver gets built-in, while the rtl8366rb switch subdriver gets built as a module, hence the symbol rtl8366rb_variant is not reachable when defining the OF device table in the interface driver. The Kconfig dependencies don't help in this scenario because they just say that the subdriver(s) depend on at least one interface driver. In fact, the subdrivers don't depend on the interface drivers at all, and can even be built even in their absence. Somewhat strangely, the interface drivers can also be built in the absence of any subdriver, BUT, if a subdriver IS enabled, then it must be reachable according to the linkage of the interface driver: effectively what the IS_REACHABLE() macro achieves. If it is not reachable, the above kind of linker error will be observed. Rather than papering over the above build error by simply using IS_REACHABLE(), we can do a little better and admit that it is actually the interface drivers that have a dependency on the subdrivers. So this patch does exactly that. Specifically, we ensure that: 1. The interface drivers' Kconfig symbols must have a value no greater than the value of any subdriver Kconfig symbols. 2. The subdrivers should by default enable both interface drivers, since most users probably want at least one of them; those interface drivers can be explicitly disabled however. What this doesn't do is prevent a user from building only a subdriver, without any interface driver. To that end, add an additional line of help in the menu to guide users in the right direction. Link: https://lore.kernel.org/all/202204110757.XIafvVnj-lkp@intel.com/ Reported-by: kernel test robot Fixes: aac94001067d ("net: dsa: realtek: add new mdio interface for drivers") Signed-off-by: Alvin Šipraga Signed-off-by: David S. Miller --- drivers/net/dsa/realtek/Kconfig | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/realtek/Kconfig b/drivers/net/dsa/realtek/Kconfig index 1aa79735355f..060165a85fb7 100644 --- a/drivers/net/dsa/realtek/Kconfig +++ b/drivers/net/dsa/realtek/Kconfig @@ -9,34 +9,46 @@ menuconfig NET_DSA_REALTEK help Select to enable support for Realtek Ethernet switch chips. + Note that at least one interface driver must be enabled for the + subdrivers to be loaded. Moreover, an interface driver cannot achieve + anything without at least one subdriver enabled. + +if NET_DSA_REALTEK + config NET_DSA_REALTEK_MDIO - tristate "Realtek MDIO connected switch driver" - depends on NET_DSA_REALTEK + tristate "Realtek MDIO interface driver" depends on OF + depends on NET_DSA_REALTEK_RTL8365MB || NET_DSA_REALTEK_RTL8366RB + depends on NET_DSA_REALTEK_RTL8365MB || !NET_DSA_REALTEK_RTL8365MB + depends on NET_DSA_REALTEK_RTL8366RB || !NET_DSA_REALTEK_RTL8366RB help Select to enable support for registering switches configured through MDIO. config NET_DSA_REALTEK_SMI - tristate "Realtek SMI connected switch driver" - depends on NET_DSA_REALTEK + tristate "Realtek SMI interface driver" depends on OF + depends on NET_DSA_REALTEK_RTL8365MB || NET_DSA_REALTEK_RTL8366RB + depends on NET_DSA_REALTEK_RTL8365MB || !NET_DSA_REALTEK_RTL8365MB + depends on NET_DSA_REALTEK_RTL8366RB || !NET_DSA_REALTEK_RTL8366RB help Select to enable support for registering switches connected through SMI. config NET_DSA_REALTEK_RTL8365MB tristate "Realtek RTL8365MB switch subdriver" - depends on NET_DSA_REALTEK - depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO + imply NET_DSA_REALTEK_SMI + imply NET_DSA_REALTEK_MDIO select NET_DSA_TAG_RTL8_4 help Select to enable support for Realtek RTL8365MB-VC and RTL8367S. config NET_DSA_REALTEK_RTL8366RB tristate "Realtek RTL8366RB switch subdriver" - depends on NET_DSA_REALTEK - depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO + imply NET_DSA_REALTEK_SMI + imply NET_DSA_REALTEK_MDIO select NET_DSA_TAG_RTL4_A help - Select to enable support for Realtek RTL8366RB + Select to enable support for Realtek RTL8366RB. + +endif From 8e925de60ddaeccb455f0bdad17ce9d8cc2db2e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Tue, 12 Apr 2022 17:57:49 +0200 Subject: [PATCH 610/708] net: dsa: realtek: don't parse compatible string for RTL8366S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This switch is not even supported, but if someone were to actually put this compatible string "realtek,rtl8366s" in their device tree, they would be greeted with a kernel panic because the probe function would dereference NULL. So let's just remove it. Link: https://lore.kernel.org/all/CACRpkdYdKZs0WExXc3=0yPNOwP+oOV60HRz7SRoGjZvYHaT=1g@mail.gmail.com/ Signed-off-by: Alvin Šipraga Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/realtek/realtek-smi.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/dsa/realtek/realtek-smi.c b/drivers/net/dsa/realtek/realtek-smi.c index 2243d3da55b2..6cec559c90ce 100644 --- a/drivers/net/dsa/realtek/realtek-smi.c +++ b/drivers/net/dsa/realtek/realtek-smi.c @@ -546,11 +546,6 @@ static const struct of_device_id realtek_smi_of_match[] = { .data = &rtl8366rb_variant, }, #endif - { - /* FIXME: add support for RTL8366S and more */ - .compatible = "realtek,rtl8366s", - .data = NULL, - }, #if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8365MB) { .compatible = "realtek,rtl8365mb", From ef27324e2cb7bb24542d6cb2571740eefe6b00dc Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Wed, 13 Apr 2022 00:04:30 +0800 Subject: [PATCH 611/708] nfc: nci: add flush_workqueue to prevent uaf Our detector found a concurrent use-after-free bug when detaching an NCI device. The main reason for this bug is the unexpected scheduling between the used delayed mechanism (timer and workqueue). The race can be demonstrated below: Thread-1 Thread-2 | nci_dev_up() | nci_open_device() | __nci_request(nci_reset_req) | nci_send_cmd | queue_work(cmd_work) nci_unregister_device() | nci_close_device() | ... del_timer_sync(cmd_timer)[1] | ... | Worker nci_free_device() | nci_cmd_work() kfree(ndev)[3] | mod_timer(cmd_timer)[2] In short, the cleanup routine thought that the cmd_timer has already been detached by [1] but the mod_timer can re-attach the timer [2], even it is already released [3], resulting in UAF. This UAF is easy to trigger, crash trace by POC is like below [ 66.703713] ================================================================== [ 66.703974] BUG: KASAN: use-after-free in enqueue_timer+0x448/0x490 [ 66.703974] Write of size 8 at addr ffff888009fb7058 by task kworker/u4:1/33 [ 66.703974] [ 66.703974] CPU: 1 PID: 33 Comm: kworker/u4:1 Not tainted 5.18.0-rc2 #5 [ 66.703974] Workqueue: nfc2_nci_cmd_wq nci_cmd_work [ 66.703974] Call Trace: [ 66.703974] [ 66.703974] dump_stack_lvl+0x57/0x7d [ 66.703974] print_report.cold+0x5e/0x5db [ 66.703974] ? enqueue_timer+0x448/0x490 [ 66.703974] kasan_report+0xbe/0x1c0 [ 66.703974] ? enqueue_timer+0x448/0x490 [ 66.703974] enqueue_timer+0x448/0x490 [ 66.703974] __mod_timer+0x5e6/0xb80 [ 66.703974] ? mark_held_locks+0x9e/0xe0 [ 66.703974] ? try_to_del_timer_sync+0xf0/0xf0 [ 66.703974] ? lockdep_hardirqs_on_prepare+0x17b/0x410 [ 66.703974] ? queue_work_on+0x61/0x80 [ 66.703974] ? lockdep_hardirqs_on+0xbf/0x130 [ 66.703974] process_one_work+0x8bb/0x1510 [ 66.703974] ? lockdep_hardirqs_on_prepare+0x410/0x410 [ 66.703974] ? pwq_dec_nr_in_flight+0x230/0x230 [ 66.703974] ? rwlock_bug.part.0+0x90/0x90 [ 66.703974] ? _raw_spin_lock_irq+0x41/0x50 [ 66.703974] worker_thread+0x575/0x1190 [ 66.703974] ? process_one_work+0x1510/0x1510 [ 66.703974] kthread+0x2a0/0x340 [ 66.703974] ? kthread_complete_and_exit+0x20/0x20 [ 66.703974] ret_from_fork+0x22/0x30 [ 66.703974] [ 66.703974] [ 66.703974] Allocated by task 267: [ 66.703974] kasan_save_stack+0x1e/0x40 [ 66.703974] __kasan_kmalloc+0x81/0xa0 [ 66.703974] nci_allocate_device+0xd3/0x390 [ 66.703974] nfcmrvl_nci_register_dev+0x183/0x2c0 [ 66.703974] nfcmrvl_nci_uart_open+0xf2/0x1dd [ 66.703974] nci_uart_tty_ioctl+0x2c3/0x4a0 [ 66.703974] tty_ioctl+0x764/0x1310 [ 66.703974] __x64_sys_ioctl+0x122/0x190 [ 66.703974] do_syscall_64+0x3b/0x90 [ 66.703974] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 66.703974] [ 66.703974] Freed by task 406: [ 66.703974] kasan_save_stack+0x1e/0x40 [ 66.703974] kasan_set_track+0x21/0x30 [ 66.703974] kasan_set_free_info+0x20/0x30 [ 66.703974] __kasan_slab_free+0x108/0x170 [ 66.703974] kfree+0xb0/0x330 [ 66.703974] nfcmrvl_nci_unregister_dev+0x90/0xd0 [ 66.703974] nci_uart_tty_close+0xdf/0x180 [ 66.703974] tty_ldisc_kill+0x73/0x110 [ 66.703974] tty_ldisc_hangup+0x281/0x5b0 [ 66.703974] __tty_hangup.part.0+0x431/0x890 [ 66.703974] tty_release+0x3a8/0xc80 [ 66.703974] __fput+0x1f0/0x8c0 [ 66.703974] task_work_run+0xc9/0x170 [ 66.703974] exit_to_user_mode_prepare+0x194/0x1a0 [ 66.703974] syscall_exit_to_user_mode+0x19/0x50 [ 66.703974] do_syscall_64+0x48/0x90 [ 66.703974] entry_SYSCALL_64_after_hwframe+0x44/0xae To fix the UAF, this patch adds flush_workqueue() to ensure the nci_cmd_work is finished before the following del_timer_sync. This combination will promise the timer is actually detached. Fixes: 6a2968aaf50c ("NFC: basic NCI protocol implementation") Signed-off-by: Lin Ma Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- net/nfc/nci/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index d2537383a3e8..6a193cce2a75 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -560,6 +560,10 @@ static int nci_close_device(struct nci_dev *ndev) mutex_lock(&ndev->req_lock); if (!test_and_clear_bit(NCI_UP, &ndev->flags)) { + /* Need to flush the cmd wq in case + * there is a queued/running cmd_work + */ + flush_workqueue(ndev->cmd_wq); del_timer_sync(&ndev->cmd_timer); del_timer_sync(&ndev->data_timer); mutex_unlock(&ndev->req_lock); From e941976659f1f6834077a1596bf53e6bdb10e90b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 13 Apr 2022 16:10:33 +0100 Subject: [PATCH 612/708] io_uring: use right issue_flags for splice/tee Pass right issue_flags into into io_file_get_fixed() instead of IO_URING_F_UNLOCKED. It's probably not a problem at the moment but let's do it safer. Fixes: 6bf9c47a3989 ("io_uring: defer file assignment") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/7d242daa9df5d776907686977cd29fbceb4a2d8d.1649862516.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6b1a98697dcf..3d6cbf77c89d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4358,7 +4358,7 @@ static int io_tee(struct io_kiocb *req, unsigned int issue_flags) return -EAGAIN; if (sp->flags & SPLICE_F_FD_IN_FIXED) - in = io_file_get_fixed(req, sp->splice_fd_in, IO_URING_F_UNLOCKED); + in = io_file_get_fixed(req, sp->splice_fd_in, issue_flags); else in = io_file_get_normal(req, sp->splice_fd_in); if (!in) { @@ -4400,7 +4400,7 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags) return -EAGAIN; if (sp->flags & SPLICE_F_FD_IN_FIXED) - in = io_file_get_fixed(req, sp->splice_fd_in, IO_URING_F_UNLOCKED); + in = io_file_get_fixed(req, sp->splice_fd_in, issue_flags); else in = io_file_get_normal(req, sp->splice_fd_in); if (!in) { From cce64ef01308b677a687d90927fc2b2e0e1cba67 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 13 Apr 2022 16:10:34 +0100 Subject: [PATCH 613/708] io_uring: fix poll file assign deadlock We pass "unlocked" into io_assign_file() in io_poll_check_events(), which can lead to double locking. Fixes: 6bf9c47a3989 ("io_uring: defer file assignment") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/2476d4ae46554324b599ee4055447b105f20a75a.1649862516.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 3d6cbf77c89d..d06f1952fdfa 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5858,8 +5858,9 @@ static int io_poll_check_events(struct io_kiocb *req, bool locked) if (!req->result) { struct poll_table_struct pt = { ._key = req->apoll_events }; + unsigned flags = locked ? 0 : IO_URING_F_UNLOCKED; - if (unlikely(!io_assign_file(req, IO_URING_F_UNLOCKED))) + if (unlikely(!io_assign_file(req, flags))) req->result = -EBADF; else req->result = vfs_poll(req->file, &pt) & req->apoll_events; From 7179c3ce3dbff646c55f7cd664a895f462f049e5 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 13 Apr 2022 16:10:35 +0100 Subject: [PATCH 614/708] io_uring: fix poll error reporting We should not return an error code in req->result in io_poll_check_events(), because it may get mangled and returned as success. Just return the error code directly, the callers will fail the request or proceed accordingly. Fixes: 6bf9c47a3989 ("io_uring: defer file assignment") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/5f03514ee33324dc811fb93df84aee0f695fb044.1649862516.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d06f1952fdfa..ab674a0d269b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5861,9 +5861,8 @@ static int io_poll_check_events(struct io_kiocb *req, bool locked) unsigned flags = locked ? 0 : IO_URING_F_UNLOCKED; if (unlikely(!io_assign_file(req, flags))) - req->result = -EBADF; - else - req->result = vfs_poll(req->file, &pt) & req->apoll_events; + return -EBADF; + req->result = vfs_poll(req->file, &pt) & req->apoll_events; } /* multishot, just fill an CQE and proceed */ From 08c1af8f1c13bbf210f1760132f4df24d0ed46d6 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 3 Apr 2022 14:38:22 -0400 Subject: [PATCH 615/708] dm integrity: fix memory corruption when tag_size is less than digest size It is possible to set up dm-integrity in such a way that the "tag_size" parameter is less than the actual digest size. In this situation, a part of the digest beyond tag_size is ignored. In this case, dm-integrity would write beyond the end of the ic->recalc_tags array and corrupt memory. The corruption happened in integrity_recalc->integrity_sector_checksum->crypto_shash_final. Fix this corruption by increasing the tags array so that it has enough padding at the end to accomodate the loop in integrity_recalc() being able to write a full digest size for the last member of the tags array. Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index ad2d5faa2ebb..36ae30b73a6e 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4399,6 +4399,7 @@ try_smaller_buffer: } if (ic->internal_hash) { + size_t recalc_tags_size; ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); if (!ic->recalc_wq ) { ti->error = "Cannot allocate workqueue"; @@ -4412,8 +4413,10 @@ try_smaller_buffer: r = -ENOMEM; goto bad; } - ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block, - ic->tag_size, GFP_KERNEL); + recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size; + if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size) + recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size; + ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL); if (!ic->recalc_tags) { ti->error = "Cannot allocate tags for recalculating"; r = -ENOMEM; From 9e949a3886356fe9112c6f6f34a6e23d1d35407f Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sat, 19 Mar 2022 00:20:15 -0700 Subject: [PATCH 616/708] smp: Fix offline cpu check in flush_smp_call_function_queue() The check in flush_smp_call_function_queue() for callbacks that are sent to offline CPUs currently checks whether the queue is empty. However, flush_smp_call_function_queue() has just deleted all the callbacks from the queue and moved all the entries into a local list. This checks would only be positive if some callbacks were added in the short time after llist_del_all() was called. This does not seem to be the intention of this check. Change the check to look at the local list to which the entries were moved instead of the queue from which all the callbacks were just removed. Fixes: 8d056c48e4862 ("CPU hotplug, smp: flush any pending IPI callbacks before CPU offline") Signed-off-by: Nadav Amit Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220319072015.1495036-1-namit@vmware.com --- kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/smp.c b/kernel/smp.c index 01a7c1706a58..65a630f62363 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -579,7 +579,7 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) /* There shouldn't be any pending callbacks on an offline CPU. */ if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) && - !warned && !llist_empty(head))) { + !warned && entry != NULL)) { warned = true; WARN(1, "IPI on offline CPU %d\n", smp_processor_id()); From 64c4a37ac04eeb43c42d272f6e6c8c12bfcf4304 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Wed, 13 Apr 2022 04:42:51 -0700 Subject: [PATCH 617/708] cifs: potential buffer overflow in handling symlinks Smatch printed a warning: arch/x86/crypto/poly1305_glue.c:198 poly1305_update_arch() error: __memcpy() 'dctx->buf' too small (16 vs u32max) It's caused because Smatch marks 'link_len' as untrusted since it comes from sscanf(). Add a check to ensure that 'link_len' is not larger than the size of the 'link_str' buffer. Fixes: c69c1b6eaea1 ("cifs: implement CIFSParseMFSymlink()") Signed-off-by: Harshit Mogalapalli Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/link.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 852e54ee82c2..bbdf3281559c 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -85,6 +85,9 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len, if (rc != 1) return -EINVAL; + if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) + return -EINVAL; + rc = symlink_hash(link_len, link_str, md5_hash); if (rc) { cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc); From ce40426fdc3c92acdba6b5ca74bc7277ffaa6a3d Mon Sep 17 00:00:00 2001 From: Khazhismel Kumykov Date: Mon, 11 Apr 2022 15:03:35 -0700 Subject: [PATCH 618/708] dm mpath: only use ktime_get_ns() in historical selector Mixing sched_clock() and ktime_get_ns() usage will give bad results. Switch hst_select_path() from using sched_clock() to ktime_get_ns(). Also rename path_service_time()'s 'sched_now' variable to 'now'. Fixes: 2613eab11996 ("dm mpath: add Historical Service Time Path Selector") Signed-off-by: Khazhismel Kumykov Signed-off-by: Mike Snitzer --- drivers/md/dm-ps-historical-service-time.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c index 875bca30a0dd..82f2a06153dc 100644 --- a/drivers/md/dm-ps-historical-service-time.c +++ b/drivers/md/dm-ps-historical-service-time.c @@ -27,7 +27,6 @@ #include #include #include -#include #define DM_MSG_PREFIX "multipath historical-service-time" @@ -433,7 +432,7 @@ static struct dm_path *hst_select_path(struct path_selector *ps, { struct selector *s = ps->context; struct path_info *pi = NULL, *best = NULL; - u64 time_now = sched_clock(); + u64 time_now = ktime_get_ns(); struct dm_path *ret = NULL; unsigned long flags; @@ -474,7 +473,7 @@ static int hst_start_io(struct path_selector *ps, struct dm_path *path, static u64 path_service_time(struct path_info *pi, u64 start_time) { - u64 sched_now = ktime_get_ns(); + u64 now = ktime_get_ns(); /* if a previous disk request has finished after this IO was * sent to the hardware, pretend the submission happened @@ -483,11 +482,11 @@ static u64 path_service_time(struct path_info *pi, u64 start_time) if (time_after64(pi->last_finish, start_time)) start_time = pi->last_finish; - pi->last_finish = sched_now; - if (time_before64(sched_now, start_time)) + pi->last_finish = now; + if (time_before64(now, start_time)) return 0; - return sched_now - start_time; + return now - start_time; } static int hst_end_io(struct path_selector *ps, struct dm_path *path, From 73d7b06e902dd294e1f61554f7c403d0f705cf92 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 13 Apr 2022 11:06:19 -0400 Subject: [PATCH 619/708] dm zone: fix NULL pointer dereference in dm_zone_map_bio Commit 0fbb4d93b38b ("dm: add dm_submit_bio_remap interface") changed the alloc_io() function to delay the initialization of struct dm_io's orig_bio member, leaving it NULL until after the dm_io and associated user submitted bio is processed by __split_and_process_bio(). This change causes a NULL pointer dereference in dm_zone_map_bio() when the original user bio is inspected to detect the need for zone append command emulation. Fix this NULL pointer by updating dm_zone_map_bio() to not access ->orig_bio when the same info can be accessed from the clone of the ->orig_bio _before_ any ->map processing. Save off the bio_op() and bio_sectors() for the clone and then use the saved orig_bio_details as needed. Fixes: 0fbb4d93b38b ("dm: add dm_submit_bio_remap interface") Reported-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zone.c | 49 +++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index c1ca9be4b79e..57daa86c19cf 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -360,16 +360,20 @@ static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno, return 0; } +struct orig_bio_details { + unsigned int op; + unsigned int nr_sectors; +}; + /* * First phase of BIO mapping for targets with zone append emulation: * check all BIO that change a zone writer pointer and change zone * append operations into regular write operations. */ static bool dm_zone_map_bio_begin(struct mapped_device *md, - struct bio *orig_bio, struct bio *clone) + unsigned int zno, struct bio *clone) { sector_t zsectors = blk_queue_zone_sectors(md->queue); - unsigned int zno = bio_zone_no(orig_bio); unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]); /* @@ -384,7 +388,7 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md, WRITE_ONCE(md->zwp_offset[zno], zwp_offset); } - switch (bio_op(orig_bio)) { + switch (bio_op(clone)) { case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_FINISH: return true; @@ -401,9 +405,8 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md, * target zone. */ clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE | - (orig_bio->bi_opf & (~REQ_OP_MASK)); - clone->bi_iter.bi_sector = - orig_bio->bi_iter.bi_sector + zwp_offset; + (clone->bi_opf & (~REQ_OP_MASK)); + clone->bi_iter.bi_sector += zwp_offset; break; default: DMWARN_LIMIT("Invalid BIO operation"); @@ -423,11 +426,10 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md, * data written to a zone. Note that at this point, the remapped clone BIO * may already have completed, so we do not touch it. */ -static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, - struct bio *orig_bio, +static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int zno, + struct orig_bio_details *orig_bio_details, unsigned int nr_sectors) { - unsigned int zno = bio_zone_no(orig_bio); unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]); /* The clone BIO may already have been completed and failed */ @@ -435,7 +437,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, return BLK_STS_IOERR; /* Update the zone wp offset */ - switch (bio_op(orig_bio)) { + switch (orig_bio_details->op) { case REQ_OP_ZONE_RESET: WRITE_ONCE(md->zwp_offset[zno], 0); return BLK_STS_OK; @@ -452,7 +454,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, * Check that the target did not truncate the write operation * emulating a zone append. */ - if (nr_sectors != bio_sectors(orig_bio)) { + if (nr_sectors != orig_bio_details->nr_sectors) { DMWARN_LIMIT("Truncated write for zone append"); return BLK_STS_IOERR; } @@ -488,7 +490,7 @@ static inline void dm_zone_unlock(struct request_queue *q, bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED); } -static bool dm_need_zone_wp_tracking(struct bio *orig_bio) +static bool dm_need_zone_wp_tracking(struct bio *bio) { /* * Special processing is not needed for operations that do not need the @@ -496,15 +498,15 @@ static bool dm_need_zone_wp_tracking(struct bio *orig_bio) * zones and all operations that do not modify directly a sequential * zone write pointer. */ - if (op_is_flush(orig_bio->bi_opf) && !bio_sectors(orig_bio)) + if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) return false; - switch (bio_op(orig_bio)) { + switch (bio_op(bio)) { case REQ_OP_WRITE_ZEROES: case REQ_OP_WRITE: case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_FINISH: case REQ_OP_ZONE_APPEND: - return bio_zone_is_seq(orig_bio); + return bio_zone_is_seq(bio); default: return false; } @@ -519,8 +521,8 @@ int dm_zone_map_bio(struct dm_target_io *tio) struct dm_target *ti = tio->ti; struct mapped_device *md = io->md; struct request_queue *q = md->queue; - struct bio *orig_bio = io->orig_bio; struct bio *clone = &tio->clone; + struct orig_bio_details orig_bio_details; unsigned int zno; blk_status_t sts; int r; @@ -529,18 +531,21 @@ int dm_zone_map_bio(struct dm_target_io *tio) * IOs that do not change a zone write pointer do not need * any additional special processing. */ - if (!dm_need_zone_wp_tracking(orig_bio)) + if (!dm_need_zone_wp_tracking(clone)) return ti->type->map(ti, clone); /* Lock the target zone */ - zno = bio_zone_no(orig_bio); + zno = bio_zone_no(clone); dm_zone_lock(q, zno, clone); + orig_bio_details.nr_sectors = bio_sectors(clone); + orig_bio_details.op = bio_op(clone); + /* * Check that the bio and the target zone write pointer offset are * both valid, and if the bio is a zone append, remap it to a write. */ - if (!dm_zone_map_bio_begin(md, orig_bio, clone)) { + if (!dm_zone_map_bio_begin(md, zno, clone)) { dm_zone_unlock(q, zno, clone); return DM_MAPIO_KILL; } @@ -560,7 +565,8 @@ int dm_zone_map_bio(struct dm_target_io *tio) * The target submitted the clone BIO. The target zone will * be unlocked on completion of the clone. */ - sts = dm_zone_map_bio_end(md, orig_bio, *tio->len_ptr); + sts = dm_zone_map_bio_end(md, zno, &orig_bio_details, + *tio->len_ptr); break; case DM_MAPIO_REMAPPED: /* @@ -568,7 +574,8 @@ int dm_zone_map_bio(struct dm_target_io *tio) * unlock the target zone here as the clone will not be * submitted. */ - sts = dm_zone_map_bio_end(md, orig_bio, *tio->len_ptr); + sts = dm_zone_map_bio_end(md, zno, &orig_bio_details, + *tio->len_ptr); if (sts != BLK_STS_OK) dm_zone_unlock(q, zno, clone); break; From 1ef3342a934e235aca72b4bcc0d6854d80a65077 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 13 Apr 2022 10:10:36 -0300 Subject: [PATCH 620/708] vfio/pci: Fix vf_token mechanism when device-specific VF drivers are used get_pf_vdev() tries to check if a PF is a VFIO PF by looking at the driver: if (pci_dev_driver(physfn) != pci_dev_driver(vdev->pdev)) { However now that we have multiple VF and PF drivers this is no longer reliable. This means that security tests realted to vf_token can be skipped by mixing and matching different VFIO PCI drivers. Instead of trying to use the driver core to find the PF devices maintain a linked list of all PF vfio_pci_core_device's that we have called pci_enable_sriov() on. When registering a VF just search the list to see if the PF is present and record the match permanently in the struct. PCI core locking prevents a PF from passing pci_disable_sriov() while VF drivers are attached so the VFIO owned PF becomes a static property of the VF. In common cases where vfio does not own the PF the global list remains empty and the VF's pointer is statically NULL. This also fixes a lockdep splat from recursive locking of the vfio_group::device_lock between vfio_device_get_from_name() and vfio_device_get_from_dev(). If the VF and PF share the same group this would deadlock. Fixes: ff53edf6d6ab ("vfio/pci: Split the pci_driver code out of vfio_pci_core.c") Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v3-876570980634+f2e8-vfio_vf_token_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_core.c | 124 ++++++++++++++++++------------- include/linux/vfio_pci_core.h | 2 + 2 files changed, 76 insertions(+), 50 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index b7bb16f92ac6..06b6f3594a13 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -36,6 +36,10 @@ static bool nointxmask; static bool disable_vga; static bool disable_idle_d3; +/* List of PF's that vfio_pci_core_sriov_configure() has been called on */ +static DEFINE_MUTEX(vfio_pci_sriov_pfs_mutex); +static LIST_HEAD(vfio_pci_sriov_pfs); + static inline bool vfio_vga_disabled(void) { #ifdef CONFIG_VFIO_PCI_VGA @@ -434,47 +438,17 @@ out: } EXPORT_SYMBOL_GPL(vfio_pci_core_disable); -static struct vfio_pci_core_device *get_pf_vdev(struct vfio_pci_core_device *vdev) -{ - struct pci_dev *physfn = pci_physfn(vdev->pdev); - struct vfio_device *pf_dev; - - if (!vdev->pdev->is_virtfn) - return NULL; - - pf_dev = vfio_device_get_from_dev(&physfn->dev); - if (!pf_dev) - return NULL; - - if (pci_dev_driver(physfn) != pci_dev_driver(vdev->pdev)) { - vfio_device_put(pf_dev); - return NULL; - } - - return container_of(pf_dev, struct vfio_pci_core_device, vdev); -} - -static void vfio_pci_vf_token_user_add(struct vfio_pci_core_device *vdev, int val) -{ - struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev); - - if (!pf_vdev) - return; - - mutex_lock(&pf_vdev->vf_token->lock); - pf_vdev->vf_token->users += val; - WARN_ON(pf_vdev->vf_token->users < 0); - mutex_unlock(&pf_vdev->vf_token->lock); - - vfio_device_put(&pf_vdev->vdev); -} - void vfio_pci_core_close_device(struct vfio_device *core_vdev) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); - vfio_pci_vf_token_user_add(vdev, -1); + if (vdev->sriov_pf_core_dev) { + mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock); + WARN_ON(!vdev->sriov_pf_core_dev->vf_token->users); + vdev->sriov_pf_core_dev->vf_token->users--; + mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock); + } vfio_spapr_pci_eeh_release(vdev->pdev); vfio_pci_core_disable(vdev); @@ -495,7 +469,12 @@ void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev) { vfio_pci_probe_mmaps(vdev); vfio_spapr_pci_eeh_open(vdev->pdev); - vfio_pci_vf_token_user_add(vdev, 1); + + if (vdev->sriov_pf_core_dev) { + mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock); + vdev->sriov_pf_core_dev->vf_token->users++; + mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock); + } } EXPORT_SYMBOL_GPL(vfio_pci_core_finish_enable); @@ -1583,11 +1562,8 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, * * If the VF token is provided but unused, an error is generated. */ - if (!vdev->pdev->is_virtfn && !vdev->vf_token && !vf_token) - return 0; /* No VF token provided or required */ - if (vdev->pdev->is_virtfn) { - struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev); + struct vfio_pci_core_device *pf_vdev = vdev->sriov_pf_core_dev; bool match; if (!pf_vdev) { @@ -1600,7 +1576,6 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, } if (!vf_token) { - vfio_device_put(&pf_vdev->vdev); pci_info_ratelimited(vdev->pdev, "VF token required to access device\n"); return -EACCES; @@ -1610,8 +1585,6 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, match = uuid_equal(uuid, &pf_vdev->vf_token->uuid); mutex_unlock(&pf_vdev->vf_token->lock); - vfio_device_put(&pf_vdev->vdev); - if (!match) { pci_info_ratelimited(vdev->pdev, "Incorrect VF token provided for device\n"); @@ -1732,8 +1705,30 @@ static int vfio_pci_bus_notifier(struct notifier_block *nb, static int vfio_pci_vf_init(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; + struct vfio_pci_core_device *cur; + struct pci_dev *physfn; int ret; + if (pdev->is_virtfn) { + /* + * If this VF was created by our vfio_pci_core_sriov_configure() + * then we can find the PF vfio_pci_core_device now, and due to + * the locking in pci_disable_sriov() it cannot change until + * this VF device driver is removed. + */ + physfn = pci_physfn(vdev->pdev); + mutex_lock(&vfio_pci_sriov_pfs_mutex); + list_for_each_entry(cur, &vfio_pci_sriov_pfs, sriov_pfs_item) { + if (cur->pdev == physfn) { + vdev->sriov_pf_core_dev = cur; + break; + } + } + mutex_unlock(&vfio_pci_sriov_pfs_mutex); + return 0; + } + + /* Not a SRIOV PF */ if (!pdev->is_physfn) return 0; @@ -1805,6 +1800,7 @@ void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev, INIT_LIST_HEAD(&vdev->ioeventfds_list); mutex_init(&vdev->vma_lock); INIT_LIST_HEAD(&vdev->vma_list); + INIT_LIST_HEAD(&vdev->sriov_pfs_item); init_rwsem(&vdev->memory_lock); } EXPORT_SYMBOL_GPL(vfio_pci_core_init_device); @@ -1896,7 +1892,7 @@ void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; - pci_disable_sriov(pdev); + vfio_pci_core_sriov_configure(pdev, 0); vfio_unregister_group_dev(&vdev->vdev); @@ -1935,21 +1931,49 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_aer_err_detected); int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn) { + struct vfio_pci_core_device *vdev; struct vfio_device *device; int ret = 0; + device_lock_assert(&pdev->dev); + device = vfio_device_get_from_dev(&pdev->dev); if (!device) return -ENODEV; - if (nr_virtfn == 0) - pci_disable_sriov(pdev); - else + vdev = container_of(device, struct vfio_pci_core_device, vdev); + + if (nr_virtfn) { + mutex_lock(&vfio_pci_sriov_pfs_mutex); + /* + * The thread that adds the vdev to the list is the only thread + * that gets to call pci_enable_sriov() and we will only allow + * it to be called once without going through + * pci_disable_sriov() + */ + if (!list_empty(&vdev->sriov_pfs_item)) { + ret = -EINVAL; + goto out_unlock; + } + list_add_tail(&vdev->sriov_pfs_item, &vfio_pci_sriov_pfs); + mutex_unlock(&vfio_pci_sriov_pfs_mutex); ret = pci_enable_sriov(pdev, nr_virtfn); + if (ret) + goto out_del; + ret = nr_virtfn; + goto out_put; + } + pci_disable_sriov(pdev); + +out_del: + mutex_lock(&vfio_pci_sriov_pfs_mutex); + list_del_init(&vdev->sriov_pfs_item); +out_unlock: + mutex_unlock(&vfio_pci_sriov_pfs_mutex); +out_put: vfio_device_put(device); - - return ret < 0 ? ret : nr_virtfn; + return ret; } EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure); diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 74a4a0f17b28..48f2dd3c568c 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -133,6 +133,8 @@ struct vfio_pci_core_device { struct mutex ioeventfds_lock; struct list_head ioeventfds_list; struct vfio_pci_vf_token *vf_token; + struct list_head sriov_pfs_item; + struct vfio_pci_core_device *sriov_pf_core_dev; struct notifier_block nb; struct mutex vma_lock; struct list_head vma_list; From b7ba6d8dc3569e49800ef0136799f26f43e237e8 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 11 Apr 2022 16:22:32 +0100 Subject: [PATCH 621/708] cpu/hotplug: Remove the 'cpu' member of cpuhp_cpu_state Currently the setting of the 'cpu' member of struct cpuhp_cpu_state in cpuhp_create() is too late as it is used earlier in _cpu_up(). If kzalloc_node() in __smpboot_create_thread() fails then the rollback will be done with st->cpu==0 causing CPU0 to be erroneously set to be dying, causing the scheduler to get mightily confused and throw its toys out of the pram. However the cpu number is actually available directly, so simply remove the 'cpu' member and avoid the problem in the first place. Fixes: 2ea46c6fc945 ("cpumask/hotplug: Fix cpu_dying() state tracking") Signed-off-by: Steven Price Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220411152233.474129-2-steven.price@arm.com --- kernel/cpu.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 5797c2a7a93f..d0a9aa0b42e8 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -71,7 +71,6 @@ struct cpuhp_cpu_state { bool rollback; bool single; bool bringup; - int cpu; struct hlist_node *node; struct hlist_node *last; enum cpuhp_state cb_state; @@ -475,7 +474,7 @@ static inline bool cpu_smt_allowed(unsigned int cpu) { return true; } #endif static inline enum cpuhp_state -cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target) +cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { enum cpuhp_state prev_state = st->state; bool bringup = st->state < target; @@ -486,14 +485,15 @@ cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target) st->target = target; st->single = false; st->bringup = bringup; - if (cpu_dying(st->cpu) != !bringup) - set_cpu_dying(st->cpu, !bringup); + if (cpu_dying(cpu) != !bringup) + set_cpu_dying(cpu, !bringup); return prev_state; } static inline void -cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) +cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st, + enum cpuhp_state prev_state) { bool bringup = !st->bringup; @@ -520,8 +520,8 @@ cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) } st->bringup = bringup; - if (cpu_dying(st->cpu) != !bringup) - set_cpu_dying(st->cpu, !bringup); + if (cpu_dying(cpu) != !bringup) + set_cpu_dying(cpu, !bringup); } /* Regular hotplug invocation of the AP hotplug thread */ @@ -541,15 +541,16 @@ static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st) wait_for_ap_thread(st, st->bringup); } -static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target) +static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st, + enum cpuhp_state target) { enum cpuhp_state prev_state; int ret; - prev_state = cpuhp_set_state(st, target); + prev_state = cpuhp_set_state(cpu, st, target); __cpuhp_kick_ap(st); if ((ret = st->result)) { - cpuhp_reset_state(st, prev_state); + cpuhp_reset_state(cpu, st, prev_state); __cpuhp_kick_ap(st); } @@ -581,7 +582,7 @@ static int bringup_wait_for_ap(unsigned int cpu) if (st->target <= CPUHP_AP_ONLINE_IDLE) return 0; - return cpuhp_kick_ap(st, st->target); + return cpuhp_kick_ap(cpu, st, st->target); } static int bringup_cpu(unsigned int cpu) @@ -704,7 +705,7 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, ret, cpu, cpuhp_get_step(st->state)->name, st->state); - cpuhp_reset_state(st, prev_state); + cpuhp_reset_state(cpu, st, prev_state); if (can_rollback_cpu(st)) WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, prev_state)); @@ -721,7 +722,6 @@ static void cpuhp_create(unsigned int cpu) init_completion(&st->done_up); init_completion(&st->done_down); - st->cpu = cpu; } static int cpuhp_should_run(unsigned int cpu) @@ -875,7 +875,7 @@ static int cpuhp_kick_ap_work(unsigned int cpu) cpuhp_lock_release(true); trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work); - ret = cpuhp_kick_ap(st, st->target); + ret = cpuhp_kick_ap(cpu, st, st->target); trace_cpuhp_exit(cpu, st->state, prev_state, ret); return ret; @@ -1107,7 +1107,7 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, ret, cpu, cpuhp_get_step(st->state)->name, st->state); - cpuhp_reset_state(st, prev_state); + cpuhp_reset_state(cpu, st, prev_state); if (st->state < prev_state) WARN_ON(cpuhp_invoke_callback_range(true, cpu, st, @@ -1134,7 +1134,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, cpuhp_tasks_frozen = tasks_frozen; - prev_state = cpuhp_set_state(st, target); + prev_state = cpuhp_set_state(cpu, st, target); /* * If the current CPU state is in the range of the AP hotplug thread, * then we need to kick the thread. @@ -1165,7 +1165,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, ret = cpuhp_down_callbacks(cpu, st, target); if (ret && st->state < prev_state) { if (st->state == CPUHP_TEARDOWN_CPU) { - cpuhp_reset_state(st, prev_state); + cpuhp_reset_state(cpu, st, prev_state); __cpuhp_kick_ap(st); } else { WARN(1, "DEAD callback error for CPU%d", cpu); @@ -1352,7 +1352,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) cpuhp_tasks_frozen = tasks_frozen; - cpuhp_set_state(st, target); + cpuhp_set_state(cpu, st, target); /* * If the current CPU state is in the range of the AP hotplug thread, * then we need to kick the thread once more. From d73f5d14e0cdd1f39764379250f26163913d7155 Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Wed, 13 Apr 2022 09:33:02 +0000 Subject: [PATCH 622/708] perf stat: Fix error check return value of hashmap__new(), must use IS_ERR() hashmap__new() returns ERR_PTR(-ENOMEM) when it fails, so we should use IS_ERR() to check it in error handling path. Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20220413093302.2538128-1-lv.ruyi@zte.com.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index ee6f03481215..817a2de264b4 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include @@ -311,7 +312,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, if (!mask) { mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL); - if (!mask) + if (IS_ERR(mask)) return -ENOMEM; counter->per_pkg_mask = mask; From a668cc07f990d2ed19424d5c1a529521a9d1cee1 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 13 Apr 2022 14:42:32 +0300 Subject: [PATCH 623/708] perf tools: Fix segfault accessing sample_id xyarray perf_evsel::sample_id is an xyarray which can cause a segfault when accessed beyond its size. e.g. # perf record -e intel_pt// -C 1 sleep 1 Segmentation fault (core dumped) # That is happening because a dummy event is opened to capture text poke events accross all CPUs, however the mmap logic is allocating according to the number of user_requested_cpus. In general, perf sometimes uses the evsel cpus to open events, and sometimes the evlist user_requested_cpus. However, it is not necessary to determine which case is which because the opened event file descriptors are also in an xyarray, the size of whch can be used to correctly allocate the size of the sample_id xyarray, because there is one ID per file descriptor. Note, in the affected code path, perf_evsel fd array is subsequently used to get the file descriptor for the mmap, so it makes sense for the xyarrays to be the same size there. Fixes: d1a177595b3a824c ("libperf: Adopt perf_evlist__mmap()/munmap() from tools/perf") Fixes: 246eba8e9041c477 ("perf tools: Add support for PERF_RECORD_TEXT_POKE") Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org # 5.5+ Link: https://lore.kernel.org/r/20220413114232.26914-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 1b15ba13c477..a09315538a30 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -577,7 +577,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, { struct perf_evsel *evsel; const struct perf_cpu_map *cpus = evlist->user_requested_cpus; - const struct perf_thread_map *threads = evlist->threads; if (!ops || !ops->get || !ops->mmap) return -EINVAL; @@ -589,7 +588,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, perf_evlist__for_each_entry(evlist, evsel) { if ((evsel->attr.read_format & PERF_FORMAT_ID) && evsel->sample_id == NULL && - perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) + perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0) return -ENOMEM; } From e4f1541caf60fcbe5a59e9d25805c0b5865e546a Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Tue, 29 Mar 2022 19:18:35 -0100 Subject: [PATCH 624/708] drm/amd/display: don't ignore alpha property on pre-multiplied mode "Pre-multiplied" is the default pixel blend mode for KMS/DRM, as documented in supported_modes of drm_plane_create_blend_mode_property(): https://cgit.freedesktop.org/drm/drm-misc/tree/drivers/gpu/drm/drm_blend.c In this mode, both 'pixel alpha' and 'plane alpha' participate in the calculation, as described by the pixel blend mode formula in KMS/DRM documentation: out.rgb = plane_alpha * fg.rgb + (1 - (plane_alpha * fg.alpha)) * bg.rgb Considering the blend config mechanisms we have in the driver so far, the alpha mode that better fits this blend mode is the _PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN, where the value for global_gain is the plane alpha (global_alpha). With this change, alpha property stops to be ignored. It also addresses Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1734 v2: * keep the 8-bit value for global_alpha_value (Nicholas) * correct the logical ordering for combined global gain (Nicholas) * apply to dcn10 too (Nicholas) Signed-off-by: Melissa Wen Tested-by: Rodrigo Siqueira Reviewed-by: Harry Wentland Tested-by: Simon Ser Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 14 +++++++++----- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 14 +++++++++----- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 781334b395ba..83fbea2df410 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2522,14 +2522,18 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) struct mpc *mpc = dc->res_pool->mpc; struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params); - if (per_pixel_alpha) - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; - else - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; - blnd_cfg.overlap_only = false; blnd_cfg.global_gain = 0xff; + if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else if (per_pixel_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } else { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; + } + if (pipe_ctx->plane_state->global_alpha) blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value; else diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 4290eaf11a04..b627c41713cc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2344,14 +2344,18 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) struct mpc *mpc = dc->res_pool->mpc; struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params); - if (per_pixel_alpha) - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; - else - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; - blnd_cfg.overlap_only = false; blnd_cfg.global_gain = 0xff; + if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else if (per_pixel_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } else { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; + } + if (pipe_ctx->plane_state->global_alpha) blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value; else From e3cf2e05441a2c5107fbffadb5b7943113ee11dd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 6 Apr 2022 14:39:03 -0400 Subject: [PATCH 625/708] drm/amdgpu: fix VCN 3.1.2 firmware name Drop the trailing vcn. Fixes: afc2f276057ea1 ("drm/amdgpu/vcn: add vcn support for vcn 3.1.2") Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index f99093f2ebc7..a0ee828a4a97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -52,7 +52,7 @@ #define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin" #define FIRMWARE_BEIGE_GOBY "amdgpu/beige_goby_vcn.bin" #define FIRMWARE_YELLOW_CARP "amdgpu/yellow_carp_vcn.bin" -#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2_vcn.bin" +#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); From 887f75cfd0da44c19dda93b2ff9e70ca8792cdc1 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 7 Apr 2022 20:12:28 +0800 Subject: [PATCH 626/708] drm/amdgpu: Ensure HDA function is suspended before ASIC reset DP/HDMI audio on AMD PRO VII stops working after S3: [ 149.450391] amdgpu 0000:63:00.0: amdgpu: MODE1 reset [ 149.450395] amdgpu 0000:63:00.0: amdgpu: GPU mode1 reset [ 149.450494] amdgpu 0000:63:00.0: amdgpu: GPU psp mode1 reset [ 149.983693] snd_hda_intel 0000:63:00.1: refused to change power state from D0 to D3hot [ 150.003439] amdgpu 0000:63:00.0: refused to change power state from D0 to D3hot ... [ 155.432975] snd_hda_intel 0000:63:00.1: CORB reset timeout#2, CORBRP = 65535 The offending commit is daf8de0874ab5b ("drm/amdgpu: always reset the asic in suspend (v2)"). Commit 34452ac3038a7 ("drm/amdgpu: don't use BACO for reset in S3 ") doesn't help, so the issue is something different. Assuming that to make HDA resume to D0 fully realized, it needs to be successfully put to D3 first. And this guesswork proves working, by moving amdgpu_asic_reset() to noirq callback, so it's called after HDA function is in D3. Fixes: daf8de0874ab5b ("drm/amdgpu: always reset the asic in suspend (v2)") Signed-off-by: Kai-Heng Feng Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b03663f42cc9..29e9419a914b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2323,18 +2323,23 @@ static int amdgpu_pmops_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - int r; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else adev->in_s3 = true; - r = amdgpu_device_suspend(drm_dev, true); - if (r) - return r; + return amdgpu_device_suspend(drm_dev, true); +} + +static int amdgpu_pmops_suspend_noirq(struct device *dev) +{ + struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + if (!adev->in_s0ix) - r = amdgpu_asic_reset(adev); - return r; + return amdgpu_asic_reset(adev); + + return 0; } static int amdgpu_pmops_resume(struct device *dev) @@ -2575,6 +2580,7 @@ static const struct dev_pm_ops amdgpu_pm_ops = { .prepare = amdgpu_pmops_prepare, .complete = amdgpu_pmops_complete, .suspend = amdgpu_pmops_suspend, + .suspend_noirq = amdgpu_pmops_suspend_noirq, .resume = amdgpu_pmops_resume, .freeze = amdgpu_pmops_freeze, .thaw = amdgpu_pmops_thaw, From 4593c1b6d159f1e5c35c07a7f125e79e5a864302 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Mo=C5=84?= Date: Wed, 6 Apr 2022 21:49:21 +0200 Subject: [PATCH 627/708] drm/amdgpu: Enable gfxoff quirk on MacBook Pro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enabling gfxoff quirk results in perfectly usable graphical user interface on MacBook Pro (15-inch, 2019) with Radeon Pro Vega 20 4 GB. Without the quirk, X server is completely unusable as every few seconds there is gpu reset due to ring gfx timeout. Signed-off-by: Tomasz Moń Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 46d4bf27ebbb..b8cfcc6b1125 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1205,6 +1205,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, + /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */ + { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 }, { 0, 0, 0, 0, 0 }, }; From aadaeca46ce54af9f8f494792a1ba47a6fbda7ba Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 30 Mar 2022 15:25:00 -0400 Subject: [PATCH 628/708] drm/amd/display: remove dtbclk_ss compensation for dcn316 [why] dcn316's dtbclk is from non_ss clock source. no compensation required here. Reviewed-by: Chris Park Acked-by: Pavle Kotarac Signed-off-by: Charlene Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c | 2 +- .../gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c index dfba6138f538..26feefbb8990 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c @@ -374,7 +374,7 @@ void dce_clock_read_ss_info(struct clk_mgr_internal *clk_mgr_dce) clk_mgr_dce->dprefclk_ss_percentage = info.spread_spectrum_percentage; } - if (clk_mgr_dce->base.ctx->dc->debug.ignore_dpref_ss) + if (clk_mgr_dce->base.ctx->dc->config.ignore_dpref_ss) clk_mgr_dce->dprefclk_ss_percentage = 0; } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index 702d00ce7da4..3121dd2d2a91 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -686,8 +686,8 @@ void dcn316_clk_mgr_construct( clk_mgr->base.base.dprefclk_khz = dcn316_smu_get_dpref_clk(&clk_mgr->base); clk_mgr->base.dccg->ref_dtbclk_khz = clk_mgr->base.base.dprefclk_khz; dce_clock_read_ss_info(&clk_mgr->base); - clk_mgr->base.dccg->ref_dtbclk_khz = - dce_adjust_dp_ref_freq_for_ss(&clk_mgr->base, clk_mgr->base.base.dprefclk_khz); + /*clk_mgr->base.dccg->ref_dtbclk_khz = + dce_adjust_dp_ref_freq_for_ss(&clk_mgr->base, clk_mgr->base.base.dprefclk_khz);*/ clk_mgr->base.base.bw_params = &dcn316_bw_params; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 77ef9d1f9ea8..9e79f60e6129 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -340,6 +340,7 @@ struct dc_config { bool is_asymmetric_memory; bool is_single_rank_dimm; bool use_pipe_ctx_sync_logic; + bool ignore_dpref_ss; }; enum visual_confirm { @@ -729,7 +730,6 @@ struct dc_debug_options { bool apply_vendor_specific_lttpr_wa; bool extended_blank_optimization; union aux_wake_wa_options aux_wake_wa; - bool ignore_dpref_ss; uint8_t psr_power_use_phy_fsm; }; From 9e02977bfad006af328add9434c8bffa40e053bb Mon Sep 17 00:00:00 2001 From: Chao Gao Date: Wed, 13 Apr 2022 08:32:22 +0200 Subject: [PATCH 629/708] dma-direct: avoid redundant memory sync for swiotlb When we looked into FIO performance with swiotlb enabled in VM, we found swiotlb_bounce() is always called one more time than expected for each DMA read request. It turns out that the bounce buffer is copied to original DMA buffer twice after the completion of a DMA request (one is done by in dma_direct_sync_single_for_cpu(), the other by swiotlb_tbl_unmap_single()). But the content in bounce buffer actually doesn't change between the two rounds of copy. So, one round of copy is redundant. Pass DMA_ATTR_SKIP_CPU_SYNC flag to swiotlb_tbl_unmap_single() to skip the memory copy in it. This fix increases FIO 64KB sequential read throughput in a guest with swiotlb=force by 5.6%. Fixes: 55897af63091 ("dma-direct: merge swiotlb_dma_ops into the dma_direct code") Reported-by: Wang Zhaoyang1 Reported-by: Gao Liang Signed-off-by: Chao Gao Reviewed-by: Kevin Tian Signed-off-by: Christoph Hellwig --- kernel/dma/direct.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index 4632b0f4f72e..8a6cd53dbe8c 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -114,6 +114,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, dma_direct_sync_single_for_cpu(dev, addr, size, dir); if (unlikely(is_swiotlb_buffer(dev, phys))) - swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); + swiotlb_tbl_unmap_single(dev, phys, size, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC); } #endif /* _KERNEL_DMA_DIRECT_H */ From 8b6c58458ee3206dde345fce327a4cb83e69caf9 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 13 Apr 2022 10:02:17 +1000 Subject: [PATCH 630/708] cifs: verify that tcon is valid before dereference in cifs_kill_sb On umount, cifs_sb->tlink_tree might contain entries that do not represent a valid tcon. Check the tcon for error before we dereference it. Signed-off-by: Ronnie Sahlberg Cc: stable@vger.kernel.org Reviewed-by: Shyam Prasad N Reported-by: Xiaoli Feng Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index aba0783a8f09..2b1a1c029c75 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -266,10 +266,11 @@ static void cifs_kill_sb(struct super_block *sb) * before we kill the sb. */ if (cifs_sb->root) { - node = rb_first(root); - while (node != NULL) { + for (node = rb_first(root); node; node = rb_next(node)) { tlink = rb_entry(node, struct tcon_link, tl_rbnode); tcon = tlink_tcon(tlink); + if (IS_ERR(tcon)) + continue; cfid = &tcon->crfid; mutex_lock(&cfid->fid_mutex); if (cfid->dentry) { @@ -277,7 +278,6 @@ static void cifs_kill_sb(struct super_block *sb) cfid->dentry = NULL; } mutex_unlock(&cfid->fid_mutex); - node = rb_next(node); } /* finally release root dentry */ From 968a1a5d6541cd24e37dadc1926eab9c10aeb09b Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 12 Apr 2022 15:58:52 +0200 Subject: [PATCH 631/708] tun: annotate access to queue->trans_start Commit 5337824f4dc4 ("net: annotate accesses to queue->trans_start") introduced a new helper, txq_trans_cond_update, to update queue->trans_start using WRITE_ONCE. One snippet in drivers/net/tun.c was missed, as it was introduced roughly at the same time. Fixes: 5337824f4dc4 ("net: annotate accesses to queue->trans_start") Cc: Eric Dumazet Signed-off-by: Antoine Tenart Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20220412135852.466386-1-atenart@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 276a0e42ca8e..dbe4c0a4be2c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1124,7 +1124,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* NETIF_F_LLTX requires to do our own update of trans_start */ queue = netdev_get_tx_queue(dev, txq); - queue->trans_start = jiffies; + txq_trans_cond_update(queue); /* Notify and wake up reader process */ if (tfile->flags & TUN_FASYNC) From 00fa91bc9cc2a9d340f963af5e457610ad4b2f9c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Apr 2022 20:22:09 +0300 Subject: [PATCH 632/708] net: dsa: felix: fix tagging protocol changes with multiple CPU ports When the device tree has 2 CPU ports defined, a single one is active (has any dp->cpu_dp pointers point to it). Yet the second one is still a CPU port, and DSA still calls ->change_tag_protocol on it. On the NXP LS1028A, the CPU ports are ports 4 and 5. Port 4 is the active CPU port and port 5 is inactive. After the following commands: # Initial setting cat /sys/class/net/eno2/dsa/tagging ocelot echo ocelot-8021q > /sys/class/net/eno2/dsa/tagging echo ocelot > /sys/class/net/eno2/dsa/tagging traffic is now broken, because the driver has moved the NPI port from port 4 to port 5, unbeknown to DSA. The problem can be avoided by detecting that the second CPU port is unused, and not doing anything for it. Further rework will be needed when proper support for multiple CPU ports is added. Treat this as a bug and prepare current kernels to work in single-CPU mode with multiple-CPU DT blobs. Fixes: adb3dccf090b ("net: dsa: felix: convert to the new .change_tag_protocol DSA API") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220412172209.2531865-1-vladimir.oltean@nxp.com Signed-off-by: Paolo Abeni --- drivers/net/dsa/ocelot/felix.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 413b0006e9a2..9e28219b223d 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -670,6 +670,8 @@ static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu, struct ocelot *ocelot = ds->priv; struct felix *felix = ocelot_to_felix(ocelot); enum dsa_tag_protocol old_proto = felix->tag_proto; + bool cpu_port_active = false; + struct dsa_port *dp; int err; if (proto != DSA_TAG_PROTO_SEVILLE && @@ -677,6 +679,27 @@ static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu, proto != DSA_TAG_PROTO_OCELOT_8021Q) return -EPROTONOSUPPORT; + /* We don't support multiple CPU ports, yet the DT blob may have + * multiple CPU ports defined. The first CPU port is the active one, + * the others are inactive. In this case, DSA will call + * ->change_tag_protocol() multiple times, once per CPU port. + * Since we implement the tagging protocol change towards "ocelot" or + * "seville" as effectively initializing the NPI port, what we are + * doing is effectively changing who the NPI port is to the last @cpu + * argument passed, which is an unused DSA CPU port and not the one + * that should actively pass traffic. + * Suppress DSA's calls on CPU ports that are inactive. + */ + dsa_switch_for_each_user_port(dp, ds) { + if (dp->cpu_dp->index == cpu) { + cpu_port_active = true; + break; + } + } + + if (!cpu_port_active) + return 0; + felix_del_tag_protocol(ds, cpu, old_proto); err = felix_set_tag_protocol(ds, cpu, proto); From 23cfe941b52e2fa645bdfd770087128a74c7dbee Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 12 Apr 2022 22:25:06 +0200 Subject: [PATCH 633/708] rtnetlink: Fix handling of disabled L3 stats in RTM_GETSTATS replies When L3 stats are disabled, rtnl_offload_xstats_get_size_stats() returns size of 0, which is supposed to be an indication that the corresponding attribute should not be emitted. However, instead, the current code reserves a 0-byte attribute. The reason this does not show up as a citation on a kasan kernel is that netdev_offload_xstats_get(), which is supposed to fill in the data, never ends up getting called, because rtnl_offload_xstats_get_stats() notices that the stats are not actually used and skips the call. Thus a zero-length IFLA_OFFLOAD_XSTATS_L3_STATS attribute ends up in a response, confusing the userspace. Fix by skipping the L3-stats related block in rtnl_offload_xstats_fill(). Fixes: 0e7788fd7622 ("net: rtnetlink: Add UAPI for obtaining L3 offload xstats") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/591b58e7623edc3eb66dd1fcfa8c8f133d090974.1649794741.git.petrm@nvidia.com Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 159c9c61e6af..d1381ea6d52e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -5242,6 +5242,8 @@ static int rtnl_offload_xstats_fill(struct sk_buff *skb, struct net_device *dev, *prividx = attr_id_l3_stats; size_l3 = rtnl_offload_xstats_get_size_stats(dev, t_l3); + if (!size_l3) + goto skip_l3_stats; attr = nla_reserve_64bit(skb, attr_id_l3_stats, size_l3, IFLA_OFFLOAD_XSTATS_UNSPEC); if (!attr) @@ -5253,6 +5255,7 @@ static int rtnl_offload_xstats_fill(struct sk_buff *skb, struct net_device *dev, return err; have_data = true; +skip_l3_stats: *prividx = 0; } From 2df3fc4a84e917a422935cc5bae18f43f9955d31 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Tue, 12 Apr 2022 16:04:20 -0500 Subject: [PATCH 634/708] net: bcmgenet: Revert "Use stronger register read/writes to assure ordering" It turns out after digging deeper into this bug, that it was being triggered by GCC12 failing to call the bcmgenet_enable_dma() routine. Given that a gcc12 fix has been merged [1] and the genet driver now works properly when built with gcc12, this commit should be reverted. [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105160 https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aabb9a261ef060cf24fd626713f1d7d9df81aa57 Fixes: 8d3ea3d402db ("net: bcmgenet: Use stronger register read/writes to assure ordering") Signed-off-by: Jeremy Linton Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20220412210420.1129430-1-jeremy.linton@arm.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 2dd79af9411b..9a41145dadfc 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -76,7 +76,7 @@ static inline void bcmgenet_writel(u32 value, void __iomem *offset) if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) __raw_writel(value, offset); else - writel(value, offset); + writel_relaxed(value, offset); } static inline u32 bcmgenet_readl(void __iomem *offset) @@ -84,7 +84,7 @@ static inline u32 bcmgenet_readl(void __iomem *offset) if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) return __raw_readl(offset); else - return readl(offset); + return readl_relaxed(offset); } static inline void dmadesc_set_length_status(struct bcmgenet_priv *priv, From 3836c73e6a2585561af928c6641d74528a8bdfa4 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 13 Apr 2022 16:01:32 +0200 Subject: [PATCH 635/708] gpio: sim: fix setting and getting multiple lines We need to take mask into account in the set/get_multiple() callbacks. Use bitmap_replace() instead of bitmap_copy(). Fixes: cb8c474e79be ("gpio: sim: new testing module") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko --- drivers/gpio/gpio-sim.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index 8e5d87984a48..41c31b10ae84 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -134,7 +134,7 @@ static int gpio_sim_get_multiple(struct gpio_chip *gc, struct gpio_sim_chip *chip = gpiochip_get_data(gc); mutex_lock(&chip->lock); - bitmap_copy(bits, chip->value_map, gc->ngpio); + bitmap_replace(bits, bits, chip->value_map, mask, gc->ngpio); mutex_unlock(&chip->lock); return 0; @@ -146,7 +146,7 @@ static void gpio_sim_set_multiple(struct gpio_chip *gc, struct gpio_sim_chip *chip = gpiochip_get_data(gc); mutex_lock(&chip->lock); - bitmap_copy(chip->value_map, bits, gc->ngpio); + bitmap_replace(chip->value_map, chip->value_map, bits, mask, gc->ngpio); mutex_unlock(&chip->lock); } From 23380e4d53305765789fd1a2cf6bddb07239cd3b Mon Sep 17 00:00:00 2001 From: Alexey Bayduraev Date: Wed, 13 Apr 2022 18:46:40 -0700 Subject: [PATCH 636/708] perf record: Fix per-thread option Per-thread mode doesn't have specific CPUs for events, add checks for this case. Minor fix to a pr_debug by Ian Rogers to avoid an out of bound array access. Fixes: 7954f71689f90cb2 ("perf record: Introduce thread affinity and mmap masks") Reported-by: Ian Rogers Signed-off-by: Alexey Bayduraev Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Stephane Eranian Link: https://lore.kernel.org/r/20220414014642.3308206-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ba74fab02e62..069825c48d40 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -989,8 +989,11 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru struct mmap *overwrite_mmap = evlist->overwrite_mmap; struct perf_cpu_map *cpus = evlist->core.user_requested_cpus; - thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, - thread_data->mask->maps.nbits); + if (cpu_map__is_dummy(cpus)) + thread_data->nr_mmaps = nr_mmaps; + else + thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, + thread_data->mask->maps.nbits); if (mmap) { thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); if (!thread_data->maps) @@ -1007,16 +1010,17 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps); for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { - if (test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) { + if (cpu_map__is_dummy(cpus) || + test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) { if (thread_data->maps) { thread_data->maps[tm] = &mmap[m]; pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n", - thread_data, cpus->map[m].cpu, tm, m); + thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); } if (thread_data->overwrite_maps) { thread_data->overwrite_maps[tm] = &overwrite_mmap[m]; pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n", - thread_data, cpus->map[m].cpu, tm, m); + thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); } tm++; } @@ -3329,6 +3333,9 @@ static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_c { int c; + if (cpu_map__is_dummy(cpus)) + return; + for (c = 0; c < cpus->nr; c++) set_bit(cpus->map[c].cpu, mask->bits); } @@ -3680,6 +3687,11 @@ static int record__init_thread_masks(struct record *rec) if (!record__threads_enabled(rec)) return record__init_thread_default_masks(rec, cpus); + if (cpu_map__is_dummy(cpus)) { + pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); + return -EINVAL; + } + switch (rec->opts.threads_spec) { case THREAD_SPEC__CPU: ret = record__init_thread_cpu_masks(rec, cpus); From 8cb7a188ac33b71ec24a9081d1521f46dcab53c4 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Tue, 12 Apr 2022 22:10:58 +0530 Subject: [PATCH 637/708] perf bench: Fix numa testcase to check if CPU used to bind task is online Perf numa bench test fails with error: Testcase: ./perf bench numa mem -p 2 -t 1 -P 1024 -C 0,8 -M 1,0 -s 20 -zZq --thp 1 --no-data_rand_walk Failure snippet: <<>> Running 'numa/mem' benchmark: # Running main, "perf bench numa numa-mem -p 2 -t 1 -P 1024 -C 0,8 -M 1,0 -s 20 -zZq --thp 1 --no-data_rand_walk" perf: bench/numa.c:333: bind_to_cpumask: Assertion `!(ret)' failed. <<>> The Testcases uses CPU's 0 and 8. In function "parse_setup_cpu_list", There is check to see if cpu number is greater than max cpu's possible in the system ie via "if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) {". But it could happen that system has say 48 CPU's, but only number of online CPU's is 0-7. Other CPU's are offlined. Since "g->p.nr_cpus" is 48, so function will go ahead and set bit for CPU 8 also in cpumask ( td->bind_cpumask). bind_to_cpumask function is called to set affinity using sched_setaffinity and the cpumask. Since the CPU8 is not present, set affinity will fail here with EINVAL. Fix this issue by adding a check to make sure that, CPU's provided in the input argument values are online before proceeding further and skip the test. For this, include new helper function "is_cpu_online" in "tools/perf/util/header.c". Since "BIT(x)" definition will get included from header.h, remove that from bench/numa.c Reported-by: Disha Goel Signed-off-by: Athira Jajeev Tested-by: Disha Goel Cc: Ian Rogers Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Srikar Dronamraju Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20220412164059.42654-2-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 8 +++++-- tools/perf/util/header.c | 51 ++++++++++++++++++++++++++++++++++++++++ tools/perf/util/header.h | 1 + 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index f2640179ada9..c838c248aa2c 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -34,6 +34,7 @@ #include #include +#include "../util/header.h" #include #include @@ -585,6 +586,11 @@ static int parse_setup_cpu_list(void) return -1; } + if (is_cpu_online(bind_cpu_0) != 1 || is_cpu_online(bind_cpu_1) != 1) { + printf("\nTest not applicable, bind_cpu_0 or bind_cpu_1 is offline\n"); + return -1; + } + BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0); BUG_ON(bind_cpu_0 > bind_cpu_1); @@ -752,8 +758,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused, return parse_node_list(arg); } -#define BIT(x) (1ul << x) - static inline uint32_t lfsr_32(uint32_t lfsr) { const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d546ff724dbe..a27132e5a5ef 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -983,6 +983,57 @@ static int write_dir_format(struct feat_fd *ff, return do_write(ff, &data->dir.version, sizeof(data->dir.version)); } +/* + * Check whether a CPU is online + * + * Returns: + * 1 -> if CPU is online + * 0 -> if CPU is offline + * -1 -> error case + */ +int is_cpu_online(unsigned int cpu) +{ + char *str; + size_t strlen; + char buf[256]; + int status = -1; + struct stat statbuf; + + snprintf(buf, sizeof(buf), + "/sys/devices/system/cpu/cpu%d", cpu); + if (stat(buf, &statbuf) != 0) + return 0; + + /* + * Check if /sys/devices/system/cpu/cpux/online file + * exists. Some cases cpu0 won't have online file since + * it is not expected to be turned off generally. + * In kernels without CONFIG_HOTPLUG_CPU, this + * file won't exist + */ + snprintf(buf, sizeof(buf), + "/sys/devices/system/cpu/cpu%d/online", cpu); + if (stat(buf, &statbuf) != 0) + return 1; + + /* + * Read online file using sysfs__read_str. + * If read or open fails, return -1. + * If read succeeds, return value from file + * which gets stored in "str" + */ + snprintf(buf, sizeof(buf), + "devices/system/cpu/cpu%d/online", cpu); + + if (sysfs__read_str(buf, &str, &strlen) < 0) + return status; + + status = atoi(str); + + free(str); + return status; +} + #ifdef HAVE_LIBBPF_SUPPORT static int write_bpf_prog_info(struct feat_fd *ff, struct evlist *evlist __maybe_unused) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index c9e3265832d9..0eb4bc29a5a4 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -158,6 +158,7 @@ int do_write(struct feat_fd *fd, const void *buf, size_t size); int write_padded(struct feat_fd *fd, const void *bf, size_t count, size_t count_aligned); +int is_cpu_online(unsigned int cpu); /* * arch specific callback */ From f58faed7fb3f784efdbf3b401368dcf51a6e28fa Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Tue, 12 Apr 2022 22:10:59 +0530 Subject: [PATCH 638/708] perf bench: Fix numa bench to fix usage of affinity for machines with #CPUs > 1K The 'perf bench numa' testcase fails on systems with more than 1K CPUs. Testcase: perf bench numa mem -p 1 -t 3 -P 512 -s 100 -zZ0qcm --thp 1 Snippet of code: <<>> perf: bench/numa.c:302: bind_to_node: Assertion `!(ret)' failed. Aborted (core dumped) <<>> bind_to_node() uses "sched_getaffinity" to save the original cpumask and this call is returning EINVAL ((invalid argument). This happens because the default mask size in glibc is 1024. To overcome this 1024 CPUs mask size limitation of cpu_set_t, change the mask size using the CPU_*_S macros ie, use CPU_ALLOC to allocate cpumask, CPU_ALLOC_SIZE for size. Apart from fixing this for "orig_mask", apply same logic to "mask" as well which is used to setaffinity so that mask size is large enough to represent number of possible CPU's in the system. sched_getaffinity is used in one more place in perf numa bench. It is in "bind_to_cpu" function. Apply the same logic there also. Though currently no failure is reported from there, it is ideal to change getaffinity to work with such system configurations having CPU's more than default mask size supported by glibc. Also fix "sched_setaffinity" to use mask size which is large enough to represent number of possible CPU's in the system. Fixed all places where "bind_cpumask" which is part of "struct thread_data" is used such that bind_cpumask works in all configuration. Reported-by: Disha Goel Signed-off-by: Athira Jajeev Cc: Ian Rogers Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Srikar Dronamraju Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20220412164059.42654-3-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 128 +++++++++++++++++++++++++++++----------- 1 file changed, 95 insertions(+), 33 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index c838c248aa2c..44e1f8a44087 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -55,7 +55,7 @@ struct thread_data { int curr_cpu; - cpu_set_t bind_cpumask; + cpu_set_t *bind_cpumask; int bind_node; u8 *process_data; int process_nr; @@ -267,71 +267,115 @@ static bool node_has_cpus(int node) return ret; } -static cpu_set_t bind_to_cpu(int target_cpu) +static cpu_set_t *bind_to_cpu(int target_cpu) { - cpu_set_t orig_mask, mask; - int ret; + int nrcpus = numa_num_possible_cpus(); + cpu_set_t *orig_mask, *mask; + size_t size; - ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); - BUG_ON(ret); + orig_mask = CPU_ALLOC(nrcpus); + BUG_ON(!orig_mask); + size = CPU_ALLOC_SIZE(nrcpus); + CPU_ZERO_S(size, orig_mask); - CPU_ZERO(&mask); + if (sched_getaffinity(0, size, orig_mask)) + goto err_out; + + mask = CPU_ALLOC(nrcpus); + if (!mask) + goto err_out; + + CPU_ZERO_S(size, mask); if (target_cpu == -1) { int cpu; for (cpu = 0; cpu < g->p.nr_cpus; cpu++) - CPU_SET(cpu, &mask); + CPU_SET_S(cpu, size, mask); } else { - BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus); - CPU_SET(target_cpu, &mask); + if (target_cpu < 0 || target_cpu >= g->p.nr_cpus) + goto err; + + CPU_SET_S(target_cpu, size, mask); } - ret = sched_setaffinity(0, sizeof(mask), &mask); - BUG_ON(ret); + if (sched_setaffinity(0, size, mask)) + goto err; return orig_mask; + +err: + CPU_FREE(mask); +err_out: + CPU_FREE(orig_mask); + + /* BUG_ON due to failure in allocation of orig_mask/mask */ + BUG_ON(-1); } -static cpu_set_t bind_to_node(int target_node) +static cpu_set_t *bind_to_node(int target_node) { - cpu_set_t orig_mask, mask; + int nrcpus = numa_num_possible_cpus(); + size_t size; + cpu_set_t *orig_mask, *mask; int cpu; - int ret; - ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); - BUG_ON(ret); + orig_mask = CPU_ALLOC(nrcpus); + BUG_ON(!orig_mask); + size = CPU_ALLOC_SIZE(nrcpus); + CPU_ZERO_S(size, orig_mask); - CPU_ZERO(&mask); + if (sched_getaffinity(0, size, orig_mask)) + goto err_out; + + mask = CPU_ALLOC(nrcpus); + if (!mask) + goto err_out; + + CPU_ZERO_S(size, mask); if (target_node == NUMA_NO_NODE) { for (cpu = 0; cpu < g->p.nr_cpus; cpu++) - CPU_SET(cpu, &mask); + CPU_SET_S(cpu, size, mask); } else { struct bitmask *cpumask = numa_allocate_cpumask(); - BUG_ON(!cpumask); + if (!cpumask) + goto err; + if (!numa_node_to_cpus(target_node, cpumask)) { for (cpu = 0; cpu < (int)cpumask->size; cpu++) { if (numa_bitmask_isbitset(cpumask, cpu)) - CPU_SET(cpu, &mask); + CPU_SET_S(cpu, size, mask); } } numa_free_cpumask(cpumask); } - ret = sched_setaffinity(0, sizeof(mask), &mask); - BUG_ON(ret); + if (sched_setaffinity(0, size, mask)) + goto err; return orig_mask; + +err: + CPU_FREE(mask); +err_out: + CPU_FREE(orig_mask); + + /* BUG_ON due to failure in allocation of orig_mask/mask */ + BUG_ON(-1); } -static void bind_to_cpumask(cpu_set_t mask) +static void bind_to_cpumask(cpu_set_t *mask) { int ret; + size_t size = CPU_ALLOC_SIZE(numa_num_possible_cpus()); - ret = sched_setaffinity(0, sizeof(mask), &mask); - BUG_ON(ret); + ret = sched_setaffinity(0, size, mask); + if (ret) { + CPU_FREE(mask); + BUG_ON(ret); + } } static void mempol_restore(void) @@ -377,7 +421,7 @@ do { \ static u8 *alloc_data(ssize_t bytes0, int map_flags, int init_zero, int init_cpu0, int thp, int init_random) { - cpu_set_t orig_mask; + cpu_set_t *orig_mask = NULL; ssize_t bytes; u8 *buf; int ret; @@ -435,6 +479,7 @@ static u8 *alloc_data(ssize_t bytes0, int map_flags, /* Restore affinity: */ if (init_cpu0) { bind_to_cpumask(orig_mask); + CPU_FREE(orig_mask); mempol_restore(); } @@ -595,6 +640,7 @@ static int parse_setup_cpu_list(void) BUG_ON(bind_cpu_0 > bind_cpu_1); for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) { + size_t size = CPU_ALLOC_SIZE(g->p.nr_cpus); int i; for (i = 0; i < mul; i++) { @@ -614,10 +660,15 @@ static int parse_setup_cpu_list(void) tprintf("%2d", bind_cpu); } - CPU_ZERO(&td->bind_cpumask); + td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus); + BUG_ON(!td->bind_cpumask); + CPU_ZERO_S(size, td->bind_cpumask); for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) { - BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus); - CPU_SET(cpu, &td->bind_cpumask); + if (cpu < 0 || cpu >= g->p.nr_cpus) { + CPU_FREE(td->bind_cpumask); + BUG_ON(-1); + } + CPU_SET_S(cpu, size, td->bind_cpumask); } t++; } @@ -1245,7 +1296,7 @@ static void *worker_thread(void *__tdata) * by migrating to CPU#0: */ if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) { - cpu_set_t orig_mask; + cpu_set_t *orig_mask; int target_cpu; int this_cpu; @@ -1269,6 +1320,7 @@ static void *worker_thread(void *__tdata) printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu); bind_to_cpumask(orig_mask); + CPU_FREE(orig_mask); } if (details >= 3) { @@ -1402,21 +1454,31 @@ static void init_thread_data(void) for (t = 0; t < g->p.nr_tasks; t++) { struct thread_data *td = g->threads + t; + size_t cpuset_size = CPU_ALLOC_SIZE(g->p.nr_cpus); int cpu; /* Allow all nodes by default: */ td->bind_node = NUMA_NO_NODE; /* Allow all CPUs by default: */ - CPU_ZERO(&td->bind_cpumask); + td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus); + BUG_ON(!td->bind_cpumask); + CPU_ZERO_S(cpuset_size, td->bind_cpumask); for (cpu = 0; cpu < g->p.nr_cpus; cpu++) - CPU_SET(cpu, &td->bind_cpumask); + CPU_SET_S(cpu, cpuset_size, td->bind_cpumask); } } static void deinit_thread_data(void) { ssize_t size = sizeof(*g->threads)*g->p.nr_tasks; + int t; + + /* Free the bind_cpumask allocated for thread_data */ + for (t = 0; t < g->p.nr_tasks; t++) { + struct thread_data *td = g->threads + t; + CPU_FREE(td->bind_cpumask); + } free_data(g->threads, size); } From 8535c0185d14ea41f0efd6a357961b05daf6687e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 14 Apr 2022 16:44:43 +0800 Subject: [PATCH 639/708] block: fix offset/size check in bio_trim() Unit of bio->bi_iter.bi_size is bytes, but unit of offset/size is sector. Fix the above issue in checking offset/size in bio_trim(). Fixes: e83502ca5f1e ("block: fix argument type of bio_trim()") Cc: Chaitanya Kulkarni Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220414084443.1736850-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index cdd7b2915c53..4259125e16ab 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1598,7 +1598,7 @@ EXPORT_SYMBOL(bio_split); void bio_trim(struct bio *bio, sector_t offset, sector_t size) { if (WARN_ON_ONCE(offset > BIO_MAX_SECTORS || size > BIO_MAX_SECTORS || - offset + size > bio->bi_iter.bi_size)) + offset + size > bio_sectors(bio))) return; size <<= 9; From 3e3876d322aef82416ecc496a4d4a587e0fdf7a3 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 13 Apr 2022 16:48:36 +0800 Subject: [PATCH 640/708] block: null_blk: end timed out poll request When poll request is timed out, it is removed from the poll list, but not completed, so the request is leaked, and never get chance to complete. Fix the issue by ending it in timeout handler. Fixes: 0a593fbbc245 ("null_blk: poll queue support") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220413084836.1571995-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 05b1120e6623..c441a4972064 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1600,7 +1600,7 @@ static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res) * Only fake timeouts need to execute blk_mq_complete_request() here. */ cmd->error = BLK_STS_TIMEOUT; - if (cmd->fake_timeout) + if (cmd->fake_timeout || hctx->type == HCTX_TYPE_POLL) blk_mq_complete_request(rq); return BLK_EH_DONE; } From 5a674d9dc9a097308e8f5848b8439e8a3eeac846 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 13 Apr 2022 09:01:21 -0500 Subject: [PATCH 641/708] dt-bindings: Fix array constraints on scalar properties Scalar properties shouldn't have array constraints (minItems, maxItems, items). These constraints can simply be dropped with any constraints under 'items' moved up a level. Cc: Agathe Porte Cc: Guenter Roeck Cc: Jean Delvare Cc: Krzysztof Kozlowski Cc: Olivier Moysan Cc: Arnaud Pouliquen Cc: Jonathan Cameron Cc: Lars-Peter Clausen Cc: Philipp Zabel Cc: Mauro Carvalho Chehab Cc: Bjorn Andersson Cc: Mathieu Poirier Cc: Mark Brown Cc: Fabrice Gasnier Cc: Yunfei Dong Cc: Geert Uytterhoeven Cc: linux-hwmon@vger.kernel.org Cc: alsa-devel@alsa-project.org Cc: linux-iio@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: linux-remoteproc@vger.kernel.org Cc: linux-spi@vger.kernel.org Signed-off-by: Rob Herring Acked-by: Jonathan Cameron Reviewed-by: Philipp Zabel Acked-by: Mark Brown Reviewed-by: Geert Uytterhoeven Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/20220413140121.3132837-1-robh@kernel.org --- Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml | 5 ++--- .../devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml | 4 +--- Documentation/devicetree/bindings/media/coda.yaml | 1 - .../devicetree/bindings/media/mediatek,vcodec-decoder.yaml | 2 -- .../devicetree/bindings/media/mediatek,vcodec-encoder.yaml | 2 -- .../bindings/media/mediatek,vcodec-subdev-decoder.yaml | 1 - .../devicetree/bindings/remoteproc/qcom,sc7280-wpss-pil.yaml | 4 +--- Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml | 2 -- 8 files changed, 4 insertions(+), 17 deletions(-) diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml index 801ca9ba7d34..e7493e25a7d2 100644 --- a/Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml +++ b/Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml @@ -58,9 +58,8 @@ patternProperties: The value (two's complement) to be programmed in the channel specific N correction register. For remote channels only. $ref: /schemas/types.yaml#/definitions/int32 - items: - minimum: -128 - maximum: 127 + minimum: -128 + maximum: 127 required: - reg diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml index 912372706280..92f9472a77ae 100644 --- a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml @@ -108,9 +108,7 @@ patternProperties: - [1-5]: order 1 to 5. For audio purpose it is recommended to use order 3 to 5. $ref: /schemas/types.yaml#/definitions/uint32 - items: - minimum: 0 - maximum: 5 + maximum: 5 "#io-channel-cells": const: 1 diff --git a/Documentation/devicetree/bindings/media/coda.yaml b/Documentation/devicetree/bindings/media/coda.yaml index 36781ee4617f..c9d5adbc8c4a 100644 --- a/Documentation/devicetree/bindings/media/coda.yaml +++ b/Documentation/devicetree/bindings/media/coda.yaml @@ -65,7 +65,6 @@ properties: iram: $ref: /schemas/types.yaml#/definitions/phandle description: phandle pointing to the SRAM device node - maxItems: 1 required: - compatible diff --git a/Documentation/devicetree/bindings/media/mediatek,vcodec-decoder.yaml b/Documentation/devicetree/bindings/media/mediatek,vcodec-decoder.yaml index 9b179bb44dfb..aa55ca65d6ed 100644 --- a/Documentation/devicetree/bindings/media/mediatek,vcodec-decoder.yaml +++ b/Documentation/devicetree/bindings/media/mediatek,vcodec-decoder.yaml @@ -63,13 +63,11 @@ properties: mediatek,vpu: $ref: /schemas/types.yaml#/definitions/phandle - maxItems: 1 description: Describes point to vpu. mediatek,scp: $ref: /schemas/types.yaml#/definitions/phandle - maxItems: 1 description: Describes point to scp. diff --git a/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml b/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml index df7df06c378f..deb5b657a2d5 100644 --- a/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml +++ b/Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml @@ -55,13 +55,11 @@ properties: mediatek,vpu: $ref: /schemas/types.yaml#/definitions/phandle - maxItems: 1 description: Describes point to vpu. mediatek,scp: $ref: /schemas/types.yaml#/definitions/phandle - maxItems: 1 description: Describes point to scp. diff --git a/Documentation/devicetree/bindings/media/mediatek,vcodec-subdev-decoder.yaml b/Documentation/devicetree/bindings/media/mediatek,vcodec-subdev-decoder.yaml index 7687be0f50aa..c73bf2352aca 100644 --- a/Documentation/devicetree/bindings/media/mediatek,vcodec-subdev-decoder.yaml +++ b/Documentation/devicetree/bindings/media/mediatek,vcodec-subdev-decoder.yaml @@ -61,7 +61,6 @@ properties: mediatek,scp: $ref: /schemas/types.yaml#/definitions/phandle - maxItems: 1 description: | The node of system control processor (SCP), using the remoteproc & rpmsg framework. diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,sc7280-wpss-pil.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,sc7280-wpss-pil.yaml index 2424de733ee4..d99a729d2710 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,sc7280-wpss-pil.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,sc7280-wpss-pil.yaml @@ -104,8 +104,7 @@ properties: qcom,smem-state-names: $ref: /schemas/types.yaml#/definitions/string description: The names of the state bits used for SMP2P output - items: - - const: stop + const: stop glink-edge: type: object @@ -130,7 +129,6 @@ properties: qcom,remote-pid: $ref: /schemas/types.yaml#/definitions/uint32 description: ID of the shared memory used by GLINK for communication with WPSS - maxItems: 1 required: - interrupts diff --git a/Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml b/Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml index b104899205f6..5de710adfa63 100644 --- a/Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml +++ b/Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml @@ -124,7 +124,6 @@ properties: description: | Override the default TX fifo size. Unit is words. Ignored if 0. $ref: /schemas/types.yaml#/definitions/uint32 - maxItems: 1 default: 64 renesas,rx-fifo-size: @@ -132,7 +131,6 @@ properties: description: | Override the default RX fifo size. Unit is words. Ignored if 0. $ref: /schemas/types.yaml#/definitions/uint32 - maxItems: 1 default: 64 required: From 558bd89edc06051c978dc8b0a9771941b406a9fd Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 7 Apr 2022 17:51:07 -0500 Subject: [PATCH 642/708] ARM: dts: Fix more boolean properties with values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Boolean properties in DT are present or not present and don't take a value. A property such as 'foo = <0>;' evaluated to true. IOW, the value doesn't matter. It may have been intended that 0 values are false, but there is no change in behavior with this patch. Signed-off-by: Rob Herring Reviewed-by: Claudiu Beznea Reviewed-by: Bjorn Andersson Cc: Krzysztof Kozlowski Cc: Nicolas Ferre Cc: Alexandre Belloni Cc: Claudiu Beznea Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Cc: "Benoît Cousson" Cc: Tony Lindgren Cc: Andy Gross Cc: Bjorn Andersson Cc: linux-arm-kernel@lists.infradead.org Cc: linux-omap@vger.kernel.org Cc: linux-arm-msm@vger.kernel.org Link: https://lore.kernel.org/r/20220407225107.2175958-1-robh@kernel.org' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/at91-sam9_l9260.dts | 2 +- arch/arm/boot/dts/imx28-ts4600.dts | 2 +- arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi | 4 ++-- arch/arm/boot/dts/qcom-ipq8064.dtsi | 8 ++++---- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/at91-sam9_l9260.dts b/arch/arm/boot/dts/at91-sam9_l9260.dts index 1e2a28c2f365..2fb51b9aca2a 100644 --- a/arch/arm/boot/dts/at91-sam9_l9260.dts +++ b/arch/arm/boot/dts/at91-sam9_l9260.dts @@ -101,7 +101,7 @@ nand0: nand@40000000 { nand-bus-width = <8>; nand-ecc-mode = "soft"; - nand-on-flash-bbt = <1>; + nand-on-flash-bbt; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx28-ts4600.dts b/arch/arm/boot/dts/imx28-ts4600.dts index 097ec35c62d8..0d58da1c0cc5 100644 --- a/arch/arm/boot/dts/imx28-ts4600.dts +++ b/arch/arm/boot/dts/imx28-ts4600.dts @@ -26,7 +26,7 @@ pinctrl-0 = <&mmc0_4bit_pins_a &mmc0_sck_cfg &en_sd_pwr>; - broken-cd = <1>; + broken-cd; bus-width = <4>; vmmc-supply = <®_vddio_sd0>; status = "okay"; diff --git a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi index b4664ab00256..d3da8b1b473b 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi @@ -118,8 +118,8 @@ gpmc,device-width = <2>; gpmc,wait-pin = <0>; gpmc,burst-length = <4>; - gpmc,cycle2cycle-samecsen = <1>; - gpmc,cycle2cycle-diffcsen = <1>; + gpmc,cycle2cycle-samecsen; + gpmc,cycle2cycle-diffcsen; gpmc,cs-on-ns = <0>; gpmc,cs-rd-off-ns = <45>; gpmc,cs-wr-off-ns = <45>; diff --git a/arch/arm/boot/dts/qcom-ipq8064.dtsi b/arch/arm/boot/dts/qcom-ipq8064.dtsi index 996f4458d9fc..8cb04aa8ed2f 100644 --- a/arch/arm/boot/dts/qcom-ipq8064.dtsi +++ b/arch/arm/boot/dts/qcom-ipq8064.dtsi @@ -972,7 +972,7 @@ snps,axi-config = <&stmmac_axi_setup>; snps,pbl = <32>; - snps,aal = <1>; + snps,aal; qcom,nss-common = <&nss_common>; qcom,qsgmii-csr = <&qsgmii_csr>; @@ -996,7 +996,7 @@ snps,axi-config = <&stmmac_axi_setup>; snps,pbl = <32>; - snps,aal = <1>; + snps,aal; qcom,nss-common = <&nss_common>; qcom,qsgmii-csr = <&qsgmii_csr>; @@ -1020,7 +1020,7 @@ snps,axi-config = <&stmmac_axi_setup>; snps,pbl = <32>; - snps,aal = <1>; + snps,aal; qcom,nss-common = <&nss_common>; qcom,qsgmii-csr = <&qsgmii_csr>; @@ -1044,7 +1044,7 @@ snps,axi-config = <&stmmac_axi_setup>; snps,pbl = <32>; - snps,aal = <1>; + snps,aal; qcom,nss-common = <&nss_common>; qcom,qsgmii-csr = <&qsgmii_csr>; From 56147a156e7e2f50bef695efe6cc4fe8e91c40dc Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 7 Apr 2022 16:30:27 +0200 Subject: [PATCH 643/708] ARM: dts: align SPI NOR node name with dtschema The node names should be generic and SPI NOR dtschema expects "flash". Signed-off-by: Krzysztof Kozlowski Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20220407143027.294678-1-krzysztof.kozlowski@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/da850-evm.dts | 2 +- arch/arm/boot/dts/dm8168-evm.dts | 2 +- arch/arm/boot/dts/spear1310-evb.dts | 2 +- arch/arm/boot/dts/spear1340-evb.dts | 2 +- arch/arm/boot/dts/stm32mp157c-emstamp-argon.dtsi | 2 +- arch/arm/boot/dts/stm32mp157c-ev1.dts | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/da850-evm.dts b/arch/arm/boot/dts/da850-evm.dts index 87c517d65f62..e9aecac4f5b5 100644 --- a/arch/arm/boot/dts/da850-evm.dts +++ b/arch/arm/boot/dts/da850-evm.dts @@ -278,7 +278,7 @@ status = "okay"; pinctrl-names = "default"; pinctrl-0 = <&spi1_pins &spi1_cs0_pin>; - flash: m25p80@0 { + flash: flash@0 { #address-cells = <1>; #size-cells = <1>; compatible = "jedec,spi-nor"; diff --git a/arch/arm/boot/dts/dm8168-evm.dts b/arch/arm/boot/dts/dm8168-evm.dts index 5126e2d72ed7..778796c10af8 100644 --- a/arch/arm/boot/dts/dm8168-evm.dts +++ b/arch/arm/boot/dts/dm8168-evm.dts @@ -177,7 +177,7 @@ pinctrl-names = "default"; pinctrl-0 = <&mcspi1_pins>; - m25p80@0 { + flash@0 { compatible = "w25x32"; spi-max-frequency = <48000000>; reg = <0>; diff --git a/arch/arm/boot/dts/spear1310-evb.dts b/arch/arm/boot/dts/spear1310-evb.dts index 4cbadcb41084..ddd1cf4d0554 100644 --- a/arch/arm/boot/dts/spear1310-evb.dts +++ b/arch/arm/boot/dts/spear1310-evb.dts @@ -379,7 +379,7 @@ }; }; - m25p80@1 { + flash@1 { compatible = "st,m25p80"; reg = <1>; spi-max-frequency = <12000000>; diff --git a/arch/arm/boot/dts/spear1340-evb.dts b/arch/arm/boot/dts/spear1340-evb.dts index fd194ebeedc9..3a51a41eb5e4 100644 --- a/arch/arm/boot/dts/spear1340-evb.dts +++ b/arch/arm/boot/dts/spear1340-evb.dts @@ -439,7 +439,7 @@ cs-gpios = <&gpiopinctrl 80 0>, <&gpiopinctrl 24 0>, <&gpiopinctrl 85 0>; - m25p80@0 { + flash@0 { compatible = "m25p80"; reg = <0>; spi-max-frequency = <12000000>; diff --git a/arch/arm/boot/dts/stm32mp157c-emstamp-argon.dtsi b/arch/arm/boot/dts/stm32mp157c-emstamp-argon.dtsi index 33ae5e0590df..ac53ee3c496b 100644 --- a/arch/arm/boot/dts/stm32mp157c-emstamp-argon.dtsi +++ b/arch/arm/boot/dts/stm32mp157c-emstamp-argon.dtsi @@ -398,7 +398,7 @@ #size-cells = <0>; status = "okay"; - flash0: is25lp016d@0 { + flash0: flash@0 { compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <133000000>; diff --git a/arch/arm/boot/dts/stm32mp157c-ev1.dts b/arch/arm/boot/dts/stm32mp157c-ev1.dts index e222d2d2cb44..d142dd30e16b 100644 --- a/arch/arm/boot/dts/stm32mp157c-ev1.dts +++ b/arch/arm/boot/dts/stm32mp157c-ev1.dts @@ -262,7 +262,7 @@ #size-cells = <0>; status = "okay"; - flash0: mx66l51235l@0 { + flash0: flash@0 { compatible = "jedec,spi-nor"; reg = <0>; spi-rx-bus-width = <4>; @@ -271,7 +271,7 @@ #size-cells = <1>; }; - flash1: mx66l51235l@1 { + flash1: flash@1 { compatible = "jedec,spi-nor"; reg = <1>; spi-rx-bus-width = <4>; From 4f3d7e5a0b6d95e9763d7285435e2b7809feedff Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 7 Apr 2022 17:52:54 -0500 Subject: [PATCH 644/708] arm64: dts: qcom/sdm845-shift-axolotl: Fix boolean properties with values Boolean properties in DT are present or not present and don't take a value. A property such as 'foo = <0>;' evaluated to true. IOW, the value doesn't matter. It may have been intended that 0 values are false, but there is no change in behavior with this patch. Signed-off-by: Rob Herring Cc: Andy Gross Cc: Bjorn Andersson Cc: Krzysztof Kozlowski Cc: linux-arm-msm@vger.kernel.org Link: https://lore.kernel.org/r/20220407225254.2178644-1-robh@kernel.org' Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts b/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts index 8553c8bf79bd..103cc40816fd 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts @@ -563,7 +563,7 @@ config { pins = "gpio6", "gpio11"; drive-strength = <8>; - bias-disable = <0>; + bias-disable; }; }; From 24a4351e1c04de8e580bf06cae7f3a79094fe7f0 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 10 Apr 2022 22:28:37 +0200 Subject: [PATCH 645/708] ARM: config: Update Gemini defconfig The Gemini defconfig needs to be updated due to DSA driver Kconfig changes in the v5.18 merge window: CONFIG_NET_DSA_REALTEK_SMI is now behind CONFIG_NET_DSA_REALTEK and the desired DSA switch need to be selected explicitly with CONFIG_NET_DSA_REALTEK_RTL8366RB. Take this opportunity to update some other minor config options: - CONFIG_MARVELL_PHY moved around because of Kconfig changes. - CONFIG_SENSORS_DRIVETEMP should be selected since is regulates the system critical alert temperature on some devices, which is nice if it is handled even if initramfs or root fails to mount. Fixes: 319a70a5fea9 ("net: dsa: realtek-smi: move to subdirectory") Fixes: 765c39a4fafe ("net: dsa: realtek: convert subdrivers into modules") Signed-off-by: Linus Walleij Cc: Hans Ulli Kroll Cc: Luiz Angelo Daros de Luca Signed-off-by: Arnd Bergmann --- arch/arm/configs/gemini_defconfig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm/configs/gemini_defconfig b/arch/arm/configs/gemini_defconfig index a7acfee11ffc..a80bc8a43091 100644 --- a/arch/arm/configs/gemini_defconfig +++ b/arch/arm/configs/gemini_defconfig @@ -49,11 +49,13 @@ CONFIG_ATA=y CONFIG_PATA_FTIDE010=y CONFIG_NETDEVICES=y CONFIG_TUN=y +CONFIG_NET_DSA_REALTEK=y CONFIG_NET_DSA_REALTEK_SMI=y +CONFIG_NET_DSA_REALTEK_RTL8366RB=y CONFIG_GEMINI_ETHERNET=y +CONFIG_MARVELL_PHY=y CONFIG_MDIO_BITBANG=y CONFIG_MDIO_GPIO=y -CONFIG_MARVELL_PHY=y CONFIG_INPUT_EVDEV=y CONFIG_KEYBOARD_GPIO=y # CONFIG_INPUT_MOUSE is not set @@ -66,6 +68,7 @@ CONFIG_SERIAL_OF_PLATFORM=y CONFIG_I2C_GPIO=y CONFIG_SPI=y CONFIG_SPI_GPIO=y +CONFIG_SENSORS_DRIVETEMP=y CONFIG_SENSORS_GPIO_FAN=y CONFIG_SENSORS_LM75=y CONFIG_THERMAL=y From 652980b1541c5a02e6410647c7daf840c06d724a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 13 Apr 2022 09:00:15 -0500 Subject: [PATCH 646/708] dt-bindings: display: panel-timing: Define a single type for properties It's not good practice to define multiple types for the same property, so factor out the type reference making the properties always an uint32-array with a length of 1 or 3 items. Signed-off-by: Rob Herring Reviewed-by: Sam Ravnborg Link: https://lore.kernel.org/r/20220413140016.3131013-1-robh@kernel.org --- .../bindings/display/panel/panel-timing.yaml | 42 ++++++++----------- 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/Documentation/devicetree/bindings/display/panel/panel-timing.yaml b/Documentation/devicetree/bindings/display/panel/panel-timing.yaml index 9bf592dc3033..7749de95ee40 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-timing.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-timing.yaml @@ -71,78 +71,72 @@ properties: hfront-porch: description: Horizontal front porch panel timing + $ref: /schemas/types.yaml#/definitions/uint32-array oneOf: - - $ref: /schemas/types.yaml#/definitions/uint32 - maxItems: 1 + - maxItems: 1 items: description: typical number of pixels - - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 3 + - minItems: 3 maxItems: 3 items: description: min, typ, max number of pixels hback-porch: description: Horizontal back porch timing + $ref: /schemas/types.yaml#/definitions/uint32-array oneOf: - - $ref: /schemas/types.yaml#/definitions/uint32 - maxItems: 1 + - maxItems: 1 items: description: typical number of pixels - - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 3 + - minItems: 3 maxItems: 3 items: description: min, typ, max number of pixels hsync-len: description: Horizontal sync length panel timing + $ref: /schemas/types.yaml#/definitions/uint32-array oneOf: - - $ref: /schemas/types.yaml#/definitions/uint32 - maxItems: 1 + - maxItems: 1 items: description: typical number of pixels - - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 3 + - minItems: 3 maxItems: 3 items: description: min, typ, max number of pixels vfront-porch: description: Vertical front porch panel timing + $ref: /schemas/types.yaml#/definitions/uint32-array oneOf: - - $ref: /schemas/types.yaml#/definitions/uint32 - maxItems: 1 + - maxItems: 1 items: description: typical number of lines - - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 3 + - minItems: 3 maxItems: 3 items: description: min, typ, max number of lines vback-porch: description: Vertical back porch panel timing + $ref: /schemas/types.yaml#/definitions/uint32-array oneOf: - - $ref: /schemas/types.yaml#/definitions/uint32 - maxItems: 1 + - maxItems: 1 items: description: typical number of lines - - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 3 + - minItems: 3 maxItems: 3 items: description: min, typ, max number of lines vsync-len: description: Vertical sync length panel timing + $ref: /schemas/types.yaml#/definitions/uint32-array oneOf: - - $ref: /schemas/types.yaml#/definitions/uint32 - maxItems: 1 + - maxItems: 1 items: description: typical number of lines - - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 3 + - minItems: 3 maxItems: 3 items: description: min, typ, max number of lines From b3d4650d82c71b9c9a8184de9e8bb656012b289e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 14 Apr 2022 13:57:35 +1000 Subject: [PATCH 647/708] VFS: filename_create(): fix incorrect intent. When asked to create a path ending '/', but which is not to be a directory (LOOKUP_DIRECTORY not set), filename_create() will never try to create the file. If it doesn't exist, -ENOENT is reported. However, it still passes LOOKUP_CREATE|LOOKUP_EXCL to the filesystems ->lookup() function, even though there is no intent to create. This is misleading and can cause incorrect behaviour. If you try ln -s foo /path/dir/ where 'dir' is a directory on an NFS filesystem which is not currently known in the dcache, this will fail with ENOENT. But as the name is not in the dcache, nfs_lookup gets called with LOOKUP_CREATE|LOOKUP_EXCL and so it returns NULL without performing any lookup, with the expectation that a subsequent call to create the target will be made, and the lookup can be combined with the creation. In the case with a trailing '/' and no LOOKUP_DIRECTORY, that call is never made. Instead filename_create() sees that the dentry is not (yet) positive and returns -ENOENT - even though the directory actually exists. So only set LOOKUP_CREATE|LOOKUP_EXCL if there really is an intent to create, and use the absence of these flags to decide if -ENOENT should be returned. Note that filename_parentat() is only interested in LOOKUP_REVAL, so we split that out and store it in 'reval_flag'. __lookup_hash() then gets reval_flag combined with whatever create flags were determined to be needed. Reviewed-by: David Disseldorp Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Cc: Al Viro Signed-off-by: Linus Torvalds --- fs/namei.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 3f1829b3ab5b..509657fdf4f5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3673,18 +3673,14 @@ static struct dentry *filename_create(int dfd, struct filename *name, { struct dentry *dentry = ERR_PTR(-EEXIST); struct qstr last; + bool want_dir = lookup_flags & LOOKUP_DIRECTORY; + unsigned int reval_flag = lookup_flags & LOOKUP_REVAL; + unsigned int create_flags = LOOKUP_CREATE | LOOKUP_EXCL; int type; int err2; int error; - bool is_dir = (lookup_flags & LOOKUP_DIRECTORY); - /* - * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any - * other flags passed in are ignored! - */ - lookup_flags &= LOOKUP_REVAL; - - error = filename_parentat(dfd, name, lookup_flags, path, &last, &type); + error = filename_parentat(dfd, name, reval_flag, path, &last, &type); if (error) return ERR_PTR(error); @@ -3698,11 +3694,13 @@ static struct dentry *filename_create(int dfd, struct filename *name, /* don't fail immediately if it's r/o, at least try to report other errors */ err2 = mnt_want_write(path->mnt); /* - * Do the final lookup. + * Do the final lookup. Suppress 'create' if there is a trailing + * '/', and a directory wasn't requested. */ - lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL; + if (last.name[last.len] && !want_dir) + create_flags = 0; inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); - dentry = __lookup_hash(&last, path->dentry, lookup_flags); + dentry = __lookup_hash(&last, path->dentry, reval_flag | create_flags); if (IS_ERR(dentry)) goto unlock; @@ -3716,7 +3714,7 @@ static struct dentry *filename_create(int dfd, struct filename *name, * all is fine. Let's be bastards - you had / on the end, you've * been asking for (non-existent) directory. -ENOENT for you. */ - if (unlikely(!is_dir && last.name[last.len])) { + if (unlikely(!create_flags)) { error = -ENOENT; goto fail; } From 7dd06a2548b2bf516ef2e79873a9cdd00b354b99 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 14 Apr 2022 11:52:54 -0400 Subject: [PATCH 648/708] dm: allow dm_accept_partial_bio() for dm_io without duplicate bios The intent behind commit e6fc9f62ce6e ("dm: flag clones created by __send_duplicate_bios") was to formally disallow the use of dm_accept_partial_bio() where it simply isn't possible -- due to constraint that multiple bios cannot meaningfully update a shared tio->len_ptr. But that commit went too far and disallowed the case where "abormal" IO (e.g. WRITE_ZEROES) is only using a single bio. Fix this by not marking a dm_io with a single dm_target_io (and bio), that happens to be created by __send_duplicate_bios, as DM_TIO_IS_DUPLICATE_BIO. Also remove 'unsigned *len' parameter from alloc_multiple_bios(). This commit fixes a dm_accept_partial_bio() BUG_ON() with dm-zoned when a WRITE_ZEROES bio is issued. Fixes: 655f3aad7aa4 ("dm: switch dm_target_io booleans over to proper flags") Reported-by: Shinichiro Kawasaki Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3c5fad7c4ee6..fc1f9583a271 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1323,8 +1323,7 @@ static void __map_bio(struct bio *clone) } static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, - struct dm_target *ti, unsigned num_bios, - unsigned *len) + struct dm_target *ti, unsigned num_bios) { struct bio *bio; int try; @@ -1335,7 +1334,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, if (try) mutex_lock(&ci->io->md->table_devices_lock); for (bio_nr = 0; bio_nr < num_bios; bio_nr++) { - bio = alloc_tio(ci, ti, bio_nr, len, + bio = alloc_tio(ci, ti, bio_nr, NULL, try ? GFP_NOIO : GFP_NOWAIT); if (!bio) break; @@ -1363,11 +1362,11 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, break; case 1: clone = alloc_tio(ci, ti, 0, len, GFP_NOIO); - dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO); __map_bio(clone); break; default: - alloc_multiple_bios(&blist, ci, ti, num_bios, len); + /* dm_accept_partial_bio() is not supported with shared tio->len_ptr */ + alloc_multiple_bios(&blist, ci, ti, num_bios); while ((clone = bio_list_pop(&blist))) { dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO); __map_bio(clone); @@ -1407,14 +1406,10 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target len = min_t(sector_t, ci->sector_count, max_io_len_target_boundary(ti, dm_target_offset(ti, ci->sector))); - /* - * dm_accept_partial_bio cannot be used with duplicate bios, - * so update clone_info cursor before __send_duplicate_bios(). - */ + __send_duplicate_bios(ci, ti, num_bios, &len); + ci->sector += len; ci->sector_count -= len; - - __send_duplicate_bios(ci, ti, num_bios, &len); } static bool is_abnormal_io(struct bio *bio) From 701521403cfb228536b3947035c8a6eca40d8e58 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 14 Apr 2022 20:23:40 -0600 Subject: [PATCH 649/708] io_uring: abort file assignment prior to assigning creds We need to either restore creds properly if we fail on the file assignment, or just do the file assignment first instead. Let's do the latter as it's simpler, should make no difference here for file assignment. Link: https://lore.kernel.org/lkml/000000000000a7edb305dca75a50@google.com/ Reported-by: syzbot+60c52ca98513a8760a91@syzkaller.appspotmail.com Fixes: 6bf9c47a3989 ("io_uring: defer file assignment") Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ab674a0d269b..4479013854d2 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7111,13 +7111,14 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) const struct cred *creds = NULL; int ret; + if (unlikely(!io_assign_file(req, issue_flags))) + return -EBADF; + if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred())) creds = override_creds(req->creds); if (!io_op_defs[req->opcode].audit_skip) audit_uring_entry(req->opcode); - if (unlikely(!io_assign_file(req, issue_flags))) - return -EBADF; switch (req->opcode) { case IORING_OP_NOP: From b42b6f4485e3f0970e11f73df6202eeaf9f53a3e Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Sun, 10 Apr 2022 20:12:49 -0700 Subject: [PATCH 650/708] nvme: don't print verbose errors for internal passthrough requests Use the RQF_QUIET flag to skip the newly added verbose error reporting, and set the flag in __nvme_submit_sync_cmd, which is used for most internal passthrough requests where we do expect errors (e.g. due to probing for optional functionality). This is similar to what the SCSI verbose error logging does. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Alan Adamson Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Tested-by: Alan Adamson Tested-by: Yi Zhang Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index efb85c6d8e2d..be9fc9818e65 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -366,7 +366,7 @@ static inline void nvme_end_req(struct request *req) { blk_status_t status = nvme_error_status(nvme_req(req)->status); - if (unlikely(nvme_req(req)->status != NVME_SC_SUCCESS)) + if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) nvme_log_error(req); nvme_end_req_zoned(req); nvme_trace_bio_complete(req); @@ -1015,6 +1015,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, goto out; } + req->rq_flags |= RQF_QUIET; ret = nvme_execute_rq(req, at_head); if (result && ret >= 0) *result = nvme_req(req)->result; From 00ff400e6deee00f7b15e200205b2708b63b8cf6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Apr 2022 08:05:27 +0200 Subject: [PATCH 651/708] nvme: add a quirk to disable namespace identifiers Add a quirk to disable using and exporting namespace identifiers for controllers where they are broken beyond repair. The most directly visible problem with non-unique namespace identifiers is that they break the /dev/disk/by-id/ links, with the link for a supposedly unique identifier now pointing to one of multiple possible namespaces that share the same ID, and a somewhat random selection of which one actually shows up. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 24 ++++++++++++++++++------ drivers/nvme/host/nvme.h | 5 +++++ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index be9fc9818e65..e1846d04817f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1288,6 +1288,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, warn_str, cur->nidl); return -1; } + if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) + return NVME_NIDT_EUI64_LEN; memcpy(ids->eui64, data + sizeof(*cur), NVME_NIDT_EUI64_LEN); return NVME_NIDT_EUI64_LEN; case NVME_NIDT_NGUID: @@ -1296,6 +1298,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, warn_str, cur->nidl); return -1; } + if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) + return NVME_NIDT_NGUID_LEN; memcpy(ids->nguid, data + sizeof(*cur), NVME_NIDT_NGUID_LEN); return NVME_NIDT_NGUID_LEN; case NVME_NIDT_UUID: @@ -1304,6 +1308,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, warn_str, cur->nidl); return -1; } + if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) + return NVME_NIDT_UUID_LEN; uuid_copy(&ids->uuid, data + sizeof(*cur)); return NVME_NIDT_UUID_LEN; case NVME_NIDT_CSI: @@ -1400,12 +1406,18 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, if ((*id)->ncap == 0) /* namespace not allocated or attached */ goto out_free_id; - if (ctrl->vs >= NVME_VS(1, 1, 0) && - !memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) - memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64)); - if (ctrl->vs >= NVME_VS(1, 2, 0) && - !memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) - memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid)); + + if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) { + dev_info(ctrl->device, + "Ignoring bogus Namespace Identifiers\n"); + } else { + if (ctrl->vs >= NVME_VS(1, 1, 0) && + !memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) + memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64)); + if (ctrl->vs >= NVME_VS(1, 2, 0) && + !memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) + memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid)); + } return 0; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1393bbf82d71..a2b53ca63335 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -144,6 +144,11 @@ enum nvme_quirks { * encoding the generation sequence number. */ NVME_QUIRK_SKIP_CID_GEN = (1 << 17), + + /* + * Reports garbage in the namespace identifiers (eui64, nguid, uuid). + */ + NVME_QUIRK_BOGUS_NID = (1 << 18), }; /* From a98a945b80f8684121d477ae68ebc01da953da1f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Apr 2022 08:05:27 +0200 Subject: [PATCH 652/708] nvme-pci: disable namespace identifiers for the MAXIO MAP1002/1202 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MAXIO MAP1002/1202 controllers reports completely bogus Namespace identifiers that even change after suspend cycles. Disable using the Identifiers entirely. Reported-by: 金韬 Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Tested-by: 金韬 --- drivers/nvme/host/pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d817ca17463e..c45dbe8a7dcd 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3447,6 +3447,10 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, + { PCI_DEVICE(0x1e4B, 0x1002), /* MAXIO MAP1002 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1e4B, 0x1202), /* MAXIO MAP1202 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061), .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065), From 66dd346b84d79fde20832ed691a54f4881eac20d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Apr 2022 07:07:56 +0200 Subject: [PATCH 653/708] nvme-pci: disable namespace identifiers for Qemu controllers Qemu unconditionally reports a UUID, which depending on the qemu version is either all-null (which is incorrect but harmless) or contains a single bit set for all controllers. In addition it can also optionally report a eui64 which needs to be manually set. Disable namespace identifiers for Qemu controlles entirely even if in some cases they could be set correctly through manual intervention. Reported-by: Luis Chamberlain Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c45dbe8a7dcd..3aacf1c0d5a5 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3409,7 +3409,10 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS | - NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + NVME_QUIRK_DISABLE_WRITE_ZEROES | + NVME_QUIRK_BOGUS_NID, }, + { PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, }, { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ From ccf16413e520164eb718cf8b22a30438da80ff23 Mon Sep 17 00:00:00 2001 From: Khazhismel Kumykov Date: Thu, 14 Apr 2022 15:40:56 -0700 Subject: [PATCH 654/708] block/compat_ioctl: fix range check in BLKGETSIZE kernel ulong and compat_ulong_t may not be same width. Use type directly to eliminate mismatches. This would result in truncation rather than EFBIG for 32bit mode for large disks. Reviewed-by: Bart Van Assche Signed-off-by: Khazhismel Kumykov Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220414224056.2875681-1-khazhy@google.com Signed-off-by: Jens Axboe --- block/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/ioctl.c b/block/ioctl.c index 4a86340133e4..f8703db99c73 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -629,7 +629,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) return compat_put_long(argp, (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); case BLKGETSIZE: - if (bdev_nr_sectors(bdev) > ~0UL) + if (bdev_nr_sectors(bdev) > ~(compat_ulong_t)0) return -EFBIG; return compat_put_ulong(argp, bdev_nr_sectors(bdev)); From 3d973a76e54c30772e72128ab0552ca75e588893 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Mar 2022 17:38:15 +0100 Subject: [PATCH 655/708] block: don't print I/O error warning for dead disks When a disk has been marked dead, don't print warnings for I/O errors as they are very much expected. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220323163815.1526998-1-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index ed3ed86f7dd2..c4370d276170 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -794,7 +794,8 @@ bool blk_update_request(struct request *req, blk_status_t error, #endif if (unlikely(error && !blk_rq_is_passthrough(req) && - !(req->rq_flags & RQF_QUIET))) { + !(req->rq_flags & RQF_QUIET)) && + !test_bit(GD_DEAD, &req->q->disk->state)) { blk_print_req_error(req, error); trace_block_rq_error(req, error, nr_bytes); } From 92b914e29af3e99589f2d2876616c0b534892ed4 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Fri, 15 Apr 2022 17:45:13 +0900 Subject: [PATCH 656/708] dm: fix bio length of empty flush The commit 92986f6b4c8a ("dm: use bio_clone_fast in alloc_io/alloc_tio") removed bio_clone_fast() call from alloc_tio() when ci->io->tio is available. In this case, ci->bio is not copied to ci->io->tio.clone. This is fine since init_clone_info() sets same values to ci->bio and ci->io->tio.clone. However, when incoming bios have REQ_PREFLUSH flag, __send_empty_flush() prepares a zero length bio on stack and set it to ci->bio. At this time, ci->io->tio.clone still keeps non-zero length. When alloc_tio() chooses this ci->io->tio.clone as the bio to map, it is passed to targets as non-empty flush bio. It causes bio length check failure in dm-zoned and unexpected operation such as dm_accept_partial_bio() call. To avoid the non-empty flush bio, set zero length to ci->io->tio.clone in __send_empty_flush(). Fixes: 92986f6b4c8a ("dm: use bio_clone_fast in alloc_io/alloc_tio") Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index fc1f9583a271..82957bd460e8 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1391,6 +1391,7 @@ static void __send_empty_flush(struct clone_info *ci) ci->bio = &flush_bio; ci->sector_count = 0; + ci->io->tio.clone.bi_iter.bi_size = 0; while ((ti = dm_table_get_target(ci->map, target_nr++))) __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); From bd8963e602c77adc76dbbbfc3417c3cf14fed76b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Tue, 29 Mar 2022 20:38:17 +0200 Subject: [PATCH 657/708] i2c: pasemi: Wait for write xfers to finish MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wait for completion of write transfers before returning from the driver. At first sight it may seem advantageous to leave write transfers queued for the controller to carry out on its own time, but there's a couple of issues with it: * Driver doesn't check for FIFO space. * The queued writes can complete while the driver is in its I2C read transfer path which means it will get confused by the raising of XEN (the 'transaction ended' signal). This can cause a spurious ENODATA error due to premature reading of the MRXFIFO register. Adding the wait fixes some unreliability issues with the driver. There's some efficiency cost to it (especially with pasemi_smb_waitready doing its polling), but that will be alleviated once the driver receives interrupt support. Fixes: beb58aa39e6e ("i2c: PA Semi SMBus driver") Signed-off-by: Martin Povišer Reviewed-by: Sven Peter Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-pasemi-core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c index 7728c8460dc0..9028ffb58cc0 100644 --- a/drivers/i2c/busses/i2c-pasemi-core.c +++ b/drivers/i2c/busses/i2c-pasemi-core.c @@ -137,6 +137,12 @@ static int pasemi_i2c_xfer_msg(struct i2c_adapter *adapter, TXFIFO_WR(smbus, msg->buf[msg->len-1] | (stop ? MTXFIFO_STOP : 0)); + + if (stop) { + err = pasemi_smb_waitready(smbus); + if (err) + goto reset_out; + } } return 0; From 39c025721d706eb4fa8a5a7a376c93a3162b71bc Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 8 Apr 2022 19:15:24 +0200 Subject: [PATCH 658/708] i2c: imx: Implement errata ERR007805 or e7805 bus frequency limit The i.MX8MP Mask Set Errata for Mask 1P33A, Rev. 2.0 has description of errata ERR007805 as below. This errata is found on all MX8M{M,N,P,Q}, MX7{S,D}, MX6{UL{,L,Z},S{,LL,X},S,D,DL,Q,DP,QP} . MX7ULP, MX8Q, MX8X are not affected. MX53 and older status is unknown, as the errata first appears in MX6 errata sheets from 2016 and the latest errata sheet for MX53 is from 2015. Older SoC errata sheets predate the MX53 errata sheet. MX8ULP and MX9 status is unknown as the errata sheet is not available yet. " ERR007805 I2C: When the I2C clock speed is configured for 400 kHz, the SCL low period violates the I2C spec of 1.3 uS min Description: When the I2C module is programmed to operate at the maximum clock speed of 400 kHz (as defined by the I2C spec), the SCL clock low period violates the I2C spec of 1.3 uS min. The user must reduce the clock speed to obtain the SCL low time to meet the 1.3us I2C minimum required. This behavior means the SoC is not compliant to the I2C spec at 400kHz. Workaround: To meet the clock low period requirement in fast speed mode, SCL must be configured to 384KHz or less. " Implement the workaround by matching on the affected SoC specific compatible strings and by limiting the maximum bus frequency in case the SoC is affected. Signed-off-by: Marek Vasut To: linux-i2c@vger.kernel.org Acked-by: Oleksij Rempel Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 27f969b3dc07..e9e2db68b9fb 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -179,6 +179,12 @@ struct imx_i2c_hwdata { unsigned int ndivs; unsigned int i2sr_clr_opcode; unsigned int i2cr_ien_opcode; + /* + * Errata ERR007805 or e7805: + * I2C: When the I2C clock speed is configured for 400 kHz, + * the SCL low period violates the I2C spec of 1.3 uS min. + */ + bool has_err007805; }; struct imx_i2c_dma { @@ -240,6 +246,16 @@ static const struct imx_i2c_hwdata imx21_i2c_hwdata = { }; +static const struct imx_i2c_hwdata imx6_i2c_hwdata = { + .devtype = IMX21_I2C, + .regshift = IMX_I2C_REGSHIFT, + .clk_div = imx_i2c_clk_div, + .ndivs = ARRAY_SIZE(imx_i2c_clk_div), + .i2sr_clr_opcode = I2SR_CLR_OPCODE_W0C, + .i2cr_ien_opcode = I2CR_IEN_OPCODE_1, + .has_err007805 = true, +}; + static struct imx_i2c_hwdata vf610_i2c_hwdata = { .devtype = VF610_I2C, .regshift = VF610_I2C_REGSHIFT, @@ -266,6 +282,16 @@ MODULE_DEVICE_TABLE(platform, imx_i2c_devtype); static const struct of_device_id i2c_imx_dt_ids[] = { { .compatible = "fsl,imx1-i2c", .data = &imx1_i2c_hwdata, }, { .compatible = "fsl,imx21-i2c", .data = &imx21_i2c_hwdata, }, + { .compatible = "fsl,imx6q-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx6sl-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx6sll-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx6sx-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx6ul-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx7s-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx8mm-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx8mn-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx8mp-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx8mq-i2c", .data = &imx6_i2c_hwdata, }, { .compatible = "fsl,vf610-i2c", .data = &vf610_i2c_hwdata, }, { /* sentinel */ } }; @@ -551,6 +577,13 @@ static void i2c_imx_set_clk(struct imx_i2c_struct *i2c_imx, unsigned int div; int i; + if (i2c_imx->hwdata->has_err007805 && i2c_imx->bitrate > 384000) { + dev_dbg(&i2c_imx->adapter.dev, + "SoC errata ERR007805 or e7805 applies, bus frequency limited from %d Hz to 384000 Hz.\n", + i2c_imx->bitrate); + i2c_imx->bitrate = 384000; + } + /* Divider value calculation */ if (i2c_imx->cur_clk == i2c_clk_rate) return; From c60bd968c74749a85c71dfed5fabd3e36d487d54 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 12 Apr 2022 14:26:01 -0700 Subject: [PATCH 659/708] i2c: qcom-geni: Use dev_err_probe() for GPI DMA error The GPI DMA engine driver can be compiled as a module, in which case the likely probe deferral "error" shows up in the kernel log. Switch to using dev_err_probe() to silence this warning and to ensure that "devices_deferred" in debugfs carries this information. Signed-off-by: Bjorn Andersson Reviewed-by: Vinod Koul Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-qcom-geni.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index fc1dcc19f2a1..5b920f0fc7dd 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -843,10 +843,8 @@ static int geni_i2c_probe(struct platform_device *pdev) /* FIFO is disabled, so we can only use GPI DMA */ gi2c->gpi_mode = true; ret = setup_gpi_dma(gi2c); - if (ret) { - dev_err(dev, "Failed to setup GPI DMA mode:%d ret\n", ret); - return ret; - } + if (ret) + return dev_err_probe(dev, ret, "Failed to setup GPI DMA mode\n"); dev_dbg(dev, "Using GPI DMA mode for I2C\n"); } else { From 993eb48fa199b5f476df8204e652eff63dd19361 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 11 Apr 2022 21:07:51 +0300 Subject: [PATCH 660/708] i2c: dev: check return value when calling dev_set_name() If dev_set_name() fails, the dev_name() is null, check the return value of dev_set_name() to avoid the null-ptr-deref. Fixes: 1413ef638aba ("i2c: dev: Fix the race between the release of i2c_dev and cdev") Signed-off-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-dev.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index cf5d049342ea..6fd2b6718b08 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -668,16 +668,21 @@ static int i2cdev_attach_adapter(struct device *dev, void *dummy) i2c_dev->dev.class = i2c_dev_class; i2c_dev->dev.parent = &adap->dev; i2c_dev->dev.release = i2cdev_dev_release; - dev_set_name(&i2c_dev->dev, "i2c-%d", adap->nr); + + res = dev_set_name(&i2c_dev->dev, "i2c-%d", adap->nr); + if (res) + goto err_put_i2c_dev; res = cdev_device_add(&i2c_dev->cdev, &i2c_dev->dev); - if (res) { - put_i2c_dev(i2c_dev, false); - return res; - } + if (res) + goto err_put_i2c_dev; pr_debug("adapter [%s] registered as minor %d\n", adap->name, adap->nr); return 0; + +err_put_i2c_dev: + put_i2c_dev(i2c_dev, false); + return res; } static int i2cdev_detach_adapter(struct device *dev, void *dummy) From aef80e2fbe3ec6264e935cebfb8fa3367cb1e7b0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 11 Apr 2022 21:07:52 +0300 Subject: [PATCH 661/708] i2c: dev: Force case user pointers in compat_i2cdev_ioctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sparse has warned us about wrong address space for user pointers: i2c-dev.c:561:50: warning: incorrect type in initializer (different address spaces) i2c-dev.c:561:50: expected unsigned char [usertype] *buf i2c-dev.c:561:50: got void [noderef] __user * Force cast the pointer to (__u8 *) that is used by I²C core code. Note, this is an additional fix to the previously addressed similar issue in the I2C_RDWR case in the same function. Fixes: 3265a7e6b41b ("i2c: dev: Add __user annotation") Signed-off-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 6fd2b6718b08..ab0adaa130da 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -557,7 +557,7 @@ static long compat_i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned lo .addr = umsg.addr, .flags = umsg.flags, .len = umsg.len, - .buf = compat_ptr(umsg.buf) + .buf = (__force __u8 *)compat_ptr(umsg.buf), }; } From e35c93695c742b88f5fe32063674551440c63d08 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2022 17:15:11 +0200 Subject: [PATCH 662/708] i2c: ismt: Fix undefined behavior due to shift overflowing the constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: drivers/i2c/busses/i2c-ismt.c: In function ‘ismt_hw_init’: drivers/i2c/busses/i2c-ismt.c:770:2: error: case label does not reduce to an integer constant case ISMT_SPGT_SPD_400K: ^~~~ drivers/i2c/busses/i2c-ismt.c:773:2: error: case label does not reduce to an integer constant case ISMT_SPGT_SPD_1M: ^~~~ See https://lore.kernel.org/r/YkwQ6%2BtIH8GQpuct@zn.tnic for the gory details as to why it triggers with older gccs only. Signed-off-by: Borislav Petkov Reviewed-by: Seth Heasley Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-ismt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index f4820fd3dc13..c0364314877e 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -145,8 +145,8 @@ #define ISMT_SPGT_SPD_MASK 0xc0000000 /* SMBus Speed mask */ #define ISMT_SPGT_SPD_80K 0x00 /* 80 kHz */ #define ISMT_SPGT_SPD_100K (0x1 << 30) /* 100 kHz */ -#define ISMT_SPGT_SPD_400K (0x2 << 30) /* 400 kHz */ -#define ISMT_SPGT_SPD_1M (0x3 << 30) /* 1 MHz */ +#define ISMT_SPGT_SPD_400K (0x2U << 30) /* 400 kHz */ +#define ISMT_SPGT_SPD_1M (0x3U << 30) /* 1 MHz */ /* MSI Control Register (MSICTL) bit definitions */ From 7fbd166a8f2d697c3e2b4c8432d33253f00266b3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 14 Apr 2022 19:13:24 -0700 Subject: [PATCH 663/708] MAINTAINERS: Broadcom internal lists aren't maintainers Convert the broadcom internal list M: and L: entries to R: as exploder email addresses are neither maintainers nor mailing lists. Reorder the entries as necessary. Link: https://lkml.kernel.org/r/04eb301f5b3adbefdd78e76657eff0acb3e3d87f.camel@perches.com Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 64 ++++++++++++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7341667e7313..d76c9aa1d38a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3743,7 +3743,7 @@ F: include/linux/platform_data/b53.h BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE M: Nicolas Saenz Julienne -L: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@ -3758,7 +3758,7 @@ BROADCOM BCM281XX/BCM11XXX/BCM216XX ARM ARCHITECTURE M: Florian Fainelli M: Ray Jui M: Scott Branden -M: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team S: Maintained T: git git://github.com/broadcom/mach-bcm F: arch/arm/mach-bcm/ @@ -3778,7 +3778,7 @@ F: arch/mips/include/asm/mach-bcm47xx/* BROADCOM BCM4908 ETHERNET DRIVER M: Rafał Miłecki -M: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml @@ -3787,7 +3787,7 @@ F: drivers/net/ethernet/broadcom/unimac.h BROADCOM BCM4908 PINMUX DRIVER M: Rafał Miłecki -M: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: linux-gpio@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pinctrl/brcm,bcm4908-pinctrl.yaml @@ -3797,7 +3797,7 @@ BROADCOM BCM5301X ARM ARCHITECTURE M: Florian Fainelli M: Hauke Mehrtens M: Rafał Miłecki -M: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/boot/dts/bcm470* @@ -3808,7 +3808,7 @@ F: arch/arm/mach-bcm/bcm_5301x.c BROADCOM BCM53573 ARM ARCHITECTURE M: Florian Fainelli M: Rafał Miłecki -L: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/boot/dts/bcm47189* @@ -3816,7 +3816,7 @@ F: arch/arm/boot/dts/bcm53573* BROADCOM BCM63XX ARM ARCHITECTURE M: Florian Fainelli -M: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained T: git git://github.com/broadcom/stblinux.git @@ -3830,7 +3830,7 @@ F: drivers/usb/gadget/udc/bcm63xx_udc.* BROADCOM BCM7XXX ARM ARCHITECTURE M: Florian Fainelli -M: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained T: git git://github.com/broadcom/stblinux.git @@ -3848,21 +3848,21 @@ N: bcm7120 BROADCOM BDC DRIVER M: Al Cooper L: linux-usb@vger.kernel.org -L: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team S: Maintained F: Documentation/devicetree/bindings/usb/brcm,bdc.yaml F: drivers/usb/gadget/udc/bdc/ BROADCOM BMIPS CPUFREQ DRIVER M: Markus Mayer -M: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: linux-pm@vger.kernel.org S: Maintained F: drivers/cpufreq/bmips-cpufreq.c BROADCOM BMIPS MIPS ARCHITECTURE M: Florian Fainelli -L: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: linux-mips@vger.kernel.org S: Maintained T: git git://github.com/broadcom/stblinux.git @@ -3928,53 +3928,53 @@ F: drivers/net/wireless/broadcom/brcm80211/ BROADCOM BRCMSTB GPIO DRIVER M: Doug Berger M: Florian Fainelli -L: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team S: Supported F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.yaml F: drivers/gpio/gpio-brcmstb.c BROADCOM BRCMSTB I2C DRIVER M: Kamal Dasu +R: Broadcom Kernel Team L: linux-i2c@vger.kernel.org -L: bcm-kernel-feedback-list@broadcom.com S: Supported F: Documentation/devicetree/bindings/i2c/brcm,brcmstb-i2c.yaml F: drivers/i2c/busses/i2c-brcmstb.c BROADCOM BRCMSTB UART DRIVER M: Al Cooper +R: Broadcom Kernel Team L: linux-serial@vger.kernel.org -L: bcm-kernel-feedback-list@broadcom.com S: Maintained F: Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml F: drivers/tty/serial/8250/8250_bcm7271.c BROADCOM BRCMSTB USB EHCI DRIVER M: Al Cooper +R: Broadcom Kernel Team L: linux-usb@vger.kernel.org -L: bcm-kernel-feedback-list@broadcom.com S: Maintained F: Documentation/devicetree/bindings/usb/brcm,bcm7445-ehci.yaml F: drivers/usb/host/ehci-brcm.* BROADCOM BRCMSTB USB PIN MAP DRIVER M: Al Cooper +R: Broadcom Kernel Team L: linux-usb@vger.kernel.org -L: bcm-kernel-feedback-list@broadcom.com S: Maintained F: Documentation/devicetree/bindings/usb/brcm,usb-pinmap.yaml F: drivers/usb/misc/brcmstb-usb-pinmap.c BROADCOM BRCMSTB USB2 and USB3 PHY DRIVER M: Al Cooper +R: Broadcom Kernel Team L: linux-kernel@vger.kernel.org -L: bcm-kernel-feedback-list@broadcom.com S: Maintained F: drivers/phy/broadcom/phy-brcm-usb* BROADCOM ETHERNET PHY DRIVERS M: Florian Fainelli -L: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: netdev@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/net/broadcom-bcm87xx.txt @@ -3985,7 +3985,7 @@ F: include/linux/brcmphy.h BROADCOM GENET ETHERNET DRIVER M: Doug Berger M: Florian Fainelli -L: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: netdev@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml @@ -3999,7 +3999,7 @@ F: include/linux/platform_data/mdio-bcm-unimac.h BROADCOM IPROC ARM ARCHITECTURE M: Ray Jui M: Scott Branden -M: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained T: git git://github.com/broadcom/stblinux.git @@ -4027,7 +4027,7 @@ N: stingray BROADCOM IPROC GBIT ETHERNET DRIVER M: Rafał Miłecki -M: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/brcm,amac.yaml @@ -4036,7 +4036,7 @@ F: drivers/net/ethernet/broadcom/unimac.h BROADCOM KONA GPIO DRIVER M: Ray Jui -L: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team S: Supported F: Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt F: drivers/gpio/gpio-bcm-kona.c @@ -4069,7 +4069,7 @@ F: drivers/firmware/broadcom/* BROADCOM PMB (POWER MANAGEMENT BUS) DRIVER M: Rafał Miłecki M: Florian Fainelli -M: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: linux-pm@vger.kernel.org S: Maintained T: git git://github.com/broadcom/stblinux.git @@ -4085,7 +4085,7 @@ F: include/linux/bcma/ BROADCOM SPI DRIVER M: Kamal Dasu -M: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team S: Maintained F: Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.yaml F: drivers/spi/spi-bcm-qspi.* @@ -4094,7 +4094,7 @@ F: drivers/spi/spi-iproc-qspi.c BROADCOM STB AVS CPUFREQ DRIVER M: Markus Mayer -M: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: linux-pm@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/cpufreq/brcm,stb-avs-cpu-freq.txt @@ -4102,7 +4102,7 @@ F: drivers/cpufreq/brcmstb* BROADCOM STB AVS TMON DRIVER M: Markus Mayer -M: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: linux-pm@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/thermal/brcm,avs-tmon.yaml @@ -4110,7 +4110,7 @@ F: drivers/thermal/broadcom/brcmstb* BROADCOM STB DPFE DRIVER M: Markus Mayer -M: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/memory-controllers/brcm,dpfe-cpu.yaml @@ -4119,8 +4119,8 @@ F: drivers/memory/brcmstb_dpfe.c BROADCOM STB NAND FLASH DRIVER M: Brian Norris M: Kamal Dasu +R: Broadcom Kernel Team L: linux-mtd@lists.infradead.org -L: bcm-kernel-feedback-list@broadcom.com S: Maintained F: drivers/mtd/nand/raw/brcmnand/ F: include/linux/platform_data/brcmnand.h @@ -4129,7 +4129,7 @@ BROADCOM STB PCIE DRIVER M: Jim Quinlan M: Nicolas Saenz Julienne M: Florian Fainelli -M: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: linux-pci@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml @@ -4137,7 +4137,7 @@ F: drivers/pci/controller/pcie-brcmstb.c BROADCOM SYSTEMPORT ETHERNET DRIVER M: Florian Fainelli -L: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/bcmsysport.* @@ -4154,7 +4154,7 @@ F: drivers/net/ethernet/broadcom/tg3.* BROADCOM VK DRIVER M: Scott Branden -L: bcm-kernel-feedback-list@broadcom.com +R: Broadcom Kernel Team S: Supported F: drivers/misc/bcm-vk/ F: include/uapi/linux/misc/bcm_vk.h @@ -17648,8 +17648,8 @@ K: \bTIF_SECCOMP\b SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) Broadcom BRCMSTB DRIVER M: Al Cooper +R: Broadcom Kernel Team L: linux-mmc@vger.kernel.org -L: bcm-kernel-feedback-list@broadcom.com S: Maintained F: drivers/mmc/host/sdhci-brcmstb* From 1bdec44b1eee32e311b44b5b06144bb7d9b33938 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 14 Apr 2022 19:13:27 -0700 Subject: [PATCH 664/708] tmpfs: fix regressions from wider use of ZERO_PAGE Chuck Lever reported fsx-based xfstests generic 075 091 112 127 failing when 5.18-rc1 NFS server exports tmpfs: bisected to recent tmpfs change. Whilst nfsd_splice_action() does contain some questionable handling of repeated pages, and Chuck was able to work around there, history from Mark Hemment makes clear that there might be similar dangers elsewhere: it was not a good idea for me to pass ZERO_PAGE down to unknown actors. Revert shmem_file_read_iter() to using ZERO_PAGE for holes only when iter_is_iovec(); in other cases, use the more natural iov_iter_zero() instead of copy_page_to_iter(). We would use iov_iter_zero() throughout, but the x86 clear_user() is not nearly so well optimized as copy to user (dd of 1T sparse tmpfs file takes 57 seconds rather than 44 seconds). And now pagecache_init() does not need to SetPageUptodate(ZERO_PAGE(0)): which had caused boot failure on arm noMMU STM32F7 and STM32H7 boards Link: https://lkml.kernel.org/r/9a978571-8648-e830-5735-1f4748ce2e30@google.com Fixes: 56a8c8eb1eaf ("tmpfs: do not allocate pages on read") Signed-off-by: Hugh Dickins Reported-by: Patrice CHOTARD Reported-by: Chuck Lever III Tested-by: Chuck Lever III Cc: Mark Hemment Cc: Patrice CHOTARD Cc: Mikulas Patocka Cc: Lukas Czerner Cc: Christoph Hellwig Cc: "Darrick J. Wong" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 6 ------ mm/shmem.c | 31 ++++++++++++++++++++----------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 3a5ffb5587cd..9a1eef6c5d35 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1063,12 +1063,6 @@ void __init pagecache_init(void) init_waitqueue_head(&folio_wait_table[i]); page_writeback_init(); - - /* - * tmpfs uses the ZERO_PAGE for reading holes: it is up-to-date, - * and splice's page_cache_pipe_buf_confirm() needs to see that. - */ - SetPageUptodate(ZERO_PAGE(0)); } /* diff --git a/mm/shmem.c b/mm/shmem.c index 529c9ad3e926..4b2fea33158e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2513,7 +2513,6 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) pgoff_t end_index; unsigned long nr, ret; loff_t i_size = i_size_read(inode); - bool got_page; end_index = i_size >> PAGE_SHIFT; if (index > end_index) @@ -2570,24 +2569,34 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) */ if (!offset) mark_page_accessed(page); - got_page = true; + /* + * Ok, we have the page, and it's up-to-date, so + * now we can copy it to user space... + */ + ret = copy_page_to_iter(page, offset, nr, to); + put_page(page); + + } else if (iter_is_iovec(to)) { + /* + * Copy to user tends to be so well optimized, but + * clear_user() not so much, that it is noticeably + * faster to copy the zero page instead of clearing. + */ + ret = copy_page_to_iter(ZERO_PAGE(0), offset, nr, to); } else { - page = ZERO_PAGE(0); - got_page = false; + /* + * But submitting the same page twice in a row to + * splice() - or others? - can result in confusion: + * so don't attempt that optimization on pipes etc. + */ + ret = iov_iter_zero(nr, to); } - /* - * Ok, we have the page, and it's up-to-date, so - * now we can copy it to user space... - */ - ret = copy_page_to_iter(page, offset, nr, to); retval += ret; offset += ret; index += offset >> PAGE_SHIFT; offset &= ~PAGE_MASK; - if (got_page) - put_page(page); if (!iov_iter_count(to)) break; if (ret < nr) { From f9b141f93659e09a52e28791ccbaf69c273b8e92 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Thu, 14 Apr 2022 19:13:31 -0700 Subject: [PATCH 665/708] mm/secretmem: fix panic when growing a memfd_secret When one tries to grow an existing memfd_secret with ftruncate, one gets a panic [1]. For example, doing the following reliably induces the panic: fd = memfd_secret(); ftruncate(fd, 10); ptr = mmap(NULL, 10, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); strcpy(ptr, "123456789"); munmap(ptr, 10); ftruncate(fd, 20); The basic reason for this is, when we grow with ftruncate, we call down into simple_setattr, and then truncate_inode_pages_range, and eventually we try to zero part of the memory. The normal truncation code does this via the direct map (i.e., it calls page_address() and hands that to memset()). For memfd_secret though, we specifically don't map our pages via the direct map (i.e. we call set_direct_map_invalid_noflush() on every fault). So the address returned by page_address() isn't useful, and when we try to memset() with it we panic. This patch avoids the panic by implementing a custom setattr for memfd_secret, which detects resizes specifically (setting the size for the first time works just fine, since there are no existing pages to try to zero), and rejects them with EINVAL. One could argue growing should be supported, but I think that will require a significantly more lengthy change. So, I propose a minimal fix for the benefit of stable kernels, and then perhaps to extend memfd_secret to support growing in a separate patch. [1]: BUG: unable to handle page fault for address: ffffa0a889277028 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD afa01067 P4D afa01067 PUD 83f909067 PMD 83f8bf067 PTE 800ffffef6d88060 Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI CPU: 0 PID: 281 Comm: repro Not tainted 5.17.0-dbg-DEV #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:memset_erms+0x9/0x10 Code: c1 e9 03 40 0f b6 f6 48 b8 01 01 01 01 01 01 01 01 48 0f af c6 f3 48 ab 89 d1 f3 aa 4c 89 c8 c3 90 49 89 f9 40 88 f0 48 89 d1 aa 4c 89 c8 c3 90 49 89 fa 40 0f b6 ce 48 b8 01 01 01 01 01 01 RSP: 0018:ffffb932c09afbf0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffffda63c4249dc0 RCX: 0000000000000fd8 RDX: 0000000000000fd8 RSI: 0000000000000000 RDI: ffffa0a889277028 RBP: ffffb932c09afc00 R08: 0000000000001000 R09: ffffa0a889277028 R10: 0000000000020023 R11: 0000000000000000 R12: ffffda63c4249dc0 R13: ffffa0a890d70d98 R14: 0000000000000028 R15: 0000000000000fd8 FS: 00007f7294899580(0000) GS:ffffa0af9bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffa0a889277028 CR3: 0000000107ef6006 CR4: 0000000000370ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? zero_user_segments+0x82/0x190 truncate_inode_partial_folio+0xd4/0x2a0 truncate_inode_pages_range+0x380/0x830 truncate_setsize+0x63/0x80 simple_setattr+0x37/0x60 notify_change+0x3d8/0x4d0 do_sys_ftruncate+0x162/0x1d0 __x64_sys_ftruncate+0x1c/0x20 do_syscall_64+0x44/0xa0 entry_SYSCALL_64_after_hwframe+0x44/0xae Modules linked in: xhci_pci xhci_hcd virtio_net net_failover failover virtio_blk virtio_balloon uhci_hcd ohci_pci ohci_hcd evdev ehci_pci ehci_hcd 9pnet_virtio 9p netfs 9pnet CR2: ffffa0a889277028 [lkp@intel.com: secretmem_iops can be static] Signed-off-by: kernel test robot [axelrasmussen@google.com: return EINVAL] Link: https://lkml.kernel.org/r/20220324210909.1843814-1-axelrasmussen@google.com Link: https://lkml.kernel.org/r/20220412193023.279320-1-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Cc: Mike Rapoport Cc: Matthew Wilcox Cc: Cc: kernel test robot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/secretmem.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/mm/secretmem.c b/mm/secretmem.c index 098638d3b8a4..3b3cf2892b6a 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -158,6 +158,22 @@ const struct address_space_operations secretmem_aops = { .isolate_page = secretmem_isolate_page, }; +static int secretmem_setattr(struct user_namespace *mnt_userns, + struct dentry *dentry, struct iattr *iattr) +{ + struct inode *inode = d_inode(dentry); + unsigned int ia_valid = iattr->ia_valid; + + if ((ia_valid & ATTR_SIZE) && inode->i_size) + return -EINVAL; + + return simple_setattr(mnt_userns, dentry, iattr); +} + +static const struct inode_operations secretmem_iops = { + .setattr = secretmem_setattr, +}; + static struct vfsmount *secretmem_mnt; static struct file *secretmem_file_create(unsigned long flags) @@ -177,6 +193,7 @@ static struct file *secretmem_file_create(unsigned long flags) mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); mapping_set_unevictable(inode->i_mapping); + inode->i_op = &secretmem_iops; inode->i_mapping->a_ops = &secretmem_aops; /* pretend we are a normal file with zero size */ From 25934fcfb93c4687ad32fd3d062bcf03457129d4 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Thu, 14 Apr 2022 19:13:34 -0700 Subject: [PATCH 666/708] irq_work: use kasan_record_aux_stack_noalloc() record callstack On PREEMPT_RT kernel and KASAN is enabled. the kasan_record_aux_stack() may call alloc_pages(), and the rt-spinlock will be acquired, if currently in atomic context, will trigger warning: BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 239, name: bootlogd Preemption disabled at: [] rt_mutex_slowunlock+0xa1/0x4e0 CPU: 3 PID: 239 Comm: bootlogd Tainted: G W 5.17.1-rt17-yocto-preempt-rt+ #105 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 Call Trace: __might_resched.cold+0x13b/0x173 rt_spin_lock+0x5b/0xf0 get_page_from_freelist+0x20c/0x1610 __alloc_pages+0x25e/0x5e0 __stack_depot_save+0x3c0/0x4a0 kasan_save_stack+0x3a/0x50 __kasan_record_aux_stack+0xb6/0xc0 kasan_record_aux_stack+0xe/0x10 irq_work_queue_on+0x6a/0x1c0 pull_rt_task+0x631/0x6b0 do_balance_callbacks+0x56/0x80 __balance_callbacks+0x63/0x90 rt_mutex_setprio+0x349/0x880 rt_mutex_slowunlock+0x22a/0x4e0 rt_spin_unlock+0x49/0x80 uart_write+0x186/0x2b0 do_output_char+0x2e9/0x3a0 n_tty_write+0x306/0x800 file_tty_write.isra.0+0x2af/0x450 tty_write+0x22/0x30 new_sync_write+0x27c/0x3a0 vfs_write+0x3f7/0x5d0 ksys_write+0xd9/0x180 __x64_sys_write+0x43/0x50 do_syscall_64+0x44/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae Fix it by using kasan_record_aux_stack_noalloc() to avoid the call to alloc_pages(). Link: https://lkml.kernel.org/r/20220402142555.2699582-1-qiang1.zhang@intel.com Signed-off-by: Zqiang Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/irq_work.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq_work.c b/kernel/irq_work.c index f7df715ec28e..7afa40fe5cc4 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -137,7 +137,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu) if (!irq_work_claim(work)) return false; - kasan_record_aux_stack(work); + kasan_record_aux_stack_noalloc(work); preempt_disable(); if (cpu != smp_processor_id()) { From b1add418d457bc30cdd6a89c2bba092cd8941fcf Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Thu, 14 Apr 2022 19:13:37 -0700 Subject: [PATCH 667/708] kasan: fix hw tags enablement when KUNIT tests are disabled Kasan enables hw tags via kasan_enable_tagging() which based on the mode passed via kernel command line selects the correct hw backend. kasan_enable_tagging() is meant to be invoked indirectly via the cpu features framework of the architectures that support these backends. Currently the invocation of this function is guarded by CONFIG_KASAN_KUNIT_TEST which allows the enablement of the correct backend only when KUNIT tests are enabled in the kernel. This inconsistency was introduced in commit: ed6d74446cbf ("kasan: test: support async (again) and asymm modes for HW_TAGS") ... and prevents to enable MTE on arm64 when KUNIT tests for kasan hw_tags are disabled. Fix the issue making sure that the CONFIG_KASAN_KUNIT_TEST guard does not prevent the correct invocation of kasan_enable_tagging(). Link: https://lkml.kernel.org/r/20220408124323.10028-1-vincenzo.frascino@arm.com Fixes: ed6d74446cbf ("kasan: test: support async (again) and asymm modes for HW_TAGS") Signed-off-by: Vincenzo Frascino Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/hw_tags.c | 5 +++-- mm/kasan/kasan.h | 10 ++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 07a76c46daa5..9e1b6544bfa8 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -336,8 +336,6 @@ void __kasan_poison_vmalloc(const void *start, unsigned long size) #endif -#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) - void kasan_enable_tagging(void) { if (kasan_arg_mode == KASAN_ARG_MODE_ASYNC) @@ -347,6 +345,9 @@ void kasan_enable_tagging(void) else hw_enable_tagging_sync(); } + +#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) + EXPORT_SYMBOL_GPL(kasan_enable_tagging); void kasan_force_async_fault(void) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index d79b83d673b1..b01b4bbe0409 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -355,25 +355,27 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #define hw_set_mem_tag_range(addr, size, tag, init) \ arch_set_mem_tag_range((addr), (size), (tag), (init)) +void kasan_enable_tagging(void); + #else /* CONFIG_KASAN_HW_TAGS */ #define hw_enable_tagging_sync() #define hw_enable_tagging_async() #define hw_enable_tagging_asymm() +static inline void kasan_enable_tagging(void) { } + #endif /* CONFIG_KASAN_HW_TAGS */ #if defined(CONFIG_KASAN_HW_TAGS) && IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) -void kasan_enable_tagging(void); void kasan_force_async_fault(void); -#else /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */ +#else /* CONFIG_KASAN_HW_TAGS && CONFIG_KASAN_KUNIT_TEST */ -static inline void kasan_enable_tagging(void) { } static inline void kasan_force_async_fault(void) { } -#endif /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */ +#endif /* CONFIG_KASAN_HW_TAGS && CONFIG_KASAN_KUNIT_TEST */ #ifdef CONFIG_KASAN_SW_TAGS u8 kasan_random_tag(void); From 2dfe63e61cc31ee59ce951672b0850b5229cd5b0 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 14 Apr 2022 19:13:40 -0700 Subject: [PATCH 668/708] mm, kfence: support kmem_dump_obj() for KFENCE objects Calling kmem_obj_info() via kmem_dump_obj() on KFENCE objects has been producing garbage data due to the object not actually being maintained by SLAB or SLUB. Fix this by implementing __kfence_obj_info() that copies relevant information to struct kmem_obj_info when the object was allocated by KFENCE; this is called by a common kmem_obj_info(), which also calls the slab/slub/slob specific variant now called __kmem_obj_info(). For completeness, kmem_dump_obj() now displays if the object was allocated by KFENCE. Link: https://lore.kernel.org/all/20220323090520.GG16885@xsang-OptiPlex-9020/ Link: https://lkml.kernel.org/r/20220406131558.3558585-1-elver@google.com Fixes: b89fb5ef0ce6 ("mm, kfence: insert KFENCE hooks for SLUB") Fixes: d3fb45f370d9 ("mm, kfence: insert KFENCE hooks for SLAB") Signed-off-by: Marco Elver Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reported-by: kernel test robot Acked-by: Vlastimil Babka [slab] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfence.h | 24 +++++++++++++++++++++ mm/kfence/core.c | 21 ------------------- mm/kfence/kfence.h | 21 +++++++++++++++++++ mm/kfence/report.c | 47 ++++++++++++++++++++++++++++++++++++++++++ mm/slab.c | 2 +- mm/slab.h | 2 +- mm/slab_common.c | 9 ++++++++ mm/slob.c | 2 +- mm/slub.c | 2 +- 9 files changed, 105 insertions(+), 25 deletions(-) diff --git a/include/linux/kfence.h b/include/linux/kfence.h index f49e64222628..726857a4b680 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -204,6 +204,22 @@ static __always_inline __must_check bool kfence_free(void *addr) */ bool __must_check kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs); +#ifdef CONFIG_PRINTK +struct kmem_obj_info; +/** + * __kfence_obj_info() - fill kmem_obj_info struct + * @kpp: kmem_obj_info to be filled + * @object: the object + * + * Return: + * * false - not a KFENCE object + * * true - a KFENCE object, filled @kpp + * + * Copies information to @kpp for KFENCE objects. + */ +bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab); +#endif + #else /* CONFIG_KFENCE */ static inline bool is_kfence_address(const void *addr) { return false; } @@ -221,6 +237,14 @@ static inline bool __must_check kfence_handle_page_fault(unsigned long addr, boo return false; } +#ifdef CONFIG_PRINTK +struct kmem_obj_info; +static inline bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) +{ + return false; +} +#endif + #endif #endif /* _LINUX_KFENCE_H */ diff --git a/mm/kfence/core.c b/mm/kfence/core.c index a203747ad2c0..9b2b5f56f4ae 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -231,27 +231,6 @@ static bool kfence_unprotect(unsigned long addr) return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), false)); } -static inline struct kfence_metadata *addr_to_metadata(unsigned long addr) -{ - long index; - - /* The checks do not affect performance; only called from slow-paths. */ - - if (!is_kfence_address((void *)addr)) - return NULL; - - /* - * May be an invalid index if called with an address at the edge of - * __kfence_pool, in which case we would report an "invalid access" - * error. - */ - index = (addr - (unsigned long)__kfence_pool) / (PAGE_SIZE * 2) - 1; - if (index < 0 || index >= CONFIG_KFENCE_NUM_OBJECTS) - return NULL; - - return &kfence_metadata[index]; -} - static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *meta) { unsigned long offset = (meta - kfence_metadata + 1) * PAGE_SIZE * 2; diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h index 9a6c4b1b12a8..600f2e2431d6 100644 --- a/mm/kfence/kfence.h +++ b/mm/kfence/kfence.h @@ -96,6 +96,27 @@ struct kfence_metadata { extern struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS]; +static inline struct kfence_metadata *addr_to_metadata(unsigned long addr) +{ + long index; + + /* The checks do not affect performance; only called from slow-paths. */ + + if (!is_kfence_address((void *)addr)) + return NULL; + + /* + * May be an invalid index if called with an address at the edge of + * __kfence_pool, in which case we would report an "invalid access" + * error. + */ + index = (addr - (unsigned long)__kfence_pool) / (PAGE_SIZE * 2) - 1; + if (index < 0 || index >= CONFIG_KFENCE_NUM_OBJECTS) + return NULL; + + return &kfence_metadata[index]; +} + /* KFENCE error types for report generation. */ enum kfence_error_type { KFENCE_ERROR_OOB, /* Detected a out-of-bounds access. */ diff --git a/mm/kfence/report.c b/mm/kfence/report.c index f93a7b2a338b..f5a6d8ba3e21 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -273,3 +273,50 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r /* We encountered a memory safety error, taint the kernel! */ add_taint(TAINT_BAD_PAGE, LOCKDEP_STILL_OK); } + +#ifdef CONFIG_PRINTK +static void kfence_to_kp_stack(const struct kfence_track *track, void **kp_stack) +{ + int i, j; + + i = get_stack_skipnr(track->stack_entries, track->num_stack_entries, NULL); + for (j = 0; i < track->num_stack_entries && j < KS_ADDRS_COUNT; ++i, ++j) + kp_stack[j] = (void *)track->stack_entries[i]; + if (j < KS_ADDRS_COUNT) + kp_stack[j] = NULL; +} + +bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) +{ + struct kfence_metadata *meta = addr_to_metadata((unsigned long)object); + unsigned long flags; + + if (!meta) + return false; + + /* + * If state is UNUSED at least show the pointer requested; the rest + * would be garbage data. + */ + kpp->kp_ptr = object; + + /* Requesting info an a never-used object is almost certainly a bug. */ + if (WARN_ON(meta->state == KFENCE_OBJECT_UNUSED)) + return true; + + raw_spin_lock_irqsave(&meta->lock, flags); + + kpp->kp_slab = slab; + kpp->kp_slab_cache = meta->cache; + kpp->kp_objp = (void *)meta->addr; + kfence_to_kp_stack(&meta->alloc_track, kpp->kp_stack); + if (meta->state == KFENCE_OBJECT_FREED) + kfence_to_kp_stack(&meta->free_track, kpp->kp_free_stack); + /* get_stack_skipnr() ensures the first entry is outside allocator. */ + kpp->kp_ret = kpp->kp_stack[0]; + + raw_spin_unlock_irqrestore(&meta->lock, flags); + + return true; +} +#endif diff --git a/mm/slab.c b/mm/slab.c index b04e40078bdf..0edb474edef1 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3665,7 +3665,7 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller); #endif /* CONFIG_NUMA */ #ifdef CONFIG_PRINTK -void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) +void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) { struct kmem_cache *cachep; unsigned int objnr; diff --git a/mm/slab.h b/mm/slab.h index fd7ae2024897..95eb34174c1b 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -868,7 +868,7 @@ struct kmem_obj_info { void *kp_stack[KS_ADDRS_COUNT]; void *kp_free_stack[KS_ADDRS_COUNT]; }; -void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab); +void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab); #endif #ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR diff --git a/mm/slab_common.c b/mm/slab_common.c index 6ee64d6208b3..2b3206a2c3b5 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -555,6 +555,13 @@ bool kmem_valid_obj(void *object) } EXPORT_SYMBOL_GPL(kmem_valid_obj); +static void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) +{ + if (__kfence_obj_info(kpp, object, slab)) + return; + __kmem_obj_info(kpp, object, slab); +} + /** * kmem_dump_obj - Print available slab provenance information * @object: slab object for which to find provenance information. @@ -590,6 +597,8 @@ void kmem_dump_obj(void *object) pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name); else pr_cont(" slab%s", cp); + if (is_kfence_address(object)) + pr_cont(" (kfence)"); if (kp.kp_objp) pr_cont(" start %px", kp.kp_objp); if (kp.kp_data_offset) diff --git a/mm/slob.c b/mm/slob.c index dfa6808dff36..40ea6e2d4ccd 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -463,7 +463,7 @@ out: } #ifdef CONFIG_PRINTK -void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) +void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) { kpp->kp_ptr = object; kpp->kp_slab = slab; diff --git a/mm/slub.c b/mm/slub.c index 74d92aa4a3a2..ed5c2c03a47a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4312,7 +4312,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s) } #ifdef CONFIG_PRINTK -void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) +void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) { void *base; int __maybe_unused i; From e553f62f10d93551eb883eca227ac54d1a4fad84 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 14 Apr 2022 19:13:43 -0700 Subject: [PATCH 669/708] mm, page_alloc: fix build_zonerefs_node() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 6aa303defb74 ("mm, vmscan: only allocate and reclaim from zones with pages managed by the buddy allocator") only zones with free memory are included in a built zonelist. This is problematic when e.g. all memory of a zone has been ballooned out when zonelists are being rebuilt. The decision whether to rebuild the zonelists when onlining new memory is done based on populated_zone() returning 0 for the zone the memory will be added to. The new zone is added to the zonelists only, if it has free memory pages (managed_zone() returns a non-zero value) after the memory has been onlined. This implies, that onlining memory will always free the added pages to the allocator immediately, but this is not true in all cases: when e.g. running as a Xen guest the onlined new memory will be added only to the ballooned memory list, it will be freed only when the guest is being ballooned up afterwards. Another problem with using managed_zone() for the decision whether a zone is being added to the zonelists is, that a zone with all memory used will in fact be removed from all zonelists in case the zonelists happen to be rebuilt. Use populated_zone() when building a zonelist as it has been done before that commit. There was a report that QubesOS (based on Xen) is hitting this problem. Xen has switched to use the zone device functionality in kernel 5.9 and QubesOS wants to use memory hotplugging for guests in order to be able to start a guest with minimal memory and expand it as needed. This was the report leading to the patch. Link: https://lkml.kernel.org/r/20220407120637.9035-1-jgross@suse.com Fixes: 6aa303defb74 ("mm, vmscan: only allocate and reclaim from zones with pages managed by the buddy allocator") Signed-off-by: Juergen Gross Reported-by: Marek Marczykowski-Górecki Acked-by: Michal Hocko Acked-by: David Hildenbrand Cc: Marek Marczykowski-Górecki Reviewed-by: Wei Yang Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6e5b4488a0c5..33ca8cab21e6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6131,7 +6131,7 @@ static int build_zonerefs_node(pg_data_t *pgdat, struct zoneref *zonerefs) do { zone_type--; zone = pgdat->node_zones + zone_type; - if (managed_zone(zone)) { + if (populated_zone(zone)) { zoneref_set_zone(zone, &zonerefs[nr_zones++]); check_highest_zone(zone_type); } From e914d8f00391520ecc4495dd0ca0124538ab7119 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 14 Apr 2022 19:13:46 -0700 Subject: [PATCH 670/708] mm: fix unexpected zeroed page mapping with zram swap Two processes under CLONE_VM cloning, user process can be corrupted by seeing zeroed page unexpectedly. CPU A CPU B do_swap_page do_swap_page SWP_SYNCHRONOUS_IO path SWP_SYNCHRONOUS_IO path swap_readpage valid data swap_slot_free_notify delete zram entry swap_readpage zeroed(invalid) data pte_lock map the *zero data* to userspace pte_unlock pte_lock if (!pte_same) goto out_nomap; pte_unlock return and next refault will read zeroed data The swap_slot_free_notify is bogus for CLONE_VM case since it doesn't increase the refcount of swap slot at copy_mm so it couldn't catch up whether it's safe or not to discard data from backing device. In the case, only the lock it could rely on to synchronize swap slot freeing is page table lock. Thus, this patch gets rid of the swap_slot_free_notify function. With this patch, CPU A will see correct data. CPU A CPU B do_swap_page do_swap_page SWP_SYNCHRONOUS_IO path SWP_SYNCHRONOUS_IO path swap_readpage original data pte_lock map the original data swap_free swap_range_free bd_disk->fops->swap_slot_free_notify swap_readpage read zeroed data pte_unlock pte_lock if (!pte_same) goto out_nomap; pte_unlock return on next refault will see mapped data by CPU B The concern of the patch would increase memory consumption since it could keep wasted memory with compressed form in zram as well as uncompressed form in address space. However, most of cases of zram uses no readahead and do_swap_page is followed by swap_free so it will free the compressed form from in zram quickly. Link: https://lkml.kernel.org/r/YjTVVxIAsnKAXjTd@google.com Fixes: 0bcac06f27d7 ("mm, swap: skip swapcache for swapin of synchronous device") Reported-by: Ivan Babrou Tested-by: Ivan Babrou Signed-off-by: Minchan Kim Cc: Nitin Gupta Cc: Sergey Senozhatsky Cc: Jens Axboe Cc: David Hildenbrand Cc: [4.14+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_io.c | 54 ---------------------------------------------------- 1 file changed, 54 deletions(-) diff --git a/mm/page_io.c b/mm/page_io.c index b417f000b49e..89fbf3cae30f 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -51,54 +51,6 @@ void end_swap_bio_write(struct bio *bio) bio_put(bio); } -static void swap_slot_free_notify(struct page *page) -{ - struct swap_info_struct *sis; - struct gendisk *disk; - swp_entry_t entry; - - /* - * There is no guarantee that the page is in swap cache - the software - * suspend code (at least) uses end_swap_bio_read() against a non- - * swapcache page. So we must check PG_swapcache before proceeding with - * this optimization. - */ - if (unlikely(!PageSwapCache(page))) - return; - - sis = page_swap_info(page); - if (data_race(!(sis->flags & SWP_BLKDEV))) - return; - - /* - * The swap subsystem performs lazy swap slot freeing, - * expecting that the page will be swapped out again. - * So we can avoid an unnecessary write if the page - * isn't redirtied. - * This is good for real swap storage because we can - * reduce unnecessary I/O and enhance wear-leveling - * if an SSD is used as the as swap device. - * But if in-memory swap device (eg zram) is used, - * this causes a duplicated copy between uncompressed - * data in VM-owned memory and compressed data in - * zram-owned memory. So let's free zram-owned memory - * and make the VM-owned decompressed page *dirty*, - * so the page should be swapped out somewhere again if - * we again wish to reclaim it. - */ - disk = sis->bdev->bd_disk; - entry.val = page_private(page); - if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) { - unsigned long offset; - - offset = swp_offset(entry); - - SetPageDirty(page); - disk->fops->swap_slot_free_notify(sis->bdev, - offset); - } -} - static void end_swap_bio_read(struct bio *bio) { struct page *page = bio_first_page_all(bio); @@ -114,7 +66,6 @@ static void end_swap_bio_read(struct bio *bio) } SetPageUptodate(page); - swap_slot_free_notify(page); out: unlock_page(page); WRITE_ONCE(bio->bi_private, NULL); @@ -394,11 +345,6 @@ int swap_readpage(struct page *page, bool synchronous) if (sis->flags & SWP_SYNCHRONOUS_IO) { ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); if (!ret) { - if (trylock_page(page)) { - swap_slot_free_notify(page); - unlock_page(page); - } - count_vm_event(PSWPIN); goto out; } From 31ca72fa7540bb654b55c56adaf99305847376e0 Mon Sep 17 00:00:00 2001 From: Charan Teja Kalla Date: Thu, 14 Apr 2022 19:13:49 -0700 Subject: [PATCH 671/708] mm: compaction: fix compiler warning when CONFIG_COMPACTION=n The below warning is reported when CONFIG_COMPACTION=n: mm/compaction.c:56:27: warning: 'HPAGE_FRAG_CHECK_INTERVAL_MSEC' defined but not used [-Wunused-const-variable=] 56 | static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500; | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix it by moving 'HPAGE_FRAG_CHECK_INTERVAL_MSEC' under CONFIG_COMPACTION defconfig. Also since this is just a 'static const int' type, use #define for it. Link: https://lkml.kernel.org/r/1647608518-20924-1-git-send-email-quic_charante@quicinc.com Signed-off-by: Charan Teja Kalla Reported-by: kernel test robot Acked-by: Vlastimil Babka Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/compaction.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index c3e37aa9ff9e..fe915db6149b 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -26,6 +26,11 @@ #include "internal.h" #ifdef CONFIG_COMPACTION +/* + * Fragmentation score check interval for proactive compaction purposes. + */ +#define HPAGE_FRAG_CHECK_INTERVAL_MSEC (500) + static inline void count_compact_event(enum vm_event_item item) { count_vm_event(item); @@ -50,11 +55,6 @@ static inline void count_compact_events(enum vm_event_item item, long delta) #define pageblock_start_pfn(pfn) block_start_pfn(pfn, pageblock_order) #define pageblock_end_pfn(pfn) block_end_pfn(pfn, pageblock_order) -/* - * Fragmentation score check interval for proactive compaction purposes. - */ -static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500; - /* * Page order with-respect-to which proactive compaction * calculates external fragmentation, which is used as From 5a317412ef884763fdf7aa17f9f3636959d11d8f Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 14 Apr 2022 19:13:52 -0700 Subject: [PATCH 672/708] hugetlb: do not demote poisoned hugetlb pages It is possible for poisoned hugetlb pages to reside on the free lists. The huge page allocation routines which dequeue entries from the free lists make a point of avoiding poisoned pages. There is no such check and avoidance in the demote code path. If a hugetlb page on the is on a free list, poison will only be set in the head page rather then the page with the actual error. If such a page is demoted, then the poison flag may follow the wrong page. A page without error could have poison set, and a page with poison could not have the flag set. Check for poison before attempting to demote a hugetlb page. Also, return -EBUSY to the caller if only poisoned pages are on the free list. Link: https://lkml.kernel.org/r/20220307215707.50916-1-mike.kravetz@oracle.com Fixes: 8531fc6f52f5 ("hugetlb: add hugetlb demote page support") Signed-off-by: Mike Kravetz Reviewed-by: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b34f50156f7e..f8ca7cca3c1a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3475,7 +3475,6 @@ static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed) { int nr_nodes, node; struct page *page; - int rc = 0; lockdep_assert_held(&hugetlb_lock); @@ -3486,15 +3485,19 @@ static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed) } for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) { - if (!list_empty(&h->hugepage_freelists[node])) { - page = list_entry(h->hugepage_freelists[node].next, - struct page, lru); - rc = demote_free_huge_page(h, page); - break; + list_for_each_entry(page, &h->hugepage_freelists[node], lru) { + if (PageHWPoison(page)) + continue; + + return demote_free_huge_page(h, page); } } - return rc; + /* + * Only way to get here is if all pages on free lists are poisoned. + * Return -EBUSY so that caller will not retry. + */ + return -EBUSY; } #define HSTATE_ATTR_RO(_name) \ From 354e923df042a11d1ab8ca06b3ebfab3a018a4ec Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 14 Apr 2022 19:13:55 -0700 Subject: [PATCH 673/708] revert "fs/binfmt_elf: fix PT_LOAD p_align values for loaders" Commit 925346c129da11 ("fs/binfmt_elf: fix PT_LOAD p_align values for loaders") was an attempt to fix regressions due to 9630f0d60fec5f ("fs/binfmt_elf: use PT_LOAD p_align values for static PIE"). But regressionss continue to be reported: https://lore.kernel.org/lkml/cb5b81bd-9882-e5dc-cd22-54bdbaaefbbc@leemhuis.info/ https://bugzilla.kernel.org/show_bug.cgi?id=215720 https://lkml.kernel.org/r/b685f3d0-da34-531d-1aa9-479accd3e21b@leemhuis.info This patch reverts the fix, so the original can also be reverted. Fixes: 925346c129da11 ("fs/binfmt_elf: fix PT_LOAD p_align values for loaders") Cc: H.J. Lu Cc: Chris Kennelly Cc: Al Viro Cc: Alexey Dobriyan Cc: Song Liu Cc: David Rientjes Cc: Ian Rogers Cc: Hugh Dickins Cc: Suren Baghdasaryan Cc: Sandeep Patil Cc: Fangrui Song Cc: Nick Desaulniers Cc: Kirill A. Shutemov Cc: Mike Kravetz Cc: Shuah Khan Cc: Thorsten Leemhuis Cc: Mike Rapoport Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6556e13ed95f..37d9c455d535 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1118,7 +1118,7 @@ out_free_interp: * without MAP_FIXED nor MAP_FIXED_NOREPLACE). */ alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum); - if (interpreter || alignment > ELF_MIN_ALIGN) { + if (alignment > ELF_MIN_ALIGN) { load_bias = ELF_ET_DYN_BASE; if (current->flags & PF_RANDOMIZE) load_bias += arch_mmap_rnd(); From aeb7923733d100b86c6bc68e7ae32913b0cec9d8 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 14 Apr 2022 19:13:58 -0700 Subject: [PATCH 674/708] revert "fs/binfmt_elf: use PT_LOAD p_align values for static PIE" Despite Mike's attempted fix (925346c129da117122), regressions reports continue: https://lore.kernel.org/lkml/cb5b81bd-9882-e5dc-cd22-54bdbaaefbbc@leemhuis.info/ https://bugzilla.kernel.org/show_bug.cgi?id=215720 https://lkml.kernel.org/r/b685f3d0-da34-531d-1aa9-479accd3e21b@leemhuis.info So revert this patch. Fixes: 9630f0d60fec ("fs/binfmt_elf: use PT_LOAD p_align values for static PIE") Cc: Alexey Dobriyan Cc: Al Viro Cc: Chris Kennelly Cc: David Rientjes Cc: Fangrui Song Cc: H.J. Lu Cc: Hugh Dickins Cc: Ian Rogers Cc: Kirill A. Shutemov Cc: Mike Kravetz Cc: Mike Rapoport Cc: Nick Desaulniers Cc: Sandeep Patil Cc: Shuah Khan Cc: Song Liu Cc: Suren Baghdasaryan Cc: Thorsten Leemhuis Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 37d9c455d535..63c7ebb0da89 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1117,11 +1117,11 @@ out_free_interp: * independently randomized mmap region (0 load_bias * without MAP_FIXED nor MAP_FIXED_NOREPLACE). */ - alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum); - if (alignment > ELF_MIN_ALIGN) { + if (interpreter) { load_bias = ELF_ET_DYN_BASE; if (current->flags & PF_RANDOMIZE) load_bias += arch_mmap_rnd(); + alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum); if (alignment) load_bias &= ~(alignment - 1); elf_flags |= MAP_FIXED_NOREPLACE; From c12cd77cb028255663810e6c4528f0325facff66 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 14 Apr 2022 19:14:01 -0700 Subject: [PATCH 675/708] mm/vmalloc: fix spinning drain_vmap_work after reading from /proc/vmcore Commit 3ee48b6af49c ("mm, x86: Saving vmcore with non-lazy freeing of vmas") introduced set_iounmap_nonlazy(), which sets vmap_lazy_nr to lazy_max_pages() + 1, ensuring that any future vunmaps() immediately purge the vmap areas instead of doing it lazily. Commit 690467c81b1a ("mm/vmalloc: Move draining areas out of caller context") moved the purging from the vunmap() caller to a worker thread. Unfortunately, set_iounmap_nonlazy() can cause the worker thread to spin (possibly forever). For example, consider the following scenario: 1. Thread reads from /proc/vmcore. This eventually calls __copy_oldmem_page() -> set_iounmap_nonlazy(), which sets vmap_lazy_nr to lazy_max_pages() + 1. 2. Then it calls free_vmap_area_noflush() (via iounmap()), which adds 2 pages (one page plus the guard page) to the purge list and vmap_lazy_nr. vmap_lazy_nr is now lazy_max_pages() + 3, so the drain_vmap_work is scheduled. 3. Thread returns from the kernel and is scheduled out. 4. Worker thread is scheduled in and calls drain_vmap_area_work(). It frees the 2 pages on the purge list. vmap_lazy_nr is now lazy_max_pages() + 1. 5. This is still over the threshold, so it tries to purge areas again, but doesn't find anything. 6. Repeat 5. If the system is running with only one CPU (which is typicial for kdump) and preemption is disabled, then this will never make forward progress: there aren't any more pages to purge, so it hangs. If there is more than one CPU or preemption is enabled, then the worker thread will spin forever in the background. (Note that if there were already pages to be purged at the time that set_iounmap_nonlazy() was called, this bug is avoided.) This can be reproduced with anything that reads from /proc/vmcore multiple times. E.g., vmcore-dmesg /proc/vmcore. It turns out that improvements to vmap() over the years have obsoleted the need for this "optimization". I benchmarked `dd if=/proc/vmcore of=/dev/null` with 4k and 1M read sizes on a system with a 32GB vmcore. The test was run on 5.17, 5.18-rc1 with a fix that avoided the hang, and 5.18-rc1 with set_iounmap_nonlazy() removed entirely: |5.17 |5.18+fix|5.18+removal 4k|40.86s| 40.09s| 26.73s 1M|24.47s| 23.98s| 21.84s The removal was the fastest (by a wide margin with 4k reads). This patch removes set_iounmap_nonlazy(). Link: https://lkml.kernel.org/r/52f819991051f9b865e9ce25605509bfdbacadcd.1649277321.git.osandov@fb.com Fixes: 690467c81b1a ("mm/vmalloc: Move draining areas out of caller context") Signed-off-by: Omar Sandoval Acked-by: Chris Down Reviewed-by: Uladzislau Rezki (Sony) Reviewed-by: Christoph Hellwig Acked-by: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/io.h | 2 -- arch/x86/kernel/crash_dump_64.c | 1 - mm/vmalloc.c | 11 ----------- 3 files changed, 14 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index f6d91ecb8026..e9736af126b2 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -210,8 +210,6 @@ void __iomem *ioremap(resource_size_t offset, unsigned long size); extern void iounmap(volatile void __iomem *addr); #define iounmap iounmap -extern void set_iounmap_nonlazy(void); - #ifdef __KERNEL__ void memcpy_fromio(void *, const volatile void __iomem *, size_t); diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index a7f617a3981d..97529552dd24 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c @@ -37,7 +37,6 @@ static ssize_t __copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, } else memcpy(buf, vaddr + offset, csize); - set_iounmap_nonlazy(); iounmap((void __iomem *)vaddr); return csize; } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index e163372d3967..0b17498a34f1 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1671,17 +1671,6 @@ static DEFINE_MUTEX(vmap_purge_lock); /* for per-CPU blocks */ static void purge_fragmented_blocks_allcpus(void); -#ifdef CONFIG_X86_64 -/* - * called before a call to iounmap() if the caller wants vm_area_struct's - * immediately freed. - */ -void set_iounmap_nonlazy(void) -{ - atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1); -} -#endif /* CONFIG_X86_64 */ - /* * Purges all lazily-freed vmap areas. */ From 23c2d497de21f25898fbea70aeb292ab8acc8c94 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Thu, 14 Apr 2022 19:14:04 -0700 Subject: [PATCH 676/708] mm: kmemleak: take a full lowmem check in kmemleak_*_phys() The kmemleak_*_phys() apis do not check the address for lowmem's min boundary, while the caller may pass an address below lowmem, which will trigger an oops: # echo scan > /sys/kernel/debug/kmemleak Unable to handle kernel paging request at virtual address ff5fffffffe00000 Oops [#1] Modules linked in: CPU: 2 PID: 134 Comm: bash Not tainted 5.18.0-rc1-next-20220407 #33 Hardware name: riscv-virtio,qemu (DT) epc : scan_block+0x74/0x15c ra : scan_block+0x72/0x15c epc : ffffffff801e5806 ra : ffffffff801e5804 sp : ff200000104abc30 gp : ffffffff815cd4e8 tp : ff60000004cfa340 t0 : 0000000000000200 t1 : 00aaaaaac23954cc t2 : 00000000000003ff s0 : ff200000104abc90 s1 : ffffffff81b0ff28 a0 : 0000000000000000 a1 : ff5fffffffe01000 a2 : ffffffff81b0ff28 a3 : 0000000000000002 a4 : 0000000000000001 a5 : 0000000000000000 a6 : ff200000104abd7c a7 : 0000000000000005 s2 : ff5fffffffe00ff9 s3 : ffffffff815cd998 s4 : ffffffff815d0e90 s5 : ffffffff81b0ff28 s6 : 0000000000000020 s7 : ffffffff815d0eb0 s8 : ffffffffffffffff s9 : ff5fffffffe00000 s10: ff5fffffffe01000 s11: 0000000000000022 t3 : 00ffffffaa17db4c t4 : 000000000000000f t5 : 0000000000000001 t6 : 0000000000000000 status: 0000000000000100 badaddr: ff5fffffffe00000 cause: 000000000000000d scan_gray_list+0x12e/0x1a6 kmemleak_scan+0x2aa/0x57e kmemleak_write+0x32a/0x40c full_proxy_write+0x56/0x82 vfs_write+0xa6/0x2a6 ksys_write+0x6c/0xe2 sys_write+0x22/0x2a ret_from_syscall+0x0/0x2 The callers may not quite know the actual address they pass(e.g. from devicetree). So the kmemleak_*_phys() apis should guarantee the address they finally use is in lowmem range, so check the address for lowmem's min boundary. Link: https://lkml.kernel.org/r/20220413122925.33856-1-patrick.wang.shcn@gmail.com Signed-off-by: Patrick Wang Acked-by: Catalin Marinas Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index acd7cbb82e16..a182f5ddaf68 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1132,7 +1132,7 @@ EXPORT_SYMBOL(kmemleak_no_scan); void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count, gfp_t gfp) { - if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) kmemleak_alloc(__va(phys), size, min_count, gfp); } EXPORT_SYMBOL(kmemleak_alloc_phys); @@ -1146,7 +1146,7 @@ EXPORT_SYMBOL(kmemleak_alloc_phys); */ void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size) { - if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) kmemleak_free_part(__va(phys), size); } EXPORT_SYMBOL(kmemleak_free_part_phys); @@ -1158,7 +1158,7 @@ EXPORT_SYMBOL(kmemleak_free_part_phys); */ void __ref kmemleak_not_leak_phys(phys_addr_t phys) { - if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) kmemleak_not_leak(__va(phys)); } EXPORT_SYMBOL(kmemleak_not_leak_phys); @@ -1170,7 +1170,7 @@ EXPORT_SYMBOL(kmemleak_not_leak_phys); */ void __ref kmemleak_ignore_phys(phys_addr_t phys) { - if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) kmemleak_ignore(__va(phys)); } EXPORT_SYMBOL(kmemleak_ignore_phys); From 35a33ff3807d3adb9daaf937f5bca002ffa9f84e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 14 Apr 2022 01:50:38 +0200 Subject: [PATCH 677/708] random: use memmove instead of memcpy for remaining 32 bytes In order to immediately overwrite the old key on the stack, before servicing a userspace request for bytes, we use the remaining 32 bytes of block 0 as the key. This means moving indices 8,9,a,b,c,d,e,f -> 4,5,6,7,8,9,a,b. Since 4 < 8, for the kernel implementations of memcpy(), this doesn't actually appear to be a problem in practice. But relying on that characteristic seems a bit brittle. So let's change that to a proper memmove(), which is the by-the-books way of handling overlapping memory copies. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 6b01b2be9dd4..3a293f919af9 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -333,7 +333,7 @@ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], chacha20_block(chacha_state, first_block); memcpy(key, first_block, CHACHA_KEY_SIZE); - memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len); + memmove(random_data, first_block + CHACHA_KEY_SIZE, random_data_len); memzero_explicit(first_block, sizeof(first_block)); } From b2d229d4ddb17db541098b83524d901257e93845 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 17 Apr 2022 13:57:31 -0700 Subject: [PATCH 678/708] Linux 5.18-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 29e273d3f8cc..fa5112a0ec1b 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Superb Owl # *DOCUMENTATION* From 6c846d026d490b2383d395bc8e7b06336219667b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Apr 2022 15:18:37 +0100 Subject: [PATCH 679/708] gpio: Don't fiddle with irqchips marked as immutable In order to move away from gpiolib messing with the internals of unsuspecting irqchips, add a flag by which irqchips advertise that they are not to be messed with, and do solemnly swear that they correctly call into the gpiolib helpers when required. Also nudge the users into converting their drivers to the new model. Reviewed-by: Andy Shevchenko Reviewed-by: Bartosz Golaszewski Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220419141846.598305-2-maz@kernel.org --- drivers/gpio/gpiolib.c | 7 ++++++- include/linux/irq.h | 2 ++ kernel/irq/debugfs.c | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index e59884cc12a7..48191e62a3cc 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1475,6 +1475,11 @@ static void gpiochip_set_irq_hooks(struct gpio_chip *gc) { struct irq_chip *irqchip = gc->irq.chip; + if (irqchip->flags & IRQCHIP_IMMUTABLE) + return; + + chip_warn(gc, "not an immutable chip, please consider fixing it!\n"); + if (!irqchip->irq_request_resources && !irqchip->irq_release_resources) { irqchip->irq_request_resources = gpiochip_irq_reqres; @@ -1633,7 +1638,7 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gc) irq_domain_remove(gc->irq.domain); } - if (irqchip) { + if (irqchip && !(irqchip->flags & IRQCHIP_IMMUTABLE)) { if (irqchip->irq_request_resources == gpiochip_irq_reqres) { irqchip->irq_request_resources = NULL; irqchip->irq_release_resources = NULL; diff --git a/include/linux/irq.h b/include/linux/irq.h index f92788ccdba2..505308253d23 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -569,6 +569,7 @@ struct irq_chip { * IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND: Invokes __enable_irq()/__disable_irq() for wake irqs * in the suspend path if they are in disabled state * IRQCHIP_AFFINITY_PRE_STARTUP: Default affinity update before startup + * IRQCHIP_IMMUTABLE: Don't ever change anything in this chip */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), @@ -582,6 +583,7 @@ enum { IRQCHIP_SUPPORTS_NMI = (1 << 8), IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND = (1 << 9), IRQCHIP_AFFINITY_PRE_STARTUP = (1 << 10), + IRQCHIP_IMMUTABLE = (1 << 11), }; #include diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index 2b43f5f5033d..bc8e40cf2b65 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -58,6 +58,7 @@ static const struct irq_bit_descr irqchip_flags[] = { BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI), BIT_MASK_DESCR(IRQCHIP_SUPPORTS_NMI), BIT_MASK_DESCR(IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND), + BIT_MASK_DESCR(IRQCHIP_IMMUTABLE), }; static void From 704f08753b6dcd0e08c1953af0b2c7f3fac87111 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Apr 2022 15:18:38 +0100 Subject: [PATCH 680/708] gpio: Expose the gpiochip_irq_re[ql]res helpers The GPIO subsystem has a couple of internal helpers to manage resources on behalf of the irqchip. Expose them so that GPIO drivers can use them directly. Reviewed-by: Andy Shevchenko Reviewed-by: Bartosz Golaszewski Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220419141846.598305-3-maz@kernel.org --- drivers/gpio/gpiolib.c | 6 ++++-- include/linux/gpio/driver.h | 4 ++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 48191e62a3cc..36e436a66e09 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1423,19 +1423,21 @@ static int gpiochip_to_irq(struct gpio_chip *gc, unsigned int offset) return irq_create_mapping(domain, offset); } -static int gpiochip_irq_reqres(struct irq_data *d) +int gpiochip_irq_reqres(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); return gpiochip_reqres_irq(gc, d->hwirq); } +EXPORT_SYMBOL(gpiochip_irq_reqres); -static void gpiochip_irq_relres(struct irq_data *d) +void gpiochip_irq_relres(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); gpiochip_relres_irq(gc, d->hwirq); } +EXPORT_SYMBOL(gpiochip_irq_relres); static void gpiochip_irq_mask(struct irq_data *d) { diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 98c93510640e..066bcfdf878d 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -579,6 +579,10 @@ void gpiochip_relres_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_disable_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_enable_irq(struct gpio_chip *gc, unsigned int offset); +/* irq_data versions of the above */ +int gpiochip_irq_reqres(struct irq_data *data); +void gpiochip_irq_relres(struct irq_data *data); + /* Line status inquiry for drivers */ bool gpiochip_line_is_open_drain(struct gpio_chip *gc, unsigned int offset); bool gpiochip_line_is_open_source(struct gpio_chip *gc, unsigned int offset); From 36b78aae4bfee749bbde73be570796bfd0f56bec Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Apr 2022 15:18:39 +0100 Subject: [PATCH 681/708] gpio: Add helpers to ease the transition towards immutable irq_chip Add a couple of new helpers to make it slightly simpler to convert drivers to immutable irq_chip structures: - GPIOCHIP_IRQ_RESOURCE_HELPERS populates the irq_chip structure with the resource management callbacks - gpio_irq_chip_set_chip() populates the gpio_irq_chip.chip structure, avoiding the proliferation of ugly casts Reviewed-by: Andy Shevchenko Reviewed-by: Bartosz Golaszewski Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220419141846.598305-4-maz@kernel.org --- include/linux/gpio/driver.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 066bcfdf878d..832990099097 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -583,6 +583,18 @@ void gpiochip_enable_irq(struct gpio_chip *gc, unsigned int offset); int gpiochip_irq_reqres(struct irq_data *data); void gpiochip_irq_relres(struct irq_data *data); +/* Paste this in your irq_chip structure */ +#define GPIOCHIP_IRQ_RESOURCE_HELPERS \ + .irq_request_resources = gpiochip_irq_reqres, \ + .irq_release_resources = gpiochip_irq_relres + +static inline void gpio_irq_chip_set_chip(struct gpio_irq_chip *girq, + const struct irq_chip *chip) +{ + /* Yes, dropping const is ugly, but it isn't like we have a choice */ + girq->chip = (struct irq_chip *)chip; +} + /* Line status inquiry for drivers */ bool gpiochip_line_is_open_drain(struct gpio_chip *gc, unsigned int offset); bool gpiochip_line_is_open_source(struct gpio_chip *gc, unsigned int offset); From bba00555ede79ad8a743da908f708466f6bac0f8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Apr 2022 15:18:40 +0100 Subject: [PATCH 682/708] gpio: tegra186: Make the irqchip immutable Prevent gpiolib from messing with the irqchip by advertising the irq_chip structure as immutable, making it const, and adding the various calls that gpiolib relies upon. Acked-by: Thierry Reding Reviewed-by: Bartosz Golaszewski Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220419141846.598305-5-maz@kernel.org --- drivers/gpio/gpio-tegra186.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c index 031fe105b58e..84c4f1e9fb0c 100644 --- a/drivers/gpio/gpio-tegra186.c +++ b/drivers/gpio/gpio-tegra186.c @@ -80,7 +80,6 @@ struct tegra_gpio_soc { struct tegra_gpio { struct gpio_chip gpio; - struct irq_chip intc; unsigned int num_irq; unsigned int *irq; @@ -372,6 +371,8 @@ static void tegra186_irq_mask(struct irq_data *data) value = readl(base + TEGRA186_GPIO_ENABLE_CONFIG); value &= ~TEGRA186_GPIO_ENABLE_CONFIG_INTERRUPT; writel(value, base + TEGRA186_GPIO_ENABLE_CONFIG); + + gpiochip_disable_irq(&gpio->gpio, data->hwirq); } static void tegra186_irq_unmask(struct irq_data *data) @@ -385,6 +386,8 @@ static void tegra186_irq_unmask(struct irq_data *data) if (WARN_ON(base == NULL)) return; + gpiochip_enable_irq(&gpio->gpio, data->hwirq); + value = readl(base + TEGRA186_GPIO_ENABLE_CONFIG); value |= TEGRA186_GPIO_ENABLE_CONFIG_INTERRUPT; writel(value, base + TEGRA186_GPIO_ENABLE_CONFIG); @@ -456,6 +459,24 @@ static int tegra186_irq_set_wake(struct irq_data *data, unsigned int on) return 0; } +static void tegra186_irq_print_chip(struct irq_data *data, struct seq_file *p) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); + + seq_printf(p, dev_name(gc->parent)); +} + +static const struct irq_chip tegra186_gpio_irq_chip = { + .irq_ack = tegra186_irq_ack, + .irq_mask = tegra186_irq_mask, + .irq_unmask = tegra186_irq_unmask, + .irq_set_type = tegra186_irq_set_type, + .irq_set_wake = tegra186_irq_set_wake, + .irq_print_chip = tegra186_irq_print_chip, + .flags = IRQCHIP_IMMUTABLE, + GPIOCHIP_IRQ_RESOURCE_HELPERS, +}; + static void tegra186_gpio_irq(struct irq_desc *desc) { struct tegra_gpio *gpio = irq_desc_get_handler_data(desc); @@ -760,15 +781,8 @@ static int tegra186_gpio_probe(struct platform_device *pdev) gpio->gpio.of_xlate = tegra186_gpio_of_xlate; #endif /* CONFIG_OF_GPIO */ - gpio->intc.name = dev_name(&pdev->dev); - gpio->intc.irq_ack = tegra186_irq_ack; - gpio->intc.irq_mask = tegra186_irq_mask; - gpio->intc.irq_unmask = tegra186_irq_unmask; - gpio->intc.irq_set_type = tegra186_irq_set_type; - gpio->intc.irq_set_wake = tegra186_irq_set_wake; - irq = &gpio->gpio.irq; - irq->chip = &gpio->intc; + gpio_irq_chip_set_chip(irq, &tegra186_gpio_irq_chip); irq->fwnode = of_node_to_fwnode(pdev->dev.of_node); irq->child_to_parent_hwirq = tegra186_gpio_child_to_parent_hwirq; irq->populate_parent_alloc_arg = tegra186_gpio_populate_parent_fwspec; From 15d8c14ac849f41f2d41dbddb69f402aaf73ff8b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Apr 2022 15:18:41 +0100 Subject: [PATCH 683/708] gpio: pl061: Make the irqchip immutable Prevent gpiolib from messing with the irqchip by advertising the irq_chip structure as immutable, making it const, and adding the various calls that gpiolib relies upon. Reviewed-by: Bartosz Golaszewski Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220419141846.598305-6-maz@kernel.org --- drivers/gpio/gpio-pl061.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c index 4ecab700f23f..6464056cb6ae 100644 --- a/drivers/gpio/gpio-pl061.c +++ b/drivers/gpio/gpio-pl061.c @@ -52,7 +52,6 @@ struct pl061 { void __iomem *base; struct gpio_chip gc; - struct irq_chip irq_chip; int parent_irq; #ifdef CONFIG_PM @@ -241,6 +240,8 @@ static void pl061_irq_mask(struct irq_data *d) gpioie = readb(pl061->base + GPIOIE) & ~mask; writeb(gpioie, pl061->base + GPIOIE); raw_spin_unlock(&pl061->lock); + + gpiochip_disable_irq(gc, d->hwirq); } static void pl061_irq_unmask(struct irq_data *d) @@ -250,6 +251,8 @@ static void pl061_irq_unmask(struct irq_data *d) u8 mask = BIT(irqd_to_hwirq(d) % PL061_GPIO_NR); u8 gpioie; + gpiochip_enable_irq(gc, d->hwirq); + raw_spin_lock(&pl061->lock); gpioie = readb(pl061->base + GPIOIE) | mask; writeb(gpioie, pl061->base + GPIOIE); @@ -283,6 +286,24 @@ static int pl061_irq_set_wake(struct irq_data *d, unsigned int state) return irq_set_irq_wake(pl061->parent_irq, state); } +static void pl061_irq_print_chip(struct irq_data *data, struct seq_file *p) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); + + seq_printf(p, dev_name(gc->parent)); +} + +static const struct irq_chip pl061_irq_chip = { + .irq_ack = pl061_irq_ack, + .irq_mask = pl061_irq_mask, + .irq_unmask = pl061_irq_unmask, + .irq_set_type = pl061_irq_type, + .irq_set_wake = pl061_irq_set_wake, + .irq_print_chip = pl061_irq_print_chip, + .flags = IRQCHIP_IMMUTABLE, + GPIOCHIP_IRQ_RESOURCE_HELPERS, +}; + static int pl061_probe(struct amba_device *adev, const struct amba_id *id) { struct device *dev = &adev->dev; @@ -315,13 +336,6 @@ static int pl061_probe(struct amba_device *adev, const struct amba_id *id) /* * irq_chip support */ - pl061->irq_chip.name = dev_name(dev); - pl061->irq_chip.irq_ack = pl061_irq_ack; - pl061->irq_chip.irq_mask = pl061_irq_mask; - pl061->irq_chip.irq_unmask = pl061_irq_unmask; - pl061->irq_chip.irq_set_type = pl061_irq_type; - pl061->irq_chip.irq_set_wake = pl061_irq_set_wake; - writeb(0, pl061->base + GPIOIE); /* disable irqs */ irq = adev->irq[0]; if (!irq) @@ -329,7 +343,7 @@ static int pl061_probe(struct amba_device *adev, const struct amba_id *id) pl061->parent_irq = irq; girq = &pl061->gc.irq; - girq->chip = &pl061->irq_chip; + gpio_irq_chip_set_chip(girq, &pl061_irq_chip); girq->parent_handler = pl061_irq_handler; girq->num_parents = 1; girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents), From 374b87a0fcf9fa5dd1379271337ae19f95682956 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Apr 2022 15:18:42 +0100 Subject: [PATCH 684/708] pinctrl: apple-gpio: Make the irqchip immutable Prevent gpiolib from messing with the irqchip by advertising the irq_chip structure as immutable, making it const, and adding the various calls that gpiolib relies upon. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220419141846.598305-7-maz@kernel.org --- drivers/pinctrl/pinctrl-apple-gpio.c | 29 +++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/pinctrl/pinctrl-apple-gpio.c b/drivers/pinctrl/pinctrl-apple-gpio.c index 72f4dd2466e1..5e610849dfc3 100644 --- a/drivers/pinctrl/pinctrl-apple-gpio.c +++ b/drivers/pinctrl/pinctrl-apple-gpio.c @@ -36,7 +36,6 @@ struct apple_gpio_pinctrl { struct pinctrl_desc pinctrl_desc; struct gpio_chip gpio_chip; - struct irq_chip irq_chip; u8 irqgrps[]; }; @@ -275,17 +274,21 @@ static unsigned int apple_gpio_irq_type(unsigned int type) static void apple_gpio_irq_mask(struct irq_data *data) { - struct apple_gpio_pinctrl *pctl = gpiochip_get_data(irq_data_get_irq_chip_data(data)); + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); + struct apple_gpio_pinctrl *pctl = gpiochip_get_data(gc); apple_gpio_set_reg(pctl, data->hwirq, REG_GPIOx_MODE, FIELD_PREP(REG_GPIOx_MODE, REG_GPIOx_IN_IRQ_OFF)); + gpiochip_disable_irq(gc, data->hwirq); } static void apple_gpio_irq_unmask(struct irq_data *data) { - struct apple_gpio_pinctrl *pctl = gpiochip_get_data(irq_data_get_irq_chip_data(data)); + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); + struct apple_gpio_pinctrl *pctl = gpiochip_get_data(gc); unsigned int irqtype = apple_gpio_irq_type(irqd_get_trigger_type(data)); + gpiochip_enable_irq(gc, data->hwirq); apple_gpio_set_reg(pctl, data->hwirq, REG_GPIOx_MODE, FIELD_PREP(REG_GPIOx_MODE, irqtype)); } @@ -343,13 +346,15 @@ static void apple_gpio_irq_handler(struct irq_desc *desc) chained_irq_exit(chip, desc); } -static struct irq_chip apple_gpio_irqchip = { - .name = "Apple-GPIO", - .irq_startup = apple_gpio_irq_startup, - .irq_ack = apple_gpio_irq_ack, - .irq_mask = apple_gpio_irq_mask, - .irq_unmask = apple_gpio_irq_unmask, - .irq_set_type = apple_gpio_irq_set_type, +static const struct irq_chip apple_gpio_irqchip = { + .name = "Apple-GPIO", + .irq_startup = apple_gpio_irq_startup, + .irq_ack = apple_gpio_irq_ack, + .irq_mask = apple_gpio_irq_mask, + .irq_unmask = apple_gpio_irq_unmask, + .irq_set_type = apple_gpio_irq_set_type, + .flags = IRQCHIP_IMMUTABLE, + GPIOCHIP_IRQ_RESOURCE_HELPERS, }; /* Probe & register */ @@ -360,8 +365,6 @@ static int apple_gpio_register(struct apple_gpio_pinctrl *pctl) void **irq_data = NULL; int ret; - pctl->irq_chip = apple_gpio_irqchip; - pctl->gpio_chip.label = dev_name(pctl->dev); pctl->gpio_chip.request = gpiochip_generic_request; pctl->gpio_chip.free = gpiochip_generic_free; @@ -377,7 +380,7 @@ static int apple_gpio_register(struct apple_gpio_pinctrl *pctl) if (girq->num_parents) { int i; - girq->chip = &pctl->irq_chip; + gpio_irq_chip_set_chip(girq, &apple_gpio_irqchip); girq->parent_handler = apple_gpio_irq_handler; girq->parents = kmalloc_array(girq->num_parents, From 14dbe186b9d42cbf662eae5a4da14687edbf0edb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Apr 2022 15:18:43 +0100 Subject: [PATCH 685/708] pinctrl: msmgpio: Make the irqchip immutable Prevent gpiolib from messing with the irqchip by advertising the irq_chip structure as immutable, making it const, and adding the various calls that gpiolib relies upon. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220419141846.598305-8-maz@kernel.org --- drivers/pinctrl/qcom/pinctrl-msm.c | 53 +++++++++++++++++++----------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 966ea6622ff3..a2abfe987ab1 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -42,7 +42,6 @@ * @chip: gpiochip handle. * @desc: pin controller descriptor * @restart_nb: restart notifier block. - * @irq_chip: irq chip information * @irq: parent irq for the TLMM irq_chip. * @intr_target_use_scm: route irq to application cpu using scm calls * @lock: Spinlock to protect register resources as well @@ -63,7 +62,6 @@ struct msm_pinctrl { struct pinctrl_desc desc; struct notifier_block restart_nb; - struct irq_chip irq_chip; int irq; bool intr_target_use_scm; @@ -868,6 +866,8 @@ static void msm_gpio_irq_enable(struct irq_data *d) struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct msm_pinctrl *pctrl = gpiochip_get_data(gc); + gpiochip_enable_irq(gc, d->hwirq); + if (d->parent_data) irq_chip_enable_parent(d); @@ -885,6 +885,8 @@ static void msm_gpio_irq_disable(struct irq_data *d) if (!test_bit(d->hwirq, pctrl->skip_wake_irqs)) msm_gpio_irq_mask(d); + + gpiochip_disable_irq(gc, d->hwirq); } /** @@ -958,6 +960,14 @@ static void msm_gpio_irq_ack(struct irq_data *d) raw_spin_unlock_irqrestore(&pctrl->lock, flags); } +static void msm_gpio_irq_eoi(struct irq_data *d) +{ + d = d->parent_data; + + if (d) + d->chip->irq_eoi(d); +} + static bool msm_gpio_needs_dual_edge_parent_workaround(struct irq_data *d, unsigned int type) { @@ -1255,6 +1265,26 @@ static bool msm_gpio_needs_valid_mask(struct msm_pinctrl *pctrl) return device_property_count_u16(pctrl->dev, "gpios") > 0; } +static const struct irq_chip msm_gpio_irq_chip = { + .name = "msmgpio", + .irq_enable = msm_gpio_irq_enable, + .irq_disable = msm_gpio_irq_disable, + .irq_mask = msm_gpio_irq_mask, + .irq_unmask = msm_gpio_irq_unmask, + .irq_ack = msm_gpio_irq_ack, + .irq_eoi = msm_gpio_irq_eoi, + .irq_set_type = msm_gpio_irq_set_type, + .irq_set_wake = msm_gpio_irq_set_wake, + .irq_request_resources = msm_gpio_irq_reqres, + .irq_release_resources = msm_gpio_irq_relres, + .irq_set_affinity = msm_gpio_irq_set_affinity, + .irq_set_vcpu_affinity = msm_gpio_irq_set_vcpu_affinity, + .flags = (IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND | + IRQCHIP_IMMUTABLE), +}; + static int msm_gpio_init(struct msm_pinctrl *pctrl) { struct gpio_chip *chip; @@ -1276,22 +1306,6 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) if (msm_gpio_needs_valid_mask(pctrl)) chip->init_valid_mask = msm_gpio_init_valid_mask; - pctrl->irq_chip.name = "msmgpio"; - pctrl->irq_chip.irq_enable = msm_gpio_irq_enable; - pctrl->irq_chip.irq_disable = msm_gpio_irq_disable; - pctrl->irq_chip.irq_mask = msm_gpio_irq_mask; - pctrl->irq_chip.irq_unmask = msm_gpio_irq_unmask; - pctrl->irq_chip.irq_ack = msm_gpio_irq_ack; - pctrl->irq_chip.irq_set_type = msm_gpio_irq_set_type; - pctrl->irq_chip.irq_set_wake = msm_gpio_irq_set_wake; - pctrl->irq_chip.irq_request_resources = msm_gpio_irq_reqres; - pctrl->irq_chip.irq_release_resources = msm_gpio_irq_relres; - pctrl->irq_chip.irq_set_affinity = msm_gpio_irq_set_affinity; - pctrl->irq_chip.irq_set_vcpu_affinity = msm_gpio_irq_set_vcpu_affinity; - pctrl->irq_chip.flags = IRQCHIP_MASK_ON_SUSPEND | - IRQCHIP_SET_TYPE_MASKED | - IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND; - np = of_parse_phandle(pctrl->dev->of_node, "wakeup-parent", 0); if (np) { chip->irq.parent_domain = irq_find_matching_host(np, @@ -1300,7 +1314,6 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) if (!chip->irq.parent_domain) return -EPROBE_DEFER; chip->irq.child_to_parent_hwirq = msm_gpio_wakeirq; - pctrl->irq_chip.irq_eoi = irq_chip_eoi_parent; /* * Let's skip handling the GPIOs, if the parent irqchip * is handling the direct connect IRQ of the GPIO. @@ -1313,7 +1326,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) } girq = &chip->irq; - girq->chip = &pctrl->irq_chip; + gpio_irq_chip_set_chip(girq, &msm_gpio_irq_chip); girq->parent_handler = msm_gpio_irq_handler; girq->fwnode = pctrl->dev->fwnode; girq->num_parents = 1; From 6173e56f76c712aac9d45208ccec7a065382911f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Apr 2022 15:18:44 +0100 Subject: [PATCH 686/708] pinctrl: amd: Make the irqchip immutable Prevent gpiolib from messing with the irqchip by advertising the irq_chip structure as immutable, making it const, and adding the various calls that gpiolib relies upon. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220419141846.598305-9-maz@kernel.org --- drivers/pinctrl/pinctrl-amd.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 1a7d686494ff..0645c2c24f50 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -387,6 +387,8 @@ static void amd_gpio_irq_enable(struct irq_data *d) struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct amd_gpio *gpio_dev = gpiochip_get_data(gc); + gpiochip_enable_irq(gc, d->hwirq); + raw_spin_lock_irqsave(&gpio_dev->lock, flags); pin_reg = readl(gpio_dev->base + (d->hwirq)*4); pin_reg |= BIT(INTERRUPT_ENABLE_OFF); @@ -408,6 +410,8 @@ static void amd_gpio_irq_disable(struct irq_data *d) pin_reg &= ~BIT(INTERRUPT_MASK_OFF); writel(pin_reg, gpio_dev->base + (d->hwirq)*4); raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); + + gpiochip_disable_irq(gc, d->hwirq); } static void amd_gpio_irq_mask(struct irq_data *d) @@ -577,7 +581,7 @@ static void amd_irq_ack(struct irq_data *d) */ } -static struct irq_chip amd_gpio_irqchip = { +static const struct irq_chip amd_gpio_irqchip = { .name = "amd_gpio", .irq_ack = amd_irq_ack, .irq_enable = amd_gpio_irq_enable, @@ -593,7 +597,8 @@ static struct irq_chip amd_gpio_irqchip = { * the wake event. Otherwise the wake event will never clear and * prevent the system from suspending. */ - .flags = IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND, + .flags = IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND | IRQCHIP_IMMUTABLE, + GPIOCHIP_IRQ_RESOURCE_HELPERS, }; #define PIN_IRQ_PENDING (BIT(INTERRUPT_STS_OFF) | BIT(WAKE_STS_OFF)) @@ -1026,7 +1031,7 @@ static int amd_gpio_probe(struct platform_device *pdev) amd_gpio_irq_init(gpio_dev); girq = &gpio_dev->gc.irq; - girq->chip = &amd_gpio_irqchip; + gpio_irq_chip_set_chip(girq, &amd_gpio_irqchip); /* This will let us handle the parent IRQ in the driver */ girq->parent_handler = NULL; girq->num_parents = 0; From afefc3266272d40cdcd0fd713c7b42008fea19d5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Apr 2022 15:18:45 +0100 Subject: [PATCH 687/708] gpio: Update TODO to mention immutable irq_chip structures 5 drivers are converted, a few hundred to go. Definitely worth of a TODO entry, in the hope that someone will notice it and do a bulk update. Reviewed-by: Andy Shevchenko Reviewed-by: Bartosz Golaszewski Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220419141846.598305-10-maz@kernel.org --- drivers/gpio/TODO | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpio/TODO b/drivers/gpio/TODO index b8b1473a5b1e..f87ff3fa8a53 100644 --- a/drivers/gpio/TODO +++ b/drivers/gpio/TODO @@ -178,3 +178,22 @@ discussed but the idea is to provide a low-level access point for debugging and hacking and to expose all lines without the need of any exporting. Also provide ample ammunition to shoot oneself in the foot, because this is debugfs after all. + + +Moving over to immutable irq_chip structures + +Most of the gpio chips implementing interrupt support rely on gpiolib +intercepting some of the irq_chip callbacks, preventing the structures +from being made read-only and forcing duplication of structures that +should otherwise be unique. + +The solution is to call into the gpiolib code when needed (resource +management, enable/disable or unmask/mask callbacks), and to let the +core code know about that by exposing a flag (IRQCHIP_IMMUTABLE) in +the irq_chip structure. The irq_chip structure can then be made unique +and const. + +A small number of drivers have been converted (pl061, tegra186, msm, +amd, apple), and can be used as examples of how to proceed with this +conversion. Note that drivers using the generic irqchip framework +cannot be converted yet, but watch this space! From 5644b66a9c63c3cadc6ba85faf5a15604e6cf29a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Apr 2022 15:18:46 +0100 Subject: [PATCH 688/708] Documentation: Update the recommended pattern for GPIO irqchips Update the documentation to get rid of the per-gpio_irq_chip irq_chip structure, and give examples of the new pattern. Reviewed-by: Andy Shevchenko Reviewed-by: Bartosz Golaszewski Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220419141846.598305-11-maz@kernel.org --- Documentation/driver-api/gpio/driver.rst | 175 ++++++++++++++++++----- 1 file changed, 142 insertions(+), 33 deletions(-) diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst index bbc53920d4dd..a1ddefa1f55f 100644 --- a/Documentation/driver-api/gpio/driver.rst +++ b/Documentation/driver-api/gpio/driver.rst @@ -417,30 +417,66 @@ struct gpio_irq_chip inside struct gpio_chip before adding the gpio_chip. If you do this, the additional irq_chip will be set up by gpiolib at the same time as setting up the rest of the GPIO functionality. The following is a typical example of a chained cascaded interrupt handler using -the gpio_irq_chip: +the gpio_irq_chip. Note how the mask/unmask (or disable/enable) functions +call into the core gpiolib code: .. code-block:: c - /* Typical state container with dynamic irqchip */ + /* Typical state container */ struct my_gpio { struct gpio_chip gc; - struct irq_chip irq; + }; + + static void my_gpio_mask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_desc_get_handler_data(d); + + /* + * Perform any necessary action to mask the interrupt, + * and then call into the core code to synchronise the + * state. + */ + + gpiochip_disable_irq(gc, d->hwirq); + } + + static void my_gpio_unmask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_desc_get_handler_data(d); + + gpiochip_enable_irq(gc, d->hwirq); + + /* + * Perform any necessary action to unmask the interrupt, + * after having called into the core code to synchronise + * the state. + */ + } + + /* + * Statically populate the irqchip. Note that it is made const + * (further indicated by the IRQCHIP_IMMUTABLE flag), and that + * the GPIOCHIP_IRQ_RESOURCE_HELPER macro adds some extra + * callbacks to the structure. + */ + static const struct irq_chip my_gpio_irq_chip = { + .name = "my_gpio_irq", + .irq_ack = my_gpio_ack_irq, + .irq_mask = my_gpio_mask_irq, + .irq_unmask = my_gpio_unmask_irq, + .irq_set_type = my_gpio_set_irq_type, + .flags = IRQCHIP_IMMUTABLE, + /* Provide the gpio resource callbacks */ + GPIOCHIP_IRQ_RESOURCE_HELPERS, }; int irq; /* from platform etc */ struct my_gpio *g; struct gpio_irq_chip *girq; - /* Set up the irqchip dynamically */ - g->irq.name = "my_gpio_irq"; - g->irq.irq_ack = my_gpio_ack_irq; - g->irq.irq_mask = my_gpio_mask_irq; - g->irq.irq_unmask = my_gpio_unmask_irq; - g->irq.irq_set_type = my_gpio_set_irq_type; - /* Get a pointer to the gpio_irq_chip */ girq = &g->gc.irq; - girq->chip = &g->irq; + gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip); girq->parent_handler = ftgpio_gpio_irq_handler; girq->num_parents = 1; girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents), @@ -458,23 +494,58 @@ the interrupt separately and go with it: .. code-block:: c - /* Typical state container with dynamic irqchip */ + /* Typical state container */ struct my_gpio { struct gpio_chip gc; - struct irq_chip irq; + }; + + static void my_gpio_mask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_desc_get_handler_data(d); + + /* + * Perform any necessary action to mask the interrupt, + * and then call into the core code to synchronise the + * state. + */ + + gpiochip_disable_irq(gc, d->hwirq); + } + + static void my_gpio_unmask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_desc_get_handler_data(d); + + gpiochip_enable_irq(gc, d->hwirq); + + /* + * Perform any necessary action to unmask the interrupt, + * after having called into the core code to synchronise + * the state. + */ + } + + /* + * Statically populate the irqchip. Note that it is made const + * (further indicated by the IRQCHIP_IMMUTABLE flag), and that + * the GPIOCHIP_IRQ_RESOURCE_HELPER macro adds some extra + * callbacks to the structure. + */ + static const struct irq_chip my_gpio_irq_chip = { + .name = "my_gpio_irq", + .irq_ack = my_gpio_ack_irq, + .irq_mask = my_gpio_mask_irq, + .irq_unmask = my_gpio_unmask_irq, + .irq_set_type = my_gpio_set_irq_type, + .flags = IRQCHIP_IMMUTABLE, + /* Provide the gpio resource callbacks */ + GPIOCHIP_IRQ_RESOURCE_HELPERS, }; int irq; /* from platform etc */ struct my_gpio *g; struct gpio_irq_chip *girq; - /* Set up the irqchip dynamically */ - g->irq.name = "my_gpio_irq"; - g->irq.irq_ack = my_gpio_ack_irq; - g->irq.irq_mask = my_gpio_mask_irq; - g->irq.irq_unmask = my_gpio_unmask_irq; - g->irq.irq_set_type = my_gpio_set_irq_type; - ret = devm_request_threaded_irq(dev, irq, NULL, irq_thread_fn, IRQF_ONESHOT, "my-chip", g); if (ret < 0) @@ -482,7 +553,7 @@ the interrupt separately and go with it: /* Get a pointer to the gpio_irq_chip */ girq = &g->gc.irq; - girq->chip = &g->irq; + gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip); /* This will let us handle the parent IRQ in the driver */ girq->parent_handler = NULL; girq->num_parents = 0; @@ -500,24 +571,61 @@ In this case the typical set-up will look like this: /* Typical state container with dynamic irqchip */ struct my_gpio { struct gpio_chip gc; - struct irq_chip irq; struct fwnode_handle *fwnode; }; - int irq; /* from platform etc */ + static void my_gpio_mask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_desc_get_handler_data(d); + + /* + * Perform any necessary action to mask the interrupt, + * and then call into the core code to synchronise the + * state. + */ + + gpiochip_disable_irq(gc, d->hwirq); + irq_mask_mask_parent(d); + } + + static void my_gpio_unmask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_desc_get_handler_data(d); + + gpiochip_enable_irq(gc, d->hwirq); + + /* + * Perform any necessary action to unmask the interrupt, + * after having called into the core code to synchronise + * the state. + */ + + irq_mask_unmask_parent(d); + } + + /* + * Statically populate the irqchip. Note that it is made const + * (further indicated by the IRQCHIP_IMMUTABLE flag), and that + * the GPIOCHIP_IRQ_RESOURCE_HELPER macro adds some extra + * callbacks to the structure. + */ + static const struct irq_chip my_gpio_irq_chip = { + .name = "my_gpio_irq", + .irq_ack = my_gpio_ack_irq, + .irq_mask = my_gpio_mask_irq, + .irq_unmask = my_gpio_unmask_irq, + .irq_set_type = my_gpio_set_irq_type, + .flags = IRQCHIP_IMMUTABLE, + /* Provide the gpio resource callbacks */ + GPIOCHIP_IRQ_RESOURCE_HELPERS, + }; + struct my_gpio *g; struct gpio_irq_chip *girq; - /* Set up the irqchip dynamically */ - g->irq.name = "my_gpio_irq"; - g->irq.irq_ack = my_gpio_ack_irq; - g->irq.irq_mask = my_gpio_mask_irq; - g->irq.irq_unmask = my_gpio_unmask_irq; - g->irq.irq_set_type = my_gpio_set_irq_type; - /* Get a pointer to the gpio_irq_chip */ girq = &g->gc.irq; - girq->chip = &g->irq; + gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip); girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; girq->fwnode = g->fwnode; @@ -605,8 +713,9 @@ When implementing an irqchip inside a GPIO driver, these two functions should typically be called in the .irq_disable() and .irq_enable() callbacks from the irqchip. -When using the gpiolib irqchip helpers, these callbacks are automatically -assigned. +When IRQCHIP_IMMUTABLE is not advertised by the irqchip, these callbacks +are automatically assigned. This behaviour is deprecated and on its way +to be removed from the kernel. Real-Time compliance for GPIO IRQ chips From 34453c2e9f799d02f5f379519495208bbd96a935 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 5 Apr 2022 19:23:24 +0100 Subject: [PATCH 689/708] irqchip/gic-v3: Exposes bit values for GICR_CTLR.{IR, CES} As we're about to expose GICR_CTLR.{IR,CES} to guests, populate the include file with the architectural values. Signed-off-by: Marc Zyngier Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20220405182327.205520-2-maz@kernel.org --- include/linux/irqchip/arm-gic-v3.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 12d91f0dedf9..728691365464 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -127,6 +127,8 @@ #define GICR_PIDR2 GICD_PIDR2 #define GICR_CTLR_ENABLE_LPIS (1UL << 0) +#define GICR_CTLR_CES (1UL << 1) +#define GICR_CTLR_IR (1UL << 2) #define GICR_CTLR_RWP (1UL << 3) #define GICR_TYPER_CPU_NUMBER(r) (((r) >> 8) & 0xffff) From a837ed362e7070d48b6064138d3b61eb75eb9fd9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 5 Apr 2022 19:38:56 +0100 Subject: [PATCH 690/708] irqchip/gic-v3: Detect LPI invalidation MMIO registers Since GICv4.1, an implementation can offer the same MMIO-based implementation as DirectLPI, only with an ITS. Given that this can be hugely beneficial for workloads that are very LPI masking heavy (although these workloads are admitedly a bit odd). Interestingly, this is independent of RVPEI, which only *implies* the functionnality. So let's detect whether the implementation has GICR_CTLR.IR set, and propagate this as DirectLPI to the ITS driver. While we're at it, repaint the GICv3 banner so that we advertise the various capabilities at boot time to be slightly less invasive. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220405183857.205960-3-maz@kernel.org --- drivers/irqchip/irq-gic-v3.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index b252d5534547..f98651ed4e62 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -919,6 +919,7 @@ static int __gic_update_rdist_properties(struct redist_region *region, void __iomem *ptr) { u64 typer = gic_read_typer(ptr + GICR_TYPER); + u32 ctlr = readl_relaxed(ptr + GICR_CTLR); /* Boot-time cleanip */ if ((typer & GICR_TYPER_VLPIS) && (typer & GICR_TYPER_RVPEID)) { @@ -938,9 +939,18 @@ static int __gic_update_rdist_properties(struct redist_region *region, gic_data.rdists.has_vlpis &= !!(typer & GICR_TYPER_VLPIS); - /* RVPEID implies some form of DirectLPI, no matter what the doc says... :-/ */ + /* + * TYPER.RVPEID implies some form of DirectLPI, no matter what the + * doc says... :-/ And CTLR.IR implies another subset of DirectLPI + * that the ITS driver can make use of for LPIs (and not VLPIs). + * + * These are 3 different ways to express the same thing, depending + * on the revision of the architecture and its relaxations over + * time. Just group them under the 'direct_lpi' banner. + */ gic_data.rdists.has_rvpeid &= !!(typer & GICR_TYPER_RVPEID); gic_data.rdists.has_direct_lpi &= (!!(typer & GICR_TYPER_DirectLPIS) | + !!(ctlr & GICR_CTLR_IR) | gic_data.rdists.has_rvpeid); gic_data.rdists.has_vpend_valid_dirty &= !!(typer & GICR_TYPER_DIRTY); @@ -962,7 +972,11 @@ static void gic_update_rdist_properties(void) gic_iterate_rdists(__gic_update_rdist_properties); if (WARN_ON(gic_data.ppi_nr == UINT_MAX)) gic_data.ppi_nr = 0; - pr_info("%d PPIs implemented\n", gic_data.ppi_nr); + pr_info("GICv3 features: %d PPIs%s%s\n", + gic_data.ppi_nr, + gic_data.has_rss ? ", RSS" : "", + gic_data.rdists.has_direct_lpi ? ", DirectLPI" : ""); + if (gic_data.rdists.has_vlpis) pr_info("GICv4 features: %s%s%s\n", gic_data.rdists.has_direct_lpi ? "DirectLPI " : "", @@ -1803,8 +1817,6 @@ static int __init gic_init_bases(void __iomem *dist_base, irq_domain_update_bus_token(gic_data.domain, DOMAIN_BUS_WIRED); gic_data.has_rss = !!(typer & GICD_TYPER_RSS); - pr_info("Distributor has %sRange Selector support\n", - gic_data.has_rss ? "" : "no "); if (typer & GICD_TYPER_MBIS) { err = mbi_init(handle, gic_data.domain); From 63f13483f0689a4de20fbfd847866ab39bec736f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 5 Apr 2022 19:38:57 +0100 Subject: [PATCH 691/708] irqchip/gic-v3: Relax polling of GIC{R,D}_CTLR.RWP Recent work on the KVM GIC emulation has revealed that the GICv3 driver is a bit RWP-happy, as it polls this bit for each and every write MMIO access involving a single interrupt. As it turns out, polling RWP is only required when: - Disabling an SGI, PPI or SPI - Disabling LPIs at the redistributor level - Disabling groups - Enabling ARE - Dealing with DPG* Simplify the driver by removing all the other instances of RWP polling, and add the one that was missing when enabling the distributor (as that's where we set ARE). Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220405183857.205960-4-maz@kernel.org --- drivers/irqchip/irq-gic-v3.c | 38 +++++++++++++++++------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index f98651ed4e62..b8026847e787 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -352,28 +352,27 @@ static int gic_peek_irq(struct irq_data *d, u32 offset) static void gic_poke_irq(struct irq_data *d, u32 offset) { - void (*rwp_wait)(void); void __iomem *base; u32 index, mask; offset = convert_offset_index(d, offset, &index); mask = 1 << (index % 32); - if (gic_irq_in_rdist(d)) { + if (gic_irq_in_rdist(d)) base = gic_data_rdist_sgi_base(); - rwp_wait = gic_redist_wait_for_rwp; - } else { + else base = gic_data.dist_base; - rwp_wait = gic_dist_wait_for_rwp; - } writel_relaxed(mask, base + offset + (index / 32) * 4); - rwp_wait(); } static void gic_mask_irq(struct irq_data *d) { gic_poke_irq(d, GICD_ICENABLER); + if (gic_irq_in_rdist(d)) + gic_redist_wait_for_rwp(); + else + gic_dist_wait_for_rwp(); } static void gic_eoimode1_mask_irq(struct irq_data *d) @@ -420,7 +419,11 @@ static int gic_irq_set_irqchip_state(struct irq_data *d, break; case IRQCHIP_STATE_MASKED: - reg = val ? GICD_ICENABLER : GICD_ISENABLER; + if (val) { + gic_mask_irq(d); + return 0; + } + reg = GICD_ISENABLER; break; default: @@ -574,7 +577,6 @@ static int gic_set_type(struct irq_data *d, unsigned int type) { enum gic_intid_range range; unsigned int irq = gic_irq(d); - void (*rwp_wait)(void); void __iomem *base; u32 offset, index; int ret; @@ -590,17 +592,14 @@ static int gic_set_type(struct irq_data *d, unsigned int type) type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING) return -EINVAL; - if (gic_irq_in_rdist(d)) { + if (gic_irq_in_rdist(d)) base = gic_data_rdist_sgi_base(); - rwp_wait = gic_redist_wait_for_rwp; - } else { + else base = gic_data.dist_base; - rwp_wait = gic_dist_wait_for_rwp; - } offset = convert_offset_index(d, GICD_ICFGR, &index); - ret = gic_configure_irq(index, type, base + offset, rwp_wait); + ret = gic_configure_irq(index, type, base + offset, NULL); if (ret && (range == PPI_RANGE || range == EPPI_RANGE)) { /* Misconfigured PPIs are usually not fatal */ pr_warn("GIC: PPI INTID%d is secure or misconfigured\n", irq); @@ -807,8 +806,8 @@ static void __init gic_dist_init(void) for (i = 0; i < GIC_ESPI_NR; i += 4) writel_relaxed(GICD_INT_DEF_PRI_X4, base + GICD_IPRIORITYRnE + i); - /* Now do the common stuff, and wait for the distributor to drain */ - gic_dist_config(base, GIC_LINE_NR, gic_dist_wait_for_rwp); + /* Now do the common stuff */ + gic_dist_config(base, GIC_LINE_NR, NULL); val = GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1; if (gic_data.rdists.gicd_typer2 & GICD_TYPER2_nASSGIcap) { @@ -816,8 +815,9 @@ static void __init gic_dist_init(void) val |= GICD_CTLR_nASSGIreq; } - /* Enable distributor with ARE, Group1 */ + /* Enable distributor with ARE, Group1, and wait for it to drain */ writel_relaxed(val, base + GICD_CTLR); + gic_dist_wait_for_rwp(); /* * Set all global interrupts to the boot CPU only. ARE must be @@ -1298,8 +1298,6 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, */ if (enabled) gic_unmask_irq(d); - else - gic_dist_wait_for_rwp(); irq_data_update_effective_affinity(d, cpumask_of(cpu)); From 4053b6b43fae126bea0654493fe512d364ee9fc1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 9 Apr 2022 11:16:17 +0100 Subject: [PATCH 692/708] dt-bindings: interrupt-controller: arm,gic-v3: Make the v2 compat requirements explicit A common mistake when writing a device tree for a platform that is using GICv3 with ancient CPUs is to overlook the MMIO frames that implement the GICv2 compatibility feature, because this feature is implemented by the CPUs and not by the GIC itself. The compatibility feature itself is optional (all the modern implementations have dropped it), but is present in all the ARM Ltd implementations of the ARMv8.0 architecture (A3x, A53, A57, A72, A73), and many others from various implementers. Make it explicit that GICC, GICH and GICV are required for these CPUs. Also take this opportunity to update my email address, as people keep sending them to the wrong place... Signed-off-by: Marc Zyngier Cc: Rob Herring Cc: Krzysztof Kozlowski Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220409101617.268796-1-maz@kernel.org --- .../bindings/interrupt-controller/arm,gic-v3.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml index b7197f78e158..3912a89162f0 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: ARM Generic Interrupt Controller, version 3 maintainers: - - Marc Zyngier + - Marc Zyngier description: | AArch64 SMP cores are often associated with a GICv3, providing Private @@ -78,7 +78,11 @@ properties: - GIC Hypervisor interface (GICH) - GIC Virtual CPU interface (GICV) - GICC, GICH and GICV are optional. + GICC, GICH and GICV are optional, but must be described if the CPUs + support them. Examples of such CPUs are ARM's implementations of the + ARMv8.0 architecture such as Cortex-A32, A34, A35, A53, A57, A72 and + A73 (this list is not exhaustive). + minItems: 2 maxItems: 4096 # Should be enough? From 2b2cd74a06c38cc26b2a17854f5e42f7270438eb Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 12 Apr 2022 16:28:15 +0100 Subject: [PATCH 693/708] irqchip/gic-v3: Claim iomem resources As a simple quality-of-life tweak, claim our MMIO regions when mapping them, such that the GIC shows up in /proc/iomem. No effort is spent on trying to release them, since frankly if the GIC fails to probe then it's never getting a second try anyway. Signed-off-by: Robin Murphy Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/c534c2a458a3bf94ccdae8abc6edc3d45a689c30.1649777295.git.robin.murphy@arm.com --- drivers/irqchip/irq-gic-v3.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index b8026847e787..9d220c6c053b 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1990,10 +1990,10 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare u32 nr_redist_regions; int err, i; - dist_base = of_iomap(node, 0); - if (!dist_base) { + dist_base = of_io_request_and_map(node, 0, "GICD"); + if (IS_ERR(dist_base)) { pr_err("%pOF: unable to map gic dist registers\n", node); - return -ENXIO; + return PTR_ERR(dist_base); } err = gic_validate_dist_version(dist_base); @@ -2017,8 +2017,8 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare int ret; ret = of_address_to_resource(node, 1 + i, &res); - rdist_regs[i].redist_base = of_iomap(node, 1 + i); - if (ret || !rdist_regs[i].redist_base) { + rdist_regs[i].redist_base = of_io_request_and_map(node, 1 + i, "GICR"); + if (ret || IS_ERR(rdist_regs[i].redist_base)) { pr_err("%pOF: couldn't map region %d\n", node, i); err = -ENODEV; goto out_unmap_rdist; @@ -2044,7 +2044,7 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare out_unmap_rdist: for (i = 0; i < nr_redist_regions; i++) - if (rdist_regs[i].redist_base) + if (rdist_regs[i].redist_base && !IS_ERR(rdist_regs[i].redist_base)) iounmap(rdist_regs[i].redist_base); kfree(rdist_regs); out_unmap_dist: @@ -2091,6 +2091,7 @@ gic_acpi_parse_madt_redist(union acpi_subtable_headers *header, pr_err("Couldn't map GICR region @%llx\n", redist->base_address); return -ENOMEM; } + request_mem_region(redist->base_address, redist->length, "GICR"); gic_acpi_register_redist(redist->base_address, redist_base); return 0; @@ -2113,6 +2114,7 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header, redist_base = ioremap(gicc->gicr_base_address, size); if (!redist_base) return -ENOMEM; + request_mem_region(gicc->gicr_base_address, size, "GICR"); gic_acpi_register_redist(gicc->gicr_base_address, redist_base); return 0; @@ -2314,6 +2316,7 @@ gic_acpi_init(union acpi_subtable_headers *header, const unsigned long end) pr_err("Unable to map GICD registers\n"); return -ENOMEM; } + request_mem_region(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE, "GICD"); err = gic_validate_dist_version(acpi_data.dist_base); if (err) { From 4efc851c36e389f7ed432edac0149acc5f94b0c7 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Tue, 3 May 2022 14:45:41 +0100 Subject: [PATCH 694/708] irqchip/exiu: Fix acknowledgment of edge triggered interrupts Currently the EXIU uses the fasteoi interrupt flow that is configured by it's parent (irq-gic-v3.c). With this flow the only chance to clear the interrupt request happens during .irq_eoi() and (obviously) this happens after the interrupt handler has run. EXIU requires edge triggered interrupts to be acked prior to interrupt handling. Without this we risk incorrect interrupt dismissal when a new interrupt is delivered after the handler reads and acknowledges the peripheral but before the irq_eoi() takes place. Fix this by clearing the interrupt request from .irq_ack() if we are configured for edge triggered interrupts. This requires adopting the fasteoi-ack flow instead of the fasteoi to ensure the ack gets called. These changes have been tested using the power button on a Developerbox/SC2A11 combined with some hackery in gpio-keys so I can play with the different trigger mode [and an mdelay(500) so I can can check what happens on a double click in both modes]. Fixes: 706cffc1b912 ("irqchip/exiu: Add support for Socionext Synquacer EXIU controller") Signed-off-by: Daniel Thompson Reviewed-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220503134541.2566457-1-daniel.thompson@linaro.org --- arch/arm64/Kconfig.platforms | 1 + drivers/irqchip/irq-sni-exiu.c | 25 ++++++++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 30b123cde02c..aaeaf57c8222 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -253,6 +253,7 @@ config ARCH_INTEL_SOCFPGA config ARCH_SYNQUACER bool "Socionext SynQuacer SoC Family" + select IRQ_FASTEOI_HIERARCHY_HANDLERS config ARCH_TEGRA bool "NVIDIA Tegra SoC Family" diff --git a/drivers/irqchip/irq-sni-exiu.c b/drivers/irqchip/irq-sni-exiu.c index abd011fcecf4..c7db617e1a2f 100644 --- a/drivers/irqchip/irq-sni-exiu.c +++ b/drivers/irqchip/irq-sni-exiu.c @@ -37,11 +37,26 @@ struct exiu_irq_data { u32 spi_base; }; -static void exiu_irq_eoi(struct irq_data *d) +static void exiu_irq_ack(struct irq_data *d) { struct exiu_irq_data *data = irq_data_get_irq_chip_data(d); writel(BIT(d->hwirq), data->base + EIREQCLR); +} + +static void exiu_irq_eoi(struct irq_data *d) +{ + struct exiu_irq_data *data = irq_data_get_irq_chip_data(d); + + /* + * Level triggered interrupts are latched and must be cleared during + * EOI or the interrupt will be jammed on. Of course if a level + * triggered interrupt is still asserted then the write will not clear + * the interrupt. + */ + if (irqd_is_level_type(d)) + writel(BIT(d->hwirq), data->base + EIREQCLR); + irq_chip_eoi_parent(d); } @@ -91,10 +106,13 @@ static int exiu_irq_set_type(struct irq_data *d, unsigned int type) writel_relaxed(val, data->base + EILVL); val = readl_relaxed(data->base + EIEDG); - if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_LEVEL_HIGH) + if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_LEVEL_HIGH) { val &= ~BIT(d->hwirq); - else + irq_set_handler_locked(d, handle_fasteoi_irq); + } else { val |= BIT(d->hwirq); + irq_set_handler_locked(d, handle_fasteoi_ack_irq); + } writel_relaxed(val, data->base + EIEDG); writel_relaxed(BIT(d->hwirq), data->base + EIREQCLR); @@ -104,6 +122,7 @@ static int exiu_irq_set_type(struct irq_data *d, unsigned int type) static struct irq_chip exiu_irq_chip = { .name = "EXIU", + .irq_ack = exiu_irq_ack, .irq_eoi = exiu_irq_eoi, .irq_enable = exiu_irq_enable, .irq_mask = exiu_irq_mask, From 168f633b1722597673e5aa5a6c7721191a9d221f Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 26 Apr 2022 09:19:12 -0700 Subject: [PATCH 695/708] irqchip/xtensa-mx: Fix initial IRQ affinity in non-SMP setup When irq-xtensa-mx chip is used in non-SMP configuration its irq_set_affinity callback is not called leaving IRQ affinity set empty. As a result IRQ delivery does not work in that configuration. Initialize IRQ affinity of the xtensa MX interrupt distributor to CPU 0 for all external IRQ lines. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220426161912.1113784-1-jcmvbkbc@gmail.com --- drivers/irqchip/irq-xtensa-mx.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-xtensa-mx.c b/drivers/irqchip/irq-xtensa-mx.c index 27933338f7b3..8c581c985aa7 100644 --- a/drivers/irqchip/irq-xtensa-mx.c +++ b/drivers/irqchip/irq-xtensa-mx.c @@ -151,14 +151,25 @@ static struct irq_chip xtensa_mx_irq_chip = { .irq_set_affinity = xtensa_mx_irq_set_affinity, }; +static void __init xtensa_mx_init_common(struct irq_domain *root_domain) +{ + unsigned int i; + + irq_set_default_host(root_domain); + secondary_init_irq(); + + /* Initialize default IRQ routing to CPU 0 */ + for (i = 0; i < XCHAL_NUM_EXTINTERRUPTS; ++i) + set_er(1, MIROUT(i)); +} + int __init xtensa_mx_init_legacy(struct device_node *interrupt_parent) { struct irq_domain *root_domain = irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0, &xtensa_mx_irq_domain_ops, &xtensa_mx_irq_chip); - irq_set_default_host(root_domain); - secondary_init_irq(); + xtensa_mx_init_common(root_domain); return 0; } @@ -168,8 +179,7 @@ static int __init xtensa_mx_init(struct device_node *np, struct irq_domain *root_domain = irq_domain_add_linear(np, NR_IRQS, &xtensa_mx_irq_domain_ops, &xtensa_mx_irq_chip); - irq_set_default_host(root_domain); - secondary_init_irq(); + xtensa_mx_init_common(root_domain); return 0; } IRQCHIP_DECLARE(xtensa_mx_irq_chip, "cdns,xtensa-mx", xtensa_mx_init); From 1b2eb89ccf4ffff2ea83c41451b3fed709cd3fc8 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 24 Apr 2022 12:39:51 -0500 Subject: [PATCH 696/708] irqchip/sun6i-r: Use NULL for chip_data sparse complains about using an integer as a NULL pointer. Reported-by: kernel test robot Signed-off-by: Samuel Holland Reviewed-by: Jernej Skrabec Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220424173952.36591-1-samuel@sholland.org --- drivers/irqchip/irq-sun6i-r.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-sun6i-r.c b/drivers/irqchip/irq-sun6i-r.c index 4cd3e533740b..a01e44049415 100644 --- a/drivers/irqchip/irq-sun6i-r.c +++ b/drivers/irqchip/irq-sun6i-r.c @@ -249,11 +249,13 @@ static int sun6i_r_intc_domain_alloc(struct irq_domain *domain, for (i = 0; i < nr_irqs; ++i, ++hwirq, ++virq) { if (hwirq == nmi_hwirq) { irq_domain_set_hwirq_and_chip(domain, virq, hwirq, - &sun6i_r_intc_nmi_chip, 0); + &sun6i_r_intc_nmi_chip, + NULL); irq_set_handler(virq, handle_fasteoi_ack_irq); } else { irq_domain_set_hwirq_and_chip(domain, virq, hwirq, - &sun6i_r_intc_wakeup_chip, 0); + &sun6i_r_intc_wakeup_chip, + NULL); } } From 50f0f26e7c8665763d0d7d3372dbcf191f94d077 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 23 Apr 2022 11:42:26 +0200 Subject: [PATCH 697/708] irqchip/aspeed-i2c-ic: Fix irq_of_parse_and_map() return value The irq_of_parse_and_map() returns 0 on failure, not a negative ERRNO. Fixes: f48e699ddf70 ("irqchip/aspeed-i2c-ic: Add I2C IRQ controller for Aspeed") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220423094227.33148-1-krzysztof.kozlowski@linaro.org --- drivers/irqchip/irq-aspeed-i2c-ic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-aspeed-i2c-ic.c b/drivers/irqchip/irq-aspeed-i2c-ic.c index a47db16ff960..9c9fc3e2967e 100644 --- a/drivers/irqchip/irq-aspeed-i2c-ic.c +++ b/drivers/irqchip/irq-aspeed-i2c-ic.c @@ -77,8 +77,8 @@ static int __init aspeed_i2c_ic_of_init(struct device_node *node, } i2c_ic->parent_irq = irq_of_parse_and_map(node, 0); - if (i2c_ic->parent_irq < 0) { - ret = i2c_ic->parent_irq; + if (!i2c_ic->parent_irq) { + ret = -EINVAL; goto err_iounmap; } From f03a9670d27d23fe734a456f16e2579b21ec02b4 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 23 Apr 2022 11:42:27 +0200 Subject: [PATCH 698/708] irqchip/aspeed-scu-ic: Fix irq_of_parse_and_map() return value The irq_of_parse_and_map() returns 0 on failure, not a negative ERRNO. Fixes: 04f605906ff0 ("irqchip: Add Aspeed SCU interrupt controller") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220423094227.33148-2-krzysztof.kozlowski@linaro.org --- drivers/irqchip/irq-aspeed-scu-ic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-aspeed-scu-ic.c b/drivers/irqchip/irq-aspeed-scu-ic.c index 18b77c3e6db4..279e92cf0b16 100644 --- a/drivers/irqchip/irq-aspeed-scu-ic.c +++ b/drivers/irqchip/irq-aspeed-scu-ic.c @@ -157,8 +157,8 @@ static int aspeed_scu_ic_of_init_common(struct aspeed_scu_ic *scu_ic, } irq = irq_of_parse_and_map(node, 0); - if (irq < 0) { - rc = irq; + if (!irq) { + rc = -EINVAL; goto err; } From 8ca61cde32c1db778aa52631184484e051cbdf27 Mon Sep 17 00:00:00 2001 From: Nathan Rossi Date: Fri, 22 Apr 2022 04:35:32 +0000 Subject: [PATCH 699/708] irqchip/armada-370-xp: Enable MSI affinity configuration With multiple devices attached via PCIe to an Armada 385 it is possible to overwhelm a single CPU with MSI interrupts. Under certain scenarios configuring the interrupts to be handled by more than one CPU would prevent the system from being overwhelmed. However the irqchip-aramada-370-xp driver is configured to only handle MSIs on the boot CPU, and provides no affinity configuration. This change adds support to the armada-370-xp driver to allow for configuring the affinity of specific MSI irqs and to generate the interrupts on secondary CPUs. This is done by enabling the private doorbell for all online CPUs and configures all CPUs to unmask MSI specific private doorbell bits. The CPU affinity selection of the interrupt is handled by the target list of the software triggered interrupt value, which is provided as the MSI message. The message has the associated CPU bit set for the target CPU. For private doorbell interrupts only one bit can be set otherwise all CPUs will receive the interrupt, so the lowest CPU in the affinity mask is used. This means that by default the first CPU will handle all the interrupts as was the case before. Signed-off-by: Nathan Rossi Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220422043532.146946-1-nathan@nathanrossi.com --- drivers/irqchip/irq-armada-370-xp.c | 45 +++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 5b8d571c041d..c877285d7095 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -209,15 +209,29 @@ static struct msi_domain_info armada_370_xp_msi_domain_info = { static void armada_370_xp_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) { + unsigned int cpu = cpumask_first(irq_data_get_effective_affinity_mask(data)); + msg->address_lo = lower_32_bits(msi_doorbell_addr); msg->address_hi = upper_32_bits(msi_doorbell_addr); - msg->data = 0xf00 | (data->hwirq + PCI_MSI_DOORBELL_START); + msg->data = BIT(cpu + 8) | (data->hwirq + PCI_MSI_DOORBELL_START); } static int armada_370_xp_msi_set_affinity(struct irq_data *irq_data, const struct cpumask *mask, bool force) { - return -EINVAL; + unsigned int cpu; + + if (!force) + cpu = cpumask_any_and(mask, cpu_online_mask); + else + cpu = cpumask_first(mask); + + if (cpu >= nr_cpu_ids) + return -EINVAL; + + irq_data_update_effective_affinity(irq_data, cpumask_of(cpu)); + + return IRQ_SET_MASK_OK; } static struct irq_chip armada_370_xp_msi_bottom_irq_chip = { @@ -264,11 +278,21 @@ static const struct irq_domain_ops armada_370_xp_msi_domain_ops = { .free = armada_370_xp_msi_free, }; -static int armada_370_xp_msi_init(struct device_node *node, - phys_addr_t main_int_phys_base) +static void armada_370_xp_msi_reenable_percpu(void) { u32 reg; + /* Enable MSI doorbell mask and combined cpu local interrupt */ + reg = readl(per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_MSK_OFFS) + | PCI_MSI_DOORBELL_MASK; + writel(reg, per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_MSK_OFFS); + /* Unmask local doorbell interrupt */ + writel(1, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS); +} + +static int armada_370_xp_msi_init(struct device_node *node, + phys_addr_t main_int_phys_base) +{ msi_doorbell_addr = main_int_phys_base + ARMADA_370_XP_SW_TRIG_INT_OFFS; @@ -287,18 +311,13 @@ static int armada_370_xp_msi_init(struct device_node *node, return -ENOMEM; } - reg = readl(per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_MSK_OFFS) - | PCI_MSI_DOORBELL_MASK; - - writel(reg, per_cpu_int_base + - ARMADA_370_XP_IN_DRBEL_MSK_OFFS); - - /* Unmask IPI interrupt */ - writel(1, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS); + armada_370_xp_msi_reenable_percpu(); return 0; } #else +static void armada_370_xp_msi_reenable_percpu(void) {} + static inline int armada_370_xp_msi_init(struct device_node *node, phys_addr_t main_int_phys_base) { @@ -501,6 +520,8 @@ static void armada_xp_mpic_reenable_percpu(void) } ipi_resume(); + + armada_370_xp_msi_reenable_percpu(); } static int armada_xp_mpic_starting_cpu(unsigned int cpu) From e9a50f12e579a48e124ac5adb93dafc35f0a46b8 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 6 Apr 2022 18:37:00 +0200 Subject: [PATCH 700/708] irqchip/imx-irqsteer: Constify irq_chip struct The imx_irqsteer_irq_chip struct is constant data. Signed-off-by: Lucas Stach Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220406163701.1277930-1-l.stach@pengutronix.de --- drivers/irqchip/irq-imx-irqsteer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c index 8d91a02593fc..e286e7c5ccbf 100644 --- a/drivers/irqchip/irq-imx-irqsteer.c +++ b/drivers/irqchip/irq-imx-irqsteer.c @@ -70,7 +70,7 @@ static void imx_irqsteer_irq_mask(struct irq_data *d) raw_spin_unlock_irqrestore(&data->lock, flags); } -static struct irq_chip imx_irqsteer_irq_chip = { +static const struct irq_chip imx_irqsteer_irq_chip = { .name = "irqsteer", .irq_mask = imx_irqsteer_irq_mask, .irq_unmask = imx_irqsteer_irq_unmask, From 4730d2233311d86cad9dc510318d1b40e4b53cf2 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 6 Apr 2022 18:37:01 +0200 Subject: [PATCH 701/708] irqchip/imx-irqsteer: Add runtime PM support There are now SoCs that integrate the irqsteer controller within a separate power domain. In order to allow this domain to be powered down when not needed, add runtime PM support to the driver. Signed-off-by: Lucas Stach Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220406163701.1277930-2-l.stach@pengutronix.de --- drivers/irqchip/irq-imx-irqsteer.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c index e286e7c5ccbf..96230a04ec23 100644 --- a/drivers/irqchip/irq-imx-irqsteer.c +++ b/drivers/irqchip/irq-imx-irqsteer.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #define CTRL_STRIDE_OFF(_t, _r) (_t * 4 * _r) @@ -175,7 +176,7 @@ static int imx_irqsteer_probe(struct platform_device *pdev) data->irq_count = DIV_ROUND_UP(irqs_num, 64); data->reg_num = irqs_num / 32; - if (IS_ENABLED(CONFIG_PM_SLEEP)) { + if (IS_ENABLED(CONFIG_PM)) { data->saved_reg = devm_kzalloc(&pdev->dev, sizeof(u32) * data->reg_num, GFP_KERNEL); @@ -199,6 +200,7 @@ static int imx_irqsteer_probe(struct platform_device *pdev) ret = -ENOMEM; goto out; } + irq_domain_set_pm_device(data->domain, &pdev->dev); if (!data->irq_count || data->irq_count > CHAN_MAX_OUTPUT_INT) { ret = -EINVAL; @@ -219,6 +221,9 @@ static int imx_irqsteer_probe(struct platform_device *pdev) platform_set_drvdata(pdev, data); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + return 0; out: clk_disable_unprepare(data->ipg_clk); @@ -241,7 +246,7 @@ static int imx_irqsteer_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_PM static void imx_irqsteer_save_regs(struct irqsteer_data *data) { int i; @@ -288,7 +293,10 @@ static int imx_irqsteer_resume(struct device *dev) #endif static const struct dev_pm_ops imx_irqsteer_pm_ops = { - SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(imx_irqsteer_suspend, imx_irqsteer_resume) + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(imx_irqsteer_suspend, + imx_irqsteer_resume, NULL) }; static const struct of_device_id imx_irqsteer_dt_ids[] = { From 0c16e931a735500f15db74916db56c698d8ff735 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 17 Mar 2022 11:21:24 +0800 Subject: [PATCH 702/708] irqchip/csky: Return true/false (not 1/0) from bool functions Return boolean values ("true" or "false") instead of 1 or 0 from bool functions. Signed-off-by: Haowen Bai Acked-by: Guo Ren Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1647487284-30088-1-git-send-email-baihaowen@meizu.com --- drivers/irqchip/irq-csky-apb-intc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-csky-apb-intc.c b/drivers/irqchip/irq-csky-apb-intc.c index d36f536506ba..42d8a2438ebc 100644 --- a/drivers/irqchip/irq-csky-apb-intc.c +++ b/drivers/irqchip/irq-csky-apb-intc.c @@ -136,11 +136,11 @@ static inline bool handle_irq_perbit(struct pt_regs *regs, u32 hwirq, u32 irq_base) { if (hwirq == 0) - return 0; + return false; generic_handle_domain_irq(root_domain, irq_base + __fls(hwirq)); - return 1; + return true; } /* gx6605s 64 irqs interrupt controller */ From 4c5b2be1d071af26749790429726712e4d9105fb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 8 Mar 2022 12:11:16 -0800 Subject: [PATCH 703/708] irqchip/gic: Improved warning about incorrect type Issue the warning for interrupt lines that have an incorrect interrupt type and also print the hardware interrupt number to facilitate the resolution of such problems. Signed-off-by: Florian Fainelli Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220308201117.3870678-1-f.fainelli@gmail.com --- drivers/irqchip/irq-gic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 09c710ecc387..820404cb56bc 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1115,7 +1115,8 @@ static int gic_irq_domain_translate(struct irq_domain *d, *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK; /* Make it clear that broken DTs are... broken */ - WARN_ON(*type == IRQ_TYPE_NONE); + WARN(*type == IRQ_TYPE_NONE, + "HW irq %ld has invalid type\n", *hwirq); return 0; } @@ -1132,7 +1133,8 @@ static int gic_irq_domain_translate(struct irq_domain *d, *hwirq = fwspec->param[0]; *type = fwspec->param[1]; - WARN_ON(*type == IRQ_TYPE_NONE); + WARN(*type == IRQ_TYPE_NONE, + "HW irq %ld has invalid type\n", *hwirq); return 0; } From a3d66a76348daf559873f19afc912a2a7c2ccdaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 25 Apr 2022 13:37:05 +0200 Subject: [PATCH 704/708] irqchip/armada-370-xp: Do not touch Performance Counter Overflow on A375, A38x, A39x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Register ARMADA_370_XP_INT_FABRIC_MASK_OFFS is Armada 370 and XP specific and on new Armada platforms it has different meaning. It does not configure Performance Counter Overflow interrupt masking. So do not touch this register on non-A370/XP platforms (A375, A38x and A39x). Signed-off-by: Pali Rohár Cc: stable@vger.kernel.org Fixes: 28da06dfd9e4 ("irqchip: armada-370-xp: Enable the PMU interrupts") Reviewed-by: Andrew Lunn Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220425113706.29310-1-pali@kernel.org --- drivers/irqchip/irq-armada-370-xp.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index c877285d7095..ee18eb3e72b7 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -327,7 +327,16 @@ static inline int armada_370_xp_msi_init(struct device_node *node, static void armada_xp_mpic_perf_init(void) { - unsigned long cpuid = cpu_logical_map(smp_processor_id()); + unsigned long cpuid; + + /* + * This Performance Counter Overflow interrupt is specific for + * Armada 370 and XP. It is not available on Armada 375, 38x and 39x. + */ + if (!of_machine_is_compatible("marvell,armada-370-xp")) + return; + + cpuid = cpu_logical_map(smp_processor_id()); /* Enable Performance Counter Overflow interrupts */ writel(ARMADA_370_XP_INT_CAUSE_PERF(cpuid), From adf14453d2c037ab529040c1186ea32e277e783a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 13 May 2022 14:30:36 +0100 Subject: [PATCH 705/708] irqchip/gic-v3: Ensure pseudo-NMIs have an ISB between ack and handling There are cases where a context synchronization event is necessary between an IRQ being raised and being handled, and there are races such that we cannot rely upon the exception entry being subsequent to the interrupt being raised. We identified and fixes this for regular IRQs in commit: 39a06b67c2c1256b ("irqchip/gic: Ensure we have an ISB between ack and ->handle_irq") Unfortunately, we forgot to do the same for psuedo-NMIs when support for those was added in commit: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs") Which means that when pseudo-NMIs are used for PMU support, we'll hit the same problem. Apply the same fix as for regular IRQs. Note that when EOI mode 1 is in use, the call to gic_write_eoir() will provide an ISB. Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs") Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220513133038.226182-2-mark.rutland@arm.com --- drivers/irqchip/irq-gic-v3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index b252d5534547..7305d84f2df5 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -654,6 +654,9 @@ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) if (static_branch_likely(&supports_deactivate_key)) gic_write_eoir(irqnr); + else + isb() + /* * Leave the PSR.I bit set to prevent other NMIs to be * received while handling this one. From 6efb50923771f392122f5ce69dfc43b08f16e449 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 13 May 2022 14:30:37 +0100 Subject: [PATCH 706/708] irqchip/gic-v3: Refactor ISB + EOIR at ack time There are cases where a context synchronization event is necessary between an IRQ being raised and being handled, and there are races such that we cannot rely upon the exception entry being subsequent to the interrupt being raised. To fix this, we place an ISB between a read of IAR and the subsequent invocation of an IRQ handler. When EOI mode 1 is in use, we need to EOI an interrupt prior to invoking its handler, and we have a write to EOIR for this. As this write to EOIR requires an ISB, and this is provided by the gic_write_eoir() helper, we omit the usual ISB in this case, with the logic being: | if (static_branch_likely(&supports_deactivate_key)) | gic_write_eoir(irqnr); | else | isb(); This is somewhat opaque, and it would be a little clearer if there were an unconditional ISB, with only the write to EOIR being conditional, e.g. | if (static_branch_likely(&supports_deactivate_key)) | write_gicreg(irqnr, ICC_EOIR1_EL1); | | isb(); This patch rewrites the code that way, with this logic factored into a new helper function with comments explaining what the ISB is for, as were originally laid out in commit: 39a06b67c2c1256b ("irqchip/gic: Ensure we have an ISB between ack and ->handle_irq") Note that since then, we removed the IAR polling in commit: 342677d70ab92142 ("irqchip/gic-v3: Remove acknowledge loop") ... which removed one of the two race conditions. For consistency, other portions of the driver are made to manipulate EOIR using write_gicreg() and explcit ISBs, and the gic_write_eoir() helper function is removed. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220513133038.226182-3-mark.rutland@arm.com --- arch/arm/include/asm/arch_gicv3.h | 7 +---- arch/arm64/include/asm/arch_gicv3.h | 6 ---- drivers/irqchip/irq-gic-v3.c | 43 ++++++++++++++++++++++------- 3 files changed, 34 insertions(+), 22 deletions(-) diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index 413abfb42989..f82a819eb0db 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -48,6 +48,7 @@ static inline u32 read_ ## a64(void) \ return read_sysreg(a32); \ } \ +CPUIF_MAP(ICC_EOIR1, ICC_EOIR1_EL1) CPUIF_MAP(ICC_PMR, ICC_PMR_EL1) CPUIF_MAP(ICC_AP0R0, ICC_AP0R0_EL1) CPUIF_MAP(ICC_AP0R1, ICC_AP0R1_EL1) @@ -63,12 +64,6 @@ CPUIF_MAP(ICC_AP1R3, ICC_AP1R3_EL1) /* Low-level accessors */ -static inline void gic_write_eoir(u32 irq) -{ - write_sysreg(irq, ICC_EOIR1); - isb(); -} - static inline void gic_write_dir(u32 val) { write_sysreg(val, ICC_DIR); diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 8bd5afc7b692..48d4473e8eee 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -26,12 +26,6 @@ * sets the GP register's most significant bits to 0 with an explicit cast. */ -static inline void gic_write_eoir(u32 irq) -{ - write_sysreg_s(irq, SYS_ICC_EOIR1_EL1); - isb(); -} - static __always_inline void gic_write_dir(u32 irq) { write_sysreg_s(irq, SYS_ICC_DIR_EL1); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 7305d84f2df5..0cbc4e25c48d 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -556,7 +556,8 @@ static void gic_irq_nmi_teardown(struct irq_data *d) static void gic_eoi_irq(struct irq_data *d) { - gic_write_eoir(gic_irq(d)); + write_gicreg(gic_irq(d), ICC_EOIR1_EL1); + isb(); } static void gic_eoimode1_eoi_irq(struct irq_data *d) @@ -640,10 +641,38 @@ static void gic_deactivate_unhandled(u32 irqnr) if (irqnr < 8192) gic_write_dir(irqnr); } else { - gic_write_eoir(irqnr); + write_gicreg(irqnr, ICC_EOIR1_EL1); + isb(); } } +/* + * Follow a read of the IAR with any HW maintenance that needs to happen prior + * to invoking the relevant IRQ handler. We must do two things: + * + * (1) Ensure instruction ordering between a read of IAR and subsequent + * instructions in the IRQ handler using an ISB. + * + * It is possible for the IAR to report an IRQ which was signalled *after* + * the CPU took an IRQ exception as multiple interrupts can race to be + * recognized by the GIC, earlier interrupts could be withdrawn, and/or + * later interrupts could be prioritized by the GIC. + * + * For devices which are tightly coupled to the CPU, such as PMUs, a + * context synchronization event is necessary to ensure that system + * register state is not stale, as these may have been indirectly written + * *after* exception entry. + * + * (2) Deactivate the interrupt when EOI mode 1 is in use. + */ +static inline void gic_complete_ack(u32 irqnr) +{ + if (static_branch_likely(&supports_deactivate_key)) + write_gicreg(irqnr, ICC_EOIR1_EL1); + + isb(); +} + static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) { bool irqs_enabled = interrupts_enabled(regs); @@ -652,10 +681,7 @@ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) if (irqs_enabled) nmi_enter(); - if (static_branch_likely(&supports_deactivate_key)) - gic_write_eoir(irqnr); - else - isb() + gic_complete_ack(irqnr); /* * Leave the PSR.I bit set to prevent other NMIs to be @@ -726,10 +752,7 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs gic_arch_enable_irqs(); } - if (static_branch_likely(&supports_deactivate_key)) - gic_write_eoir(irqnr); - else - isb(); + gic_complete_ack(irqnr); if (generic_handle_domain_irq(gic_data.domain, irqnr)) { WARN_ONCE(true, "Unexpected interrupt received!\n"); From 614ab80c96474682157cabb14f8c8602b3422e90 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 13 May 2022 14:30:38 +0100 Subject: [PATCH 707/708] irqchip/gic-v3: Fix priority mask handling When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent usage of local_irq_enable() and resulting in softirqs being run with IRQs erroneously masked (possibly resulting in deadlocks). This can happen when an IRQ exception is taken from a context where regular IRQs were unmasked, and either: (1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ being withdrawn since the IRQ exception was taken). (2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged. When an NMI is taken from a context where regular IRQs were masked, there is no problem. When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with perf, e.g. | # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1 | ------------[ cut here ]------------ | WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c | Modules linked in: | CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12 | Hardware name: linux,dummy-virt (DT) | pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : arch_local_irq_enable+0x4c/0x6c | lr : __do_softirq+0x110/0x5d8 | sp : ffff8000080bbbc0 | pmr_save: 000000f0 | x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000 | x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008 | x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20 | x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000 | x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000 | x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294 | x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70 | x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf | x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001 | x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0 | Call trace: | arch_local_irq_enable+0x4c/0x6c | __irq_exit_rcu+0x180/0x1ac | irq_exit_rcu+0x1c/0x44 | el1_interrupt+0x4c/0xe4 | el1h_64_irq_handler+0x18/0x24 | el1h_64_irq+0x74/0x78 | smpboot_thread_fn+0x68/0x2c0 | kthread+0x124/0x130 | ret_from_fork+0x10/0x20 | irq event stamp: 193241 | hardirqs last enabled at (193240): [] __do_softirq+0x10c/0x5d8 | hardirqs last disabled at (193241): [] el1_dbg+0x24/0x90 | softirqs last enabled at (193234): [] __do_softirq+0x470/0x5d8 | softirqs last disabled at (193239): [] __irq_exit_rcu+0x180/0x1ac | ---[ end trace 0000000000000000 ]--- The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the interrupted context, but the structure of gic_handle_irq() makes this also depend on whether the GIC reports an IRQ, NMI, or special INTID: * When the interrupted context had regular IRQs masked (and hence the interrupt must be an NMI), the entry code performs the NMI entry/exit and gic_handle_irq() should return with DAIF and ICC_PMR_EL1 unchanged. This is handled correctly today. * When the interrupted context had regular IRQs unmasked, the entry code performs IRQ entry/exit, but expects gic_handle_irq() to always update ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to returning (which it must do prior to invoking any regular IRQ handler). This unbalanced calling convention is necessary because we don't know whether an NMI has been taken until acknowledged by a read from ICC_IAR1_EL1, and so we need to perform the read with NMI masked in case an NMI has been taken (and needs to be handled with NMIs masked). Unfortunately, this is not handled consistently: - When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns immediately without manipulating ICC_PMR_EL1 and DAIF. - When RPR_EL1 indicates an NMI, gic_handle_irq() calls gic_handle_nmi() to invoke the NMI handler, then returns without manipulating ICC_PMR_EL1 and DAIF. - For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF prior to invoking the IRQ handler. There were related problems with special INTID handling in the past, where if an exception was taken from a context with regular IRQs masked and ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would erroneously unmask NMIs in NMI context permitted an unexpected nested NMI. That case specifically was fixed by commit: a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups") ... but unfortunately that commit added an inverse problem, where if an exception was taken from a context with regular IRQs *unmasked* and ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would erroneously fail to unmask NMIs (and consequently regular IRQs could not be unmasked during softirq processing). Before and after that commit, if an NMI was taken from a context with regular IRQs unmasked gic_handle_irq() would not unmask NMIs prior to returning, leading to the same problem with softirq handling. This patch fixes this by restructuring gic_handle_irq(), splitting it into separate irqson/irqsoff helper functions which consistently perform the DAIF + ICC_PMR1_EL1 manipulation based upon the interrupted context, regardless of the event indicated by ICC_IAR1_EL1. The special INTID handling is moved into the low-level IRQ/NMI handler invocation helper functions, so that early returns don't prevent the required manipulation of DAIF + ICC_PMR_EL1. Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs") Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Thomas Gleixner Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com --- drivers/irqchip/irq-gic-v3.c | 157 +++++++++++++++++++++-------------- 1 file changed, 94 insertions(+), 63 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 0cbc4e25c48d..1af2b50f36f3 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -673,78 +673,69 @@ static inline void gic_complete_ack(u32 irqnr) isb(); } -static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) +static bool gic_rpr_is_nmi_prio(void) { - bool irqs_enabled = interrupts_enabled(regs); - int err; + if (!gic_supports_nmi()) + return false; - if (irqs_enabled) - nmi_enter(); + return unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI)); +} + +static bool gic_irqnr_is_special(u32 irqnr) +{ + return irqnr >= 1020 && irqnr <= 1023; +} + +static void __gic_handle_irq(u32 irqnr, struct pt_regs *regs) +{ + if (gic_irqnr_is_special(irqnr)) + return; gic_complete_ack(irqnr); - /* - * Leave the PSR.I bit set to prevent other NMIs to be - * received while handling this one. - * PSR.I will be restored when we ERET to the - * interrupted context. - */ - err = generic_handle_domain_nmi(gic_data.domain, irqnr); - if (err) + if (generic_handle_domain_irq(gic_data.domain, irqnr)) { + WARN_ONCE(true, "Unexpected interrupt (irqnr %u)\n", irqnr); gic_deactivate_unhandled(irqnr); - - if (irqs_enabled) - nmi_exit(); -} - -static u32 do_read_iar(struct pt_regs *regs) -{ - u32 iar; - - if (gic_supports_nmi() && unlikely(!interrupts_enabled(regs))) { - u64 pmr; - - /* - * We were in a context with IRQs disabled. However, the - * entry code has set PMR to a value that allows any - * interrupt to be acknowledged, and not just NMIs. This can - * lead to surprising effects if the NMI has been retired in - * the meantime, and that there is an IRQ pending. The IRQ - * would then be taken in NMI context, something that nobody - * wants to debug twice. - * - * Until we sort this, drop PMR again to a level that will - * actually only allow NMIs before reading IAR, and then - * restore it to what it was. - */ - pmr = gic_read_pmr(); - gic_pmr_mask_irqs(); - isb(); - - iar = gic_read_iar(); - - gic_write_pmr(pmr); - } else { - iar = gic_read_iar(); } - - return iar; } -static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) +static void __gic_handle_nmi(u32 irqnr, struct pt_regs *regs) { + if (gic_irqnr_is_special(irqnr)) + return; + + gic_complete_ack(irqnr); + + if (generic_handle_domain_nmi(gic_data.domain, irqnr)) { + WARN_ONCE(true, "Unexpected pseudo-NMI (irqnr %u)\n", irqnr); + gic_deactivate_unhandled(irqnr); + } +} + +/* + * An exception has been taken from a context with IRQs enabled, and this could + * be an IRQ or an NMI. + * + * The entry code called us with DAIF.IF set to keep NMIs masked. We must clear + * DAIF.IF (and update ICC_PMR_EL1 to mask regular IRQs) prior to returning, + * after handling any NMI but before handling any IRQ. + * + * The entry code has performed IRQ entry, and if an NMI is detected we must + * perform NMI entry/exit around invoking the handler. + */ +static void __gic_handle_irq_from_irqson(struct pt_regs *regs) +{ + bool is_nmi; u32 irqnr; - irqnr = do_read_iar(regs); + irqnr = gic_read_iar(); - /* Check for special IDs first */ - if ((irqnr >= 1020 && irqnr <= 1023)) - return; + is_nmi = gic_rpr_is_nmi_prio(); - if (gic_supports_nmi() && - unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI))) { - gic_handle_nmi(irqnr, regs); - return; + if (is_nmi) { + nmi_enter(); + __gic_handle_nmi(irqnr, regs); + nmi_exit(); } if (gic_prio_masking_enabled()) { @@ -752,12 +743,52 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs gic_arch_enable_irqs(); } - gic_complete_ack(irqnr); + if (!is_nmi) + __gic_handle_irq(irqnr, regs); +} - if (generic_handle_domain_irq(gic_data.domain, irqnr)) { - WARN_ONCE(true, "Unexpected interrupt received!\n"); - gic_deactivate_unhandled(irqnr); - } +/* + * An exception has been taken from a context with IRQs disabled, which can only + * be an NMI. + * + * The entry code called us with DAIF.IF set to keep NMIs masked. We must leave + * DAIF.IF (and ICC_PMR_EL1) unchanged. + * + * The entry code has performed NMI entry. + */ +static void __gic_handle_irq_from_irqsoff(struct pt_regs *regs) +{ + u64 pmr; + u32 irqnr; + + /* + * We were in a context with IRQs disabled. However, the + * entry code has set PMR to a value that allows any + * interrupt to be acknowledged, and not just NMIs. This can + * lead to surprising effects if the NMI has been retired in + * the meantime, and that there is an IRQ pending. The IRQ + * would then be taken in NMI context, something that nobody + * wants to debug twice. + * + * Until we sort this, drop PMR again to a level that will + * actually only allow NMIs before reading IAR, and then + * restore it to what it was. + */ + pmr = gic_read_pmr(); + gic_pmr_mask_irqs(); + isb(); + irqnr = gic_read_iar(); + gic_write_pmr(pmr); + + __gic_handle_nmi(irqnr, regs); +} + +static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) +{ + if (unlikely(gic_supports_nmi() && !interrupts_enabled(regs))) + __gic_handle_irq_from_irqsoff(regs); + else + __gic_handle_irq_from_irqson(regs); } static u32 gic_get_pribits(void) From d421fd6d1fbf00b6481d836e65bad07d6bad61ed Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 8 May 2022 22:49:41 -0500 Subject: [PATCH 708/708] irqchip: Add Kconfig symbols for sunxi drivers Not all of these drivers are needed on every ARCH_SUNXI platform. In particular, the ARCH_SUNXI symbol will be reused for the Allwinner D1, a RISC-V SoC which contains none of these irqchips. Introduce Kconfig symbols so we can select only the drivers actually used by a particular set of platforms. This also lets us move the irqchip driver dependencies to a more appropriate location. Signed-off-by: Samuel Holland Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220509034941.30704-1-samuel@sholland.org --- arch/arm/mach-sunxi/Kconfig | 12 +++++++++--- arch/arm64/Kconfig.platforms | 5 ++--- drivers/irqchip/Kconfig | 12 ++++++++++++ drivers/irqchip/Makefile | 6 +++--- 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig index e5c2fce281cd..abdb99fe1e97 100644 --- a/arch/arm/mach-sunxi/Kconfig +++ b/arch/arm/mach-sunxi/Kconfig @@ -4,10 +4,7 @@ menuconfig ARCH_SUNXI depends on ARCH_MULTI_V5 || ARCH_MULTI_V7 select ARCH_HAS_RESET_CONTROLLER select CLKSRC_MMIO - select GENERIC_IRQ_CHIP select GPIOLIB - select IRQ_DOMAIN_HIERARCHY - select IRQ_FASTEOI_HIERARCHY_HANDLERS select PINCTRL select PM_OPP select SUN4I_TIMER @@ -22,10 +19,12 @@ if ARCH_MULTI_V7 config MACH_SUN4I bool "Allwinner A10 (sun4i) SoCs support" default ARCH_SUNXI + select SUN4I_INTC config MACH_SUN5I bool "Allwinner A10s / A13 (sun5i) SoCs support" default ARCH_SUNXI + select SUN4I_INTC select SUN5I_HSTIMER config MACH_SUN6I @@ -34,6 +33,8 @@ config MACH_SUN6I select ARM_GIC select MFD_SUN6I_PRCM select SUN5I_HSTIMER + select SUN6I_R_INTC + select SUNXI_NMI_INTC config MACH_SUN7I bool "Allwinner A20 (sun7i) SoCs support" @@ -43,17 +44,21 @@ config MACH_SUN7I select ARCH_SUPPORTS_BIG_ENDIAN select HAVE_ARM_ARCH_TIMER select SUN5I_HSTIMER + select SUNXI_NMI_INTC config MACH_SUN8I bool "Allwinner sun8i Family SoCs support" default ARCH_SUNXI select ARM_GIC select MFD_SUN6I_PRCM + select SUN6I_R_INTC + select SUNXI_NMI_INTC config MACH_SUN9I bool "Allwinner (sun9i) SoCs support" default ARCH_SUNXI select ARM_GIC + select SUNXI_NMI_INTC config ARCH_SUNXI_MC_SMP bool @@ -69,6 +74,7 @@ if ARCH_MULTI_V5 config MACH_SUNIV bool "Allwinner ARMv5 F-series (suniv) SoCs support" default ARCH_SUNXI + select SUN4I_INTC help Support for Allwinner suniv ARMv5 SoCs. (F1C100A, F1C100s, F1C200s, F1C500, F1C600) diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index aaeaf57c8222..6a6457fed7b2 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -11,12 +11,11 @@ config ARCH_ACTIONS config ARCH_SUNXI bool "Allwinner sunxi 64-bit SoC Family" select ARCH_HAS_RESET_CONTROLLER - select GENERIC_IRQ_CHIP - select IRQ_DOMAIN_HIERARCHY - select IRQ_FASTEOI_HIERARCHY_HANDLERS select PINCTRL select RESET_CONTROLLER select SUN4I_TIMER + select SUN6I_R_INTC + select SUNXI_NMI_INTC help This enables support for Allwinner sunxi based SoCs like the A64. diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 15edb9a6fcae..135c156673a7 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -257,6 +257,18 @@ config ST_IRQCHIP help Enables SysCfg Controlled IRQs on STi based platforms. +config SUN4I_INTC + bool + +config SUN6I_R_INTC + bool + select IRQ_DOMAIN_HIERARCHY + select IRQ_FASTEOI_HIERARCHY_HANDLERS + +config SUNXI_NMI_INTC + bool + select GENERIC_IRQ_CHIP + config TB10X_IRQC bool select IRQ_DOMAIN diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 160a1d8ceaa9..9b1ffb0f98cc 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -23,9 +23,9 @@ obj-$(CONFIG_OMPIC) += irq-ompic.o obj-$(CONFIG_OR1K_PIC) += irq-or1k-pic.o obj-$(CONFIG_ORION_IRQCHIP) += irq-orion.o obj-$(CONFIG_OMAP_IRQCHIP) += irq-omap-intc.o -obj-$(CONFIG_ARCH_SUNXI) += irq-sun4i.o -obj-$(CONFIG_ARCH_SUNXI) += irq-sun6i-r.o -obj-$(CONFIG_ARCH_SUNXI) += irq-sunxi-nmi.o +obj-$(CONFIG_SUN4I_INTC) += irq-sun4i.o +obj-$(CONFIG_SUN6I_R_INTC) += irq-sun6i-r.o +obj-$(CONFIG_SUNXI_NMI_INTC) += irq-sunxi-nmi.o obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o obj-$(CONFIG_ARM_GIC) += irq-gic.o irq-gic-common.o obj-$(CONFIG_ARM_GIC_PM) += irq-gic-pm.o