diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt index 2a71c8f29f68..0a9ea515512a 100644 --- a/Documentation/virtual/kvm/msr.txt +++ b/Documentation/virtual/kvm/msr.txt @@ -208,7 +208,9 @@ MSR_KVM_STEAL_TIME: 0x4b564d03 __u64 steal; __u32 version; __u32 flags; - __u32 pad[12]; + __u8 preempted; + __u8 u8_pad[3]; + __u32 pad[11]; } whose data will be filled in by the hypervisor periodically. Only one @@ -232,6 +234,11 @@ MSR_KVM_STEAL_TIME: 0x4b564d03 nanoseconds. Time during which the vcpu is idle, will not be reported as steal time. + preempted: indicate the vCPU who owns this struct is running or + not. Non-zero values mean the vCPU has been preempted. Zero + means the vCPU is not preempted. NOTE, it is always zero if the + the hypervisor doesn't support this field. + MSR_KVM_EOI_EN: 0x4b564d04 data: Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0 when disabled. Bit 1 is reserved and must be zero. When PV end of diff --git a/arch/alpha/include/asm/mutex.h b/arch/alpha/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/alpha/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index 43a7559c448b..2fec2dee3020 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -58,7 +58,6 @@ unsigned long get_wchan(struct task_struct *p); ((tsk) == current ? rdusp() : task_thread_info(tsk)->pcb.usp) #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() #define ARCH_HAS_PREFETCH #define ARCH_HAS_PREFETCHW diff --git a/arch/arc/include/asm/mutex.h b/arch/arc/include/asm/mutex.h deleted file mode 100644 index a2f88ff9f506..000000000000 --- a/arch/arc/include/asm/mutex.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* - * xchg() based mutex fast path maintains a state of 0 or 1, as opposed to - * atomic dec based which can "count" any number of lock contenders. - * This ideally needs to be fixed in core, but for now switching to dec ver. - */ -#if defined(CONFIG_SMP) && (CONFIG_NR_CPUS > 2) -#include -#else -#include -#endif diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 16b630fbeb6a..6e1242da0159 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -60,15 +60,12 @@ struct task_struct; #ifndef CONFIG_EZNPS_MTM_EXT #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() #else #define cpu_relax() \ __asm__ __volatile__ (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory") -#define cpu_relax_lowlatency() barrier() - #endif #define copy_segments(tsk, mm) do { } while (0) diff --git a/arch/arm/include/asm/mutex.h b/arch/arm/include/asm/mutex.h deleted file mode 100644 index 87c044910fe0..000000000000 --- a/arch/arm/include/asm/mutex.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * arch/arm/include/asm/mutex.h - * - * ARM optimized mutex locking primitives - * - * Please look into asm-generic/mutex-xchg.h for a formal definition. - */ -#ifndef _ASM_MUTEX_H -#define _ASM_MUTEX_H -/* - * On pre-ARMv6 hardware this results in a swp-based implementation, - * which is the most efficient. For ARMv6+, we have exclusive memory - * accessors and use atomic_dec to avoid the extra xchg operations - * on the locking slowpaths. - */ -#if __LINUX_ARM_ARCH__ < 6 -#include -#else -#include -#endif -#endif /* _ASM_MUTEX_H */ diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 8a1e8e995dae..c3d5fc124a05 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -82,8 +82,6 @@ unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #endif -#define cpu_relax_lowlatency() cpu_relax() - #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 44e1d7f10add..b4ab238a59ec 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -24,7 +24,6 @@ generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += msgbuf.h generic-y += msi.h -generic-y += mutex.h generic-y += poll.h generic-y += preempt.h generic-y += resource.h diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 60e34824e18c..747c65a616ed 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -149,8 +149,6 @@ static inline void cpu_relax(void) asm volatile("yield" ::: "memory"); } -#define cpu_relax_lowlatency() cpu_relax() - /* Thread switching */ extern struct task_struct *cpu_switch_to(struct task_struct *prev, struct task_struct *next); diff --git a/arch/avr32/include/asm/mutex.h b/arch/avr32/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/avr32/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/avr32/include/asm/processor.h b/arch/avr32/include/asm/processor.h index 941593c7d9f3..972adcc1e8f4 100644 --- a/arch/avr32/include/asm/processor.h +++ b/arch/avr32/include/asm/processor.h @@ -92,7 +92,6 @@ extern struct avr32_cpuinfo boot_cpu_data; #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() #define cpu_sync_pipeline() asm volatile("sub pc, -2" : : : "memory") struct cpu_context { diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 91d49c0a3118..2fb67b59d188 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -24,7 +24,6 @@ generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += msgbuf.h -generic-y += mutex.h generic-y += param.h generic-y += percpu.h generic-y += pgalloc.h diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h index 0c265aba94ad..85d4af97c986 100644 --- a/arch/blackfin/include/asm/processor.h +++ b/arch/blackfin/include/asm/processor.h @@ -92,7 +92,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk) == current ? rdusp() : (tsk)->thread.usp) #define cpu_relax() smp_mb() -#define cpu_relax_lowlatency() cpu_relax() /* Get the Silicon Revision of the chip */ static inline uint32_t __pure bfin_revid(void) diff --git a/arch/c6x/include/asm/mutex.h b/arch/c6x/include/asm/mutex.h deleted file mode 100644 index 7a7248e0462d..000000000000 --- a/arch/c6x/include/asm/mutex.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_C6X_MUTEX_H -#define _ASM_C6X_MUTEX_H - -#include - -#endif /* _ASM_C6X_MUTEX_H */ diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h index f2ef31be2f8b..b9eb3da7f278 100644 --- a/arch/c6x/include/asm/processor.h +++ b/arch/c6x/include/asm/processor.h @@ -121,7 +121,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(task) (task_pt_regs(task)->sp) #define cpu_relax() do { } while (0) -#define cpu_relax_lowlatency() cpu_relax() extern const struct seq_operations cpuinfo_op; diff --git a/arch/cris/include/asm/mutex.h b/arch/cris/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/cris/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h index 862126b58116..15b815df29c1 100644 --- a/arch/cris/include/asm/processor.h +++ b/arch/cris/include/asm/processor.h @@ -63,7 +63,6 @@ static inline void release_thread(struct task_struct *dead_task) #define init_stack (init_thread_union.stack) #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() void default_idle(void); diff --git a/arch/frv/include/asm/mutex.h b/arch/frv/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/frv/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h index 73f0a79ad8e6..ddaeb9cc9143 100644 --- a/arch/frv/include/asm/processor.h +++ b/arch/frv/include/asm/processor.h @@ -107,7 +107,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.frame0->sp) #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() /* data cache prefetch */ #define ARCH_HAS_PREFETCH diff --git a/arch/h8300/include/asm/mutex.h b/arch/h8300/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/h8300/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h index 111df7397ac7..65132d7ae9e5 100644 --- a/arch/h8300/include/asm/processor.h +++ b/arch/h8300/include/asm/processor.h @@ -127,7 +127,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk) == current ? rdusp() : (tsk)->thread.usp) #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() #define HARD_RESET_NOW() ({ \ local_irq_disable(); \ diff --git a/arch/hexagon/include/asm/mutex.h b/arch/hexagon/include/asm/mutex.h deleted file mode 100644 index 58b52de1bc22..000000000000 --- a/arch/hexagon/include/asm/mutex.h +++ /dev/null @@ -1,8 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ -#include diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h index d8501137c8d0..45a825402f63 100644 --- a/arch/hexagon/include/asm/processor.h +++ b/arch/hexagon/include/asm/processor.h @@ -56,7 +56,6 @@ struct thread_struct { } #define cpu_relax() __vmyield() -#define cpu_relax_lowlatency() cpu_relax() /* * Decides where the kernel will search for a free chunk of vm space during diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h deleted file mode 100644 index 28cb819e0ff9..000000000000 --- a/arch/ia64/include/asm/mutex.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * ia64 implementation of the mutex fastpath. - * - * Copyright (C) 2006 Ken Chen - * - */ - -#ifndef _ASM_MUTEX_H -#define _ASM_MUTEX_H - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call if - * it wasn't 1 originally. This function MUST leave the value lower than - * 1 even when the "1" assertion wasn't true. - */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(ia64_fetchadd4_acq(count, -1) != 1)) - fail_fn(count); -} - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(ia64_fetchadd4_acq(count, -1) != 1)) - return -1; - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the count from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * Try to promote the count from 0 to 1. If it wasn't 0, call . - * In the failure case, this function is allowed to either set the value to - * 1, or to set it to a value lower than 1. - * - * If the implementation sets it to a value of lower than 1, then the - * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs - * to return 0 otherwise. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int ret = ia64_fetchadd4_rel(count, 1); - if (unlikely(ret < 0)) - fail_fn(count); -} - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to a value lower than 1, and return 0 (failure) - * if it wasn't 1 originally, or return 1 (success) otherwise. This function - * MUST leave the value lower than 1 even when the "1" assertion wasn't true. - * Additionally, if the value was < 0 originally, this function must not leave - * it to 0 on failure. - * - * If the architecture has no effective trylock variant, it should call the - * spinlock-based trylock variant unconditionally. - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1) - return 1; - return 0; -} - -#endif diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index ce53c50d0ba4..03911a336406 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -547,7 +547,6 @@ ia64_eoi (void) } #define cpu_relax() ia64_hint(ia64_hint_pause) -#define cpu_relax_lowlatency() cpu_relax() static inline int ia64_get_irr(unsigned int vector) diff --git a/arch/m32r/include/asm/mutex.h b/arch/m32r/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/m32r/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h index 9f8fd9bef70f..5767367550c6 100644 --- a/arch/m32r/include/asm/processor.h +++ b/arch/m32r/include/asm/processor.h @@ -133,6 +133,5 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.sp) #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() #endif /* _ASM_M32R_PROCESSOR_H */ diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index eb85bd9c6180..1f2e5d31cb24 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -20,7 +20,6 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += mman.h -generic-y += mutex.h generic-y += percpu.h generic-y += preempt.h generic-y += resource.h diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index c84a2183b3f0..f5f790c31bf8 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -156,6 +156,5 @@ unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) ((tsk)->thread.esp0)) #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() #endif diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index 29acb89daaaa..167150c701d1 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -27,7 +27,6 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += msgbuf.h -generic-y += mutex.h generic-y += param.h generic-y += pci.h generic-y += percpu.h diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h index a0333ebcac35..ec6a49076980 100644 --- a/arch/metag/include/asm/processor.h +++ b/arch/metag/include/asm/processor.h @@ -152,7 +152,6 @@ unsigned long get_wchan(struct task_struct *p); #define user_stack_pointer(regs) ((regs)->ctx.AX[0].U0) #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() extern void setup_priv(void); diff --git a/arch/microblaze/include/asm/mutex.h b/arch/microblaze/include/asm/mutex.h deleted file mode 100644 index ff6101aa2c71..000000000000 --- a/arch/microblaze/include/asm/mutex.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h index c38d0dd91134..37ef196e4519 100644 --- a/arch/microblaze/include/asm/processor.h +++ b/arch/microblaze/include/asm/processor.h @@ -22,7 +22,6 @@ extern const struct seq_operations cpuinfo_op; # define cpu_relax() barrier() -# define cpu_relax_lowlatency() cpu_relax() #define task_pt_regs(tsk) \ (((struct pt_regs *)(THREAD_SIZE + task_stack_page(tsk))) - 1) diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 9740066cc631..3269b742a75e 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -9,7 +9,6 @@ generic-y += irq_work.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h -generic-y += mutex.h generic-y += parport.h generic-y += percpu.h generic-y += preempt.h diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 0d36c87acbe2..95b8c471f572 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -389,7 +389,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_STATUS(tsk) (task_pt_regs(tsk)->cp0_status) #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() /* * Return_address is a replacement for __builtin_return_address(count) diff --git a/arch/mn10300/include/asm/mutex.h b/arch/mn10300/include/asm/mutex.h deleted file mode 100644 index 84f5490c6fb4..000000000000 --- a/arch/mn10300/include/asm/mutex.h +++ /dev/null @@ -1,16 +0,0 @@ -/* MN10300 Mutex fastpath - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - * - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ -#include diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h index b10ba121c849..18e17abf7664 100644 --- a/arch/mn10300/include/asm/processor.h +++ b/arch/mn10300/include/asm/processor.h @@ -69,7 +69,6 @@ extern void print_cpu_info(struct mn10300_cpuinfo *); extern void dodgy_tsc(void); #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() /* * User space process size: 1.75GB (default). diff --git a/arch/nios2/include/asm/mutex.h b/arch/nios2/include/asm/mutex.h deleted file mode 100644 index ff6101aa2c71..000000000000 --- a/arch/nios2/include/asm/mutex.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h index 1c953f0cadbf..3bbbc3d798e5 100644 --- a/arch/nios2/include/asm/processor.h +++ b/arch/nios2/include/asm/processor.h @@ -88,7 +88,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.kregs->sp) #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() #endif /* __ASSEMBLY__ */ diff --git a/arch/openrisc/include/asm/mutex.h b/arch/openrisc/include/asm/mutex.h deleted file mode 100644 index b85a0cfa9fc9..000000000000 --- a/arch/openrisc/include/asm/mutex.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * OpenRISC Linux - * - * Linux architectural port borrowing liberally from similar works of - * others. All original copyrights apply as per the original source - * declaration. - * - * OpenRISC implementation: - * Copyright (C) 2003 Matjaz Breskvar - * Copyright (C) 2010-2011 Jonas Bonn - * et al. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index 70334c9f7d24..a908e6c30a00 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -92,7 +92,6 @@ extern unsigned long thread_saved_pc(struct task_struct *t); #define init_stack (init_thread_union.stack) #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() #endif /* __ASSEMBLY__ */ #endif /* __ASM_OPENRISC_PROCESSOR_H */ diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index f9b3a81aefcd..91f53c07f410 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -16,7 +16,6 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h -generic-y += mutex.h generic-y += param.h generic-y += percpu.h generic-y += poll.h diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index 2e674e13e005..ca40741378be 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -309,7 +309,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.regs.gr[30]) #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() /* * parisc_requires_coherency() is used to identify the combined VIPT/PIPT diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h deleted file mode 100644 index 078155fa1189..000000000000 --- a/arch/powerpc/include/asm/mutex.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Optimised mutex implementation of include/asm-generic/mutex-dec.h algorithm - */ -#ifndef _ASM_POWERPC_MUTEX_H -#define _ASM_POWERPC_MUTEX_H - -static inline int __mutex_cmpxchg_lock(atomic_t *v, int old, int new) -{ - int t; - - __asm__ __volatile__ ( -"1: lwarx %0,0,%1 # mutex trylock\n\ - cmpw 0,%0,%2\n\ - bne- 2f\n" - PPC405_ERR77(0,%1) -" stwcx. %3,0,%1\n\ - bne- 1b" - PPC_ACQUIRE_BARRIER - "\n\ -2:" - : "=&r" (t) - : "r" (&v->counter), "r" (old), "r" (new) - : "cc", "memory"); - - return t; -} - -static inline int __mutex_dec_return_lock(atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%1 # mutex lock\n\ - addic %0,%0,-1\n" - PPC405_ERR77(0,%1) -" stwcx. %0,0,%1\n\ - bne- 1b" - PPC_ACQUIRE_BARRIER - : "=&r" (t) - : "r" (&v->counter) - : "cc", "memory"); - - return t; -} - -static inline int __mutex_inc_return_unlock(atomic_t *v) -{ - int t; - - __asm__ __volatile__( - PPC_RELEASE_BARRIER -"1: lwarx %0,0,%1 # mutex unlock\n\ - addic %0,%0,1\n" - PPC405_ERR77(0,%1) -" stwcx. %0,0,%1 \n\ - bne- 1b" - : "=&r" (t) - : "r" (&v->counter) - : "cc", "memory"); - - return t; -} - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call if - * it wasn't 1 originally. This function MUST leave the value lower than - * 1 even when the "1" assertion wasn't true. - */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(__mutex_dec_return_lock(count) < 0)) - fail_fn(count); -} - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(__mutex_dec_return_lock(count) < 0)) - return -1; - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the count from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * Try to promote the count from 0 to 1. If it wasn't 0, call . - * In the failure case, this function is allowed to either set the value to - * 1, or to set it to a value lower than 1. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(__mutex_inc_return_unlock(count) <= 0)) - fail_fn(count); -} - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to 0, and return 1 (success), or if the count - * was not 1, then return 0 (failure). - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1)) - return 1; - return 0; -} - -#endif diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index c07c31b0e89e..dac83fcb9445 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -404,8 +404,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #define cpu_relax() barrier() #endif -#define cpu_relax_lowlatency() cpu_relax() - /* Check that a certain kernel stack pointer is valid in task_struct p */ int validate_sp(unsigned long sp, struct task_struct *p, unsigned long nbytes); diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index fa37fe93bc02..8c1b913de6d7 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -52,6 +52,14 @@ #define SYNC_IO #endif +#ifdef CONFIG_PPC_PSERIES +#define vcpu_is_preempted vcpu_is_preempted +static inline bool vcpu_is_preempted(int cpu) +{ + return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1); +} +#endif + static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) { return lock.slock == 0; diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/s390/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 602af692efdc..9d3a21aedc97 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -234,9 +234,10 @@ static inline unsigned short stap(void) /* * Give up the time slice of the virtual PU. */ -void cpu_relax(void); +#define cpu_relax_yield cpu_relax_yield +void cpu_relax_yield(void); -#define cpu_relax_lowlatency() barrier() +#define cpu_relax() barrier() #define ECAG_CACHE_ATTRIBUTE 0 #define ECAG_CPU_ATTRIBUTE 1 diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 7e9e09f600fa..7ecd8902a5c3 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -23,6 +23,14 @@ _raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new) return __sync_bool_compare_and_swap(lock, old, new); } +#ifndef CONFIG_SMP +static inline bool arch_vcpu_is_preempted(int cpu) { return false; } +#else +bool arch_vcpu_is_preempted(int cpu); +#endif + +#define vcpu_is_preempted arch_vcpu_is_preempted + /* * Simple spin lock operations. There are two variants, one clears IRQ's * on the local processor, one does not. diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 81d0808085e6..9e60ef144d03 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -53,7 +53,7 @@ void s390_update_cpu_mhz(void) on_each_cpu(update_cpu_mhz, NULL, 0); } -void notrace cpu_relax(void) +void notrace cpu_relax_yield(void) { if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) { diag_stat_inc(DIAG_STAT_X044); @@ -61,7 +61,7 @@ void notrace cpu_relax(void) } barrier(); } -EXPORT_SYMBOL(cpu_relax); +EXPORT_SYMBOL(cpu_relax_yield); /* * cpu_init - initializes state that is per-CPU. diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 35531fe1c5ea..b988ed1d75ad 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -368,10 +368,15 @@ int smp_find_processor_id(u16 address) return -1; } -int smp_vcpu_scheduled(int cpu) +bool arch_vcpu_is_preempted(int cpu) { - return pcpu_running(pcpu_devices + cpu); + if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) + return false; + if (pcpu_running(pcpu_devices + cpu)) + return false; + return true; } +EXPORT_SYMBOL(arch_vcpu_is_preempted); void smp_yield_cpu(int cpu) { diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index e5f50a7d2f4e..e48a48ec24bc 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -37,15 +37,6 @@ static inline void _raw_compare_and_delay(unsigned int *lock, unsigned int old) asm(".insn rsy,0xeb0000000022,%0,0,%1" : : "d" (old), "Q" (*lock)); } -static inline int cpu_is_preempted(int cpu) -{ - if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) - return 0; - if (smp_vcpu_scheduled(cpu)) - return 0; - return 1; -} - void arch_spin_lock_wait(arch_spinlock_t *lp) { unsigned int cpu = SPINLOCK_LOCKVAL; @@ -62,7 +53,7 @@ void arch_spin_lock_wait(arch_spinlock_t *lp) continue; } /* First iteration: check if the lock owner is running. */ - if (first_diag && cpu_is_preempted(~owner)) { + if (first_diag && arch_vcpu_is_preempted(~owner)) { smp_yield_cpu(~owner); first_diag = 0; continue; @@ -81,7 +72,7 @@ void arch_spin_lock_wait(arch_spinlock_t *lp) * yield the CPU unconditionally. For LPAR rely on the * sense running status. */ - if (!MACHINE_IS_LPAR || cpu_is_preempted(~owner)) { + if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) { smp_yield_cpu(~owner); first_diag = 0; } @@ -108,7 +99,7 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) continue; } /* Check if the lock owner is running. */ - if (first_diag && cpu_is_preempted(~owner)) { + if (first_diag && arch_vcpu_is_preempted(~owner)) { smp_yield_cpu(~owner); first_diag = 0; continue; @@ -127,7 +118,7 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) * yield the CPU unconditionally. For LPAR rely on the * sense running status. */ - if (!MACHINE_IS_LPAR || cpu_is_preempted(~owner)) { + if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) { smp_yield_cpu(~owner); first_diag = 0; } @@ -165,7 +156,7 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) owner = 0; while (1) { if (count-- <= 0) { - if (owner && cpu_is_preempted(~owner)) + if (owner && arch_vcpu_is_preempted(~owner)) smp_yield_cpu(~owner); count = spin_retry; } @@ -211,7 +202,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev) owner = 0; while (1) { if (count-- <= 0) { - if (owner && cpu_is_preempted(~owner)) + if (owner && arch_vcpu_is_preempted(~owner)) smp_yield_cpu(~owner); count = spin_retry; } @@ -241,7 +232,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw) owner = 0; while (1) { if (count-- <= 0) { - if (owner && cpu_is_preempted(~owner)) + if (owner && arch_vcpu_is_preempted(~owner)) smp_yield_cpu(~owner); count = spin_retry; } @@ -285,7 +276,7 @@ void arch_lock_relax(unsigned int cpu) { if (!cpu) return; - if (MACHINE_IS_LPAR && !cpu_is_preempted(~cpu)) + if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(~cpu)) return; smp_yield_cpu(~cpu); } diff --git a/arch/score/include/asm/mutex.h b/arch/score/include/asm/mutex.h deleted file mode 100644 index 10d48fe4db97..000000000000 --- a/arch/score/include/asm/mutex.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_SCORE_MUTEX_H -#define _ASM_SCORE_MUTEX_H - -#include - -#endif /* _ASM_SCORE_MUTEX_H */ diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h index 851f441991d2..d9a922d8711b 100644 --- a/arch/score/include/asm/processor.h +++ b/arch/score/include/asm/processor.h @@ -24,7 +24,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define current_text_addr() ({ __label__ _l; _l: &&_l; }) #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() #define release_thread(thread) do {} while (0) /* diff --git a/arch/sh/include/asm/mutex-llsc.h b/arch/sh/include/asm/mutex-llsc.h deleted file mode 100644 index dad29b687bd3..000000000000 --- a/arch/sh/include/asm/mutex-llsc.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * arch/sh/include/asm/mutex-llsc.h - * - * SH-4A optimized mutex locking primitives - * - * Please look into asm-generic/mutex-xchg.h for a formal definition. - */ -#ifndef __ASM_SH_MUTEX_LLSC_H -#define __ASM_SH_MUTEX_LLSC_H - -/* - * Attempting to lock a mutex on SH4A is done like in ARMv6+ architecure. - * with a bastardized atomic decrement (it is not a reliable atomic decrement - * but it satisfies the defined semantics for our purpose, while being - * smaller and faster than a real atomic decrement or atomic swap. - * The idea is to attempt decrementing the lock value only once. If once - * decremented it isn't zero, or if its store-back fails due to a dispute - * on the exclusive store, we simply bail out immediately through the slow - * path where the lock will be reattempted until it succeeds. - */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int __done, __res; - - __asm__ __volatile__ ( - "movli.l @%2, %0 \n" - "add #-1, %0 \n" - "movco.l %0, @%2 \n" - "movt %1 \n" - : "=&z" (__res), "=&r" (__done) - : "r" (&(count)->counter) - : "t"); - - if (unlikely(!__done || __res != 0)) - fail_fn(count); -} - -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - int __done, __res; - - __asm__ __volatile__ ( - "movli.l @%2, %0 \n" - "add #-1, %0 \n" - "movco.l %0, @%2 \n" - "movt %1 \n" - : "=&z" (__res), "=&r" (__done) - : "r" (&(count)->counter) - : "t"); - - if (unlikely(!__done || __res != 0)) - __res = -1; - - return __res; -} - -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int __done, __res; - - __asm__ __volatile__ ( - "movli.l @%2, %0 \n\t" - "add #1, %0 \n\t" - "movco.l %0, @%2 \n\t" - "movt %1 \n\t" - : "=&z" (__res), "=&r" (__done) - : "r" (&(count)->counter) - : "t"); - - if (unlikely(!__done || __res <= 0)) - fail_fn(count); -} - -/* - * If the unlock was done on a contended lock, or if the unlock simply fails - * then the mutex remains locked. - */ -#define __mutex_slowpath_needs_to_unlock() 1 - -/* - * For __mutex_fastpath_trylock we do an atomic decrement and check the - * result and put it in the __res variable. - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - int __res, __orig; - - __asm__ __volatile__ ( - "1: movli.l @%2, %0 \n\t" - "dt %0 \n\t" - "movco.l %0,@%2 \n\t" - "bf 1b \n\t" - "cmp/eq #0,%0 \n\t" - "bt 2f \n\t" - "mov #0, %1 \n\t" - "bf 3f \n\t" - "2: mov #1, %1 \n\t" - "3: " - : "=&z" (__orig), "=&r" (__res) - : "r" (&count->counter) - : "t"); - - return __res; -} -#endif /* __ASM_SH_MUTEX_LLSC_H */ diff --git a/arch/sh/include/asm/mutex.h b/arch/sh/include/asm/mutex.h deleted file mode 100644 index d8e37716a4a0..000000000000 --- a/arch/sh/include/asm/mutex.h +++ /dev/null @@ -1,12 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ -#if defined(CONFIG_CPU_SH4A) -#include -#else -#include -#endif diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h index f9a09942a32d..5addd69f70ef 100644 --- a/arch/sh/include/asm/processor.h +++ b/arch/sh/include/asm/processor.h @@ -97,7 +97,6 @@ extern struct sh_cpuinfo cpu_data[]; #define cpu_sleep() __asm__ __volatile__ ("sleep" : : : "memory") #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() void default_idle(void); void stop_this_cpu(void *); diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index cfc918067f80..0569bfac4afb 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -15,7 +15,6 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += module.h -generic-y += mutex.h generic-y += preempt.h generic-y += rwsem.h generic-y += serial.h diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index 812fd08f3e62..365d4cb267b4 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -119,7 +119,6 @@ extern struct task_struct *last_task_used_math; int do_mathemu(struct pt_regs *regs, struct task_struct *fpt); #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() extern void (*sparc_idle)(void); diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index ce2595c89471..6448cfc8292f 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -216,7 +216,6 @@ unsigned long get_wchan(struct task_struct *task); "nop\n\t" \ ".previous" \ ::: "memory") -#define cpu_relax_lowlatency() cpu_relax() /* Prefetch support. This is tuned for UltraSPARC-III and later. * UltraSPARC-I will treat these as nops, and UltraSPARC-II has diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index ba35c41c71ff..2d1f5638974c 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -21,7 +21,6 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += msgbuf.h -generic-y += mutex.h generic-y += param.h generic-y += parport.h generic-y += poll.h diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 0684e88aacd8..0bc9968b97a1 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -264,8 +264,6 @@ static inline void cpu_relax(void) barrier(); } -#define cpu_relax_lowlatency() cpu_relax() - /* Info on this processor (see fs/proc/cpuinfo.c) */ struct seq_operations; extern const struct seq_operations cpuinfo_op; diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 904f3ebf4220..052f7f6d0551 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -17,7 +17,6 @@ generic-y += irq_work.h generic-y += kdebug.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h -generic-y += mutex.h generic-y += param.h generic-y += pci.h generic-y += percpu.h diff --git a/arch/unicore32/include/asm/mutex.h b/arch/unicore32/include/asm/mutex.h deleted file mode 100644 index fab7d0e8adf6..000000000000 --- a/arch/unicore32/include/asm/mutex.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * linux/arch/unicore32/include/asm/mutex.h - * - * Code specific to PKUnity SoC and UniCore ISA - * - * Copyright (C) 2001-2010 GUAN Xue-tao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * UniCore optimized mutex locking primitives - * - * Please look into asm-generic/mutex-xchg.h for a formal definition. - */ -#ifndef __UNICORE_MUTEX_H__ -#define __UNICORE_MUTEX_H__ - -# include -#endif diff --git a/arch/unicore32/include/asm/processor.h b/arch/unicore32/include/asm/processor.h index 8d21b7adf26b..4eaa42167667 100644 --- a/arch/unicore32/include/asm/processor.h +++ b/arch/unicore32/include/asm/processor.h @@ -71,7 +71,6 @@ extern void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) diff --git a/arch/x86/include/asm/mutex.h b/arch/x86/include/asm/mutex.h deleted file mode 100644 index 7d3a48275394..000000000000 --- a/arch/x86/include/asm/mutex.h +++ /dev/null @@ -1,5 +0,0 @@ -#ifdef CONFIG_X86_32 -# include -#else -# include -#endif diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h deleted file mode 100644 index e9355a84fc67..000000000000 --- a/arch/x86/include/asm/mutex_32.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Assembly implementation of the mutex fastpath, based on atomic - * decrement/increment. - * - * started by Ingo Molnar: - * - * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar - */ -#ifndef _ASM_X86_MUTEX_32_H -#define _ASM_X86_MUTEX_32_H - -#include - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call if it - * wasn't 1 originally. This function MUST leave the value lower than 1 - * even when the "1" assertion wasn't true. - */ -#define __mutex_fastpath_lock(count, fail_fn) \ -do { \ - unsigned int dummy; \ - \ - typecheck(atomic_t *, count); \ - typecheck_fn(void (*)(atomic_t *), fail_fn); \ - \ - asm volatile(LOCK_PREFIX " decl (%%eax)\n" \ - " jns 1f \n" \ - " call " #fail_fn "\n" \ - "1:\n" \ - : "=a" (dummy) \ - : "a" (count) \ - : "memory", "ecx", "edx"); \ -} while (0) - - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int __mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(atomic_dec_return(count) < 0)) - return -1; - else - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the mutex from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * try to promote the mutex from 0 to 1. if it wasn't 0, call . - * In the failure case, this function is allowed to either set the value - * to 1, or to set it to a value lower than 1. - * - * If the implementation sets it to a value of lower than 1, the - * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs - * to return 0 otherwise. - */ -#define __mutex_fastpath_unlock(count, fail_fn) \ -do { \ - unsigned int dummy; \ - \ - typecheck(atomic_t *, count); \ - typecheck_fn(void (*)(atomic_t *), fail_fn); \ - \ - asm volatile(LOCK_PREFIX " incl (%%eax)\n" \ - " jg 1f\n" \ - " call " #fail_fn "\n" \ - "1:\n" \ - : "=a" (dummy) \ - : "a" (count) \ - : "memory", "ecx", "edx"); \ -} while (0) - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to a value lower than 1, and return 0 (failure) - * if it wasn't 1 originally, or return 1 (success) otherwise. This function - * MUST leave the value lower than 1 even when the "1" assertion wasn't true. - * Additionally, if the value was < 0 originally, this function must not leave - * it to 0 on failure. - */ -static inline int __mutex_fastpath_trylock(atomic_t *count, - int (*fail_fn)(atomic_t *)) -{ - /* cmpxchg because it never induces a false contention state. */ - if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) - return 1; - - return 0; -} - -#endif /* _ASM_X86_MUTEX_32_H */ diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h deleted file mode 100644 index d9850758464e..000000000000 --- a/arch/x86/include/asm/mutex_64.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Assembly implementation of the mutex fastpath, based on atomic - * decrement/increment. - * - * started by Ingo Molnar: - * - * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar - */ -#ifndef _ASM_X86_MUTEX_64_H -#define _ASM_X86_MUTEX_64_H - -/** - * __mutex_fastpath_lock - decrement and call function if negative - * @v: pointer of type atomic_t - * @fail_fn: function to call if the result is negative - * - * Atomically decrements @v and calls if the result is negative. - */ -#ifdef CC_HAVE_ASM_GOTO -static inline void __mutex_fastpath_lock(atomic_t *v, - void (*fail_fn)(atomic_t *)) -{ - asm_volatile_goto(LOCK_PREFIX " decl %0\n" - " jns %l[exit]\n" - : : "m" (v->counter) - : "memory", "cc" - : exit); - fail_fn(v); -exit: - return; -} -#else -#define __mutex_fastpath_lock(v, fail_fn) \ -do { \ - unsigned long dummy; \ - \ - typecheck(atomic_t *, v); \ - typecheck_fn(void (*)(atomic_t *), fail_fn); \ - \ - asm volatile(LOCK_PREFIX " decl (%%rdi)\n" \ - " jns 1f \n" \ - " call " #fail_fn "\n" \ - "1:" \ - : "=D" (dummy) \ - : "D" (v) \ - : "rax", "rsi", "rdx", "rcx", \ - "r8", "r9", "r10", "r11", "memory"); \ -} while (0) -#endif - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int __mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(atomic_dec_return(count) < 0)) - return -1; - else - return 0; -} - -/** - * __mutex_fastpath_unlock - increment and call function if nonpositive - * @v: pointer of type atomic_t - * @fail_fn: function to call if the result is nonpositive - * - * Atomically increments @v and calls if the result is nonpositive. - */ -#ifdef CC_HAVE_ASM_GOTO -static inline void __mutex_fastpath_unlock(atomic_t *v, - void (*fail_fn)(atomic_t *)) -{ - asm_volatile_goto(LOCK_PREFIX " incl %0\n" - " jg %l[exit]\n" - : : "m" (v->counter) - : "memory", "cc" - : exit); - fail_fn(v); -exit: - return; -} -#else -#define __mutex_fastpath_unlock(v, fail_fn) \ -do { \ - unsigned long dummy; \ - \ - typecheck(atomic_t *, v); \ - typecheck_fn(void (*)(atomic_t *), fail_fn); \ - \ - asm volatile(LOCK_PREFIX " incl (%%rdi)\n" \ - " jg 1f\n" \ - " call " #fail_fn "\n" \ - "1:" \ - : "=D" (dummy) \ - : "D" (v) \ - : "rax", "rsi", "rdx", "rcx", \ - "r8", "r9", "r10", "r11", "memory"); \ -} while (0) -#endif - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to 0 and return 1 (success), or return 0 (failure) - * if it wasn't 1 originally. [the fallback function is never used on - * x86_64, because all x86_64 CPUs have a CMPXCHG instruction.] - */ -static inline int __mutex_fastpath_trylock(atomic_t *count, - int (*fail_fn)(atomic_t *)) -{ - if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) - return 1; - - return 0; -} - -#endif /* _ASM_X86_MUTEX_64_H */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ce932812f142..6108b1fada2b 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -678,6 +678,11 @@ static __always_inline void pv_kick(int cpu) PVOP_VCALL1(pv_lock_ops.kick, cpu); } +static __always_inline bool pv_vcpu_is_preempted(int cpu) +{ + return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); +} + #endif /* SMP && PARAVIRT_SPINLOCKS */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 0f400c0e4979..3f2bc0f0d3e8 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -310,6 +310,8 @@ struct pv_lock_ops { void (*wait)(u8 *ptr, u8 val); void (*kick)(int cpu); + + struct paravirt_callee_save vcpu_is_preempted; }; /* This contains all the paravirt structures: we get a convenient @@ -508,6 +510,18 @@ int paravirt_disable_iospace(void); #define PVOP_TEST_NULL(op) ((void)op) #endif +#define PVOP_RETMASK(rettype) \ + ({ unsigned long __mask = ~0UL; \ + switch (sizeof(rettype)) { \ + case 1: __mask = 0xffUL; break; \ + case 2: __mask = 0xffffUL; break; \ + case 4: __mask = 0xffffffffUL; break; \ + default: break; \ + } \ + __mask; \ + }) + + #define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ pre, post, ...) \ ({ \ @@ -535,7 +549,7 @@ int paravirt_disable_iospace(void); paravirt_clobber(clbr), \ ##__VA_ARGS__ \ : "memory", "cc" extra_clbr); \ - __ret = (rettype)__eax; \ + __ret = (rettype)(__eax & PVOP_RETMASK(rettype)); \ } \ __ret; \ }) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 984a7bf17f6a..c84605bb2a15 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -588,8 +588,6 @@ static __always_inline void cpu_relax(void) rep_nop(); } -#define cpu_relax_lowlatency() cpu_relax() - /* Stop speculative execution and prefetching of modified code. */ static inline void sync_core(void) { diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index eaba08076030..c343ab52579f 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -32,6 +32,12 @@ static inline void queued_spin_unlock(struct qspinlock *lock) { pv_queued_spin_unlock(lock); } + +#define vcpu_is_preempted vcpu_is_preempted +static inline bool vcpu_is_preempted(int cpu) +{ + return pv_vcpu_is_preempted(cpu); +} #else static inline void queued_spin_unlock(struct qspinlock *lock) { diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 94dc8ca434e0..1421a6585126 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -45,7 +45,9 @@ struct kvm_steal_time { __u64 steal; __u32 version; __u32 flags; - __u32 pad[12]; + __u8 preempted; + __u8 u8_pad[3]; + __u32 pad[11]; }; #define KVM_STEAL_ALIGNMENT_BITS 5 diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index edbbfc854e39..52e90d6054fb 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -592,6 +592,14 @@ out: local_irq_restore(flags); } +__visible bool __kvm_vcpu_is_preempted(int cpu) +{ + struct kvm_steal_time *src = &per_cpu(steal_time, cpu); + + return !!src->preempted; +} +PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted); + /* * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. */ @@ -608,6 +616,11 @@ void __init kvm_spinlock_init(void) pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); pv_lock_ops.wait = kvm_wait; pv_lock_ops.kick = kvm_kick_cpu; + + if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { + pv_lock_ops.vcpu_is_preempted = + PV_CALLEE_SAVE(__kvm_vcpu_is_preempted); + } } static __init int kvm_spinlock_init_jump(void) diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 2c55a003b793..6d4bf812af45 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -12,7 +12,6 @@ __visible void __native_queued_spin_unlock(struct qspinlock *lock) { native_queued_spin_unlock(lock); } - PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock); bool pv_is_native_spin_unlock(void) @@ -21,12 +20,25 @@ bool pv_is_native_spin_unlock(void) __raw_callee_save___native_queued_spin_unlock; } +__visible bool __native_vcpu_is_preempted(int cpu) +{ + return false; +} +PV_CALLEE_SAVE_REGS_THUNK(__native_vcpu_is_preempted); + +bool pv_is_native_vcpu_is_preempted(void) +{ + return pv_lock_ops.vcpu_is_preempted.func == + __raw_callee_save___native_vcpu_is_preempted; +} + struct pv_lock_ops pv_lock_ops = { #ifdef CONFIG_SMP .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath, .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock), .wait = paravirt_nop, .kick = paravirt_nop, + .vcpu_is_preempted = PV_CALLEE_SAVE(__native_vcpu_is_preempted), #endif /* SMP */ }; EXPORT_SYMBOL(pv_lock_ops); diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 920c6ae08592..33cdec221f3d 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -12,6 +12,7 @@ DEF_NATIVE(pv_cpu_ops, clts, "clts"); #if defined(CONFIG_PARAVIRT_SPINLOCKS) DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); +DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax"); #endif unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) @@ -27,6 +28,7 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) } extern bool pv_is_native_spin_unlock(void); +extern bool pv_is_native_vcpu_is_preempted(void); unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) @@ -56,9 +58,19 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, end = end_pv_lock_ops_queued_spin_unlock; goto patch_site; } + goto patch_default; + + case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted): + if (pv_is_native_vcpu_is_preempted()) { + start = start_pv_lock_ops_vcpu_is_preempted; + end = end_pv_lock_ops_vcpu_is_preempted; + goto patch_site; + } + goto patch_default; #endif default: +patch_default: ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); break; diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index bb3840cedb4f..b0fceff502b3 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -21,6 +21,7 @@ DEF_NATIVE(, mov64, "mov %rdi, %rax"); #if defined(CONFIG_PARAVIRT_SPINLOCKS) DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)"); +DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %rax, %rax"); #endif unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) @@ -36,6 +37,7 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) } extern bool pv_is_native_spin_unlock(void); +extern bool pv_is_native_vcpu_is_preempted(void); unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) @@ -68,9 +70,19 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, end = end_pv_lock_ops_queued_spin_unlock; goto patch_site; } + goto patch_default; + + case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted): + if (pv_is_native_vcpu_is_preempted()) { + start = start_pv_lock_ops_vcpu_is_preempted; + end = end_pv_lock_ops_vcpu_is_preempted; + goto patch_site; + } + goto patch_default; #endif default: +patch_default: ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 04c5d96b1d67..59c2d6f1b131 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2071,6 +2071,8 @@ static void record_steal_time(struct kvm_vcpu *vcpu) &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) return; + vcpu->arch.st.steal.preempted = 0; + if (vcpu->arch.st.steal.version & 1) vcpu->arch.st.steal.version += 1; /* first time write, random junk */ @@ -2826,8 +2828,22 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); } +static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) + return; + + vcpu->arch.st.steal.preempted = 1; + + kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, + &vcpu->arch.st.steal.preempted, + offsetof(struct kvm_steal_time, preempted), + sizeof(vcpu->arch.st.steal.preempted)); +} + void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + kvm_steal_time_set_preempted(vcpu); kvm_x86_ops->vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); vcpu->arch.last_host_tsc = rdtsc(); diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h index 233ee09c1ce8..c77db2288982 100644 --- a/arch/x86/um/asm/processor.h +++ b/arch/x86/um/asm/processor.h @@ -26,7 +26,6 @@ static inline void rep_nop(void) } #define cpu_relax() rep_nop() -#define cpu_relax_lowlatency() cpu_relax() #define task_pt_regs(t) (&(t)->thread.regs) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 3d6e0064cbfc..e8a9ea7d7a21 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -114,6 +114,7 @@ void xen_uninit_lock_cpu(int cpu) per_cpu(irq_name, cpu) = NULL; } +PV_CALLEE_SAVE_REGS_THUNK(xen_vcpu_stolen); /* * Our init of PV spinlocks is split in two init functions due to us @@ -137,6 +138,7 @@ void __init xen_init_spinlocks(void) pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); pv_lock_ops.wait = xen_qlock_wait; pv_lock_ops.kick = xen_qlock_kick; + pv_lock_ops.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen); } /* diff --git a/arch/xtensa/include/asm/mutex.h b/arch/xtensa/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/xtensa/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index b42d68bfe3cf..86ffcd68e496 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -206,7 +206,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) (task_pt_regs(tsk)->areg[1]) #define cpu_relax() barrier() -#define cpu_relax_lowlatency() cpu_relax() /* Special register access. */ diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 8832f8ec1583..383d13416442 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -723,7 +723,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, if (busywait_stop(timeout_us, cpu)) break; - cpu_relax_lowlatency(); + cpu_relax(); } while (!need_resched()); return false; diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 1c237d02f30b..c450076d2f9b 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -35,19 +35,6 @@ #include "i915_drv.h" #include "i915_trace.h" -static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) -{ - if (!mutex_is_locked(mutex)) - return false; - -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER) - return mutex->owner == task; -#else - /* Since UP may be pre-empted, we cannot assume that we own the lock */ - return false; -#endif -} - static bool any_vma_pinned(struct drm_i915_gem_object *obj) { struct i915_vma *vma; @@ -240,15 +227,20 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) { - if (!mutex_trylock(&dev->struct_mutex)) { - if (!mutex_is_locked_by(&dev->struct_mutex, current)) - return false; + switch (mutex_trylock_recursive(&dev->struct_mutex)) { + case MUTEX_TRYLOCK_FAILED: + return false; - *unlock = false; - } else + case MUTEX_TRYLOCK_SUCCESS: *unlock = true; + return true; - return true; + case MUTEX_TRYLOCK_RECURSIVE: + *unlock = false; + return true; + } + + BUG(); } static unsigned long diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 192b2d3a79cb..ab1dd020eb04 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -18,33 +18,24 @@ #include "msm_drv.h" #include "msm_gem.h" -static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) -{ - if (!mutex_is_locked(mutex)) - return false; - -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) - return mutex->owner == task; -#else - /* Since UP may be pre-empted, we cannot assume that we own the lock */ - return false; -#endif -} - static bool msm_gem_shrinker_lock(struct drm_device *dev, bool *unlock) { - if (!mutex_trylock(&dev->struct_mutex)) { - if (!mutex_is_locked_by(&dev->struct_mutex, current)) - return false; - *unlock = false; - } else { + switch (mutex_trylock_recursive(&dev->struct_mutex)) { + case MUTEX_TRYLOCK_FAILED: + return false; + + case MUTEX_TRYLOCK_SUCCESS: *unlock = true; + return true; + + case MUTEX_TRYLOCK_RECURSIVE: + *unlock = false; + return true; } - return true; + BUG(); } - static unsigned long msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 5dc128a8da83..5dc34653274a 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -342,7 +342,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net, endtime = busy_clock() + vq->busyloop_timeout; while (vhost_can_busy_poll(vq->dev, endtime) && vhost_vq_avail_empty(vq->dev, vq)) - cpu_relax_lowlatency(); + cpu_relax(); preempt_enable(); r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), out_num, in_num, NULL, NULL); @@ -533,7 +533,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) while (vhost_can_busy_poll(&net->dev, endtime) && !sk_has_rx_data(sk) && vhost_vq_avail_empty(&net->dev, vq)) - cpu_relax_lowlatency(); + cpu_relax(); preempt_enable(); diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h deleted file mode 100644 index c54829d3de37..000000000000 --- a/include/asm-generic/mutex-dec.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * include/asm-generic/mutex-dec.h - * - * Generic implementation of the mutex fastpath, based on atomic - * decrement/increment. - */ -#ifndef _ASM_GENERIC_MUTEX_DEC_H -#define _ASM_GENERIC_MUTEX_DEC_H - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call if - * it wasn't 1 originally. This function MUST leave the value lower than - * 1 even when the "1" assertion wasn't true. - */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(atomic_dec_return_acquire(count) < 0)) - fail_fn(count); -} - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(atomic_dec_return_acquire(count) < 0)) - return -1; - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the count from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * Try to promote the count from 0 to 1. If it wasn't 0, call . - * In the failure case, this function is allowed to either set the value to - * 1, or to set it to a value lower than 1. - * - * If the implementation sets it to a value of lower than 1, then the - * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs - * to return 0 otherwise. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(atomic_inc_return_release(count) <= 0)) - fail_fn(count); -} - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to a value lower than 1, and return 0 (failure) - * if it wasn't 1 originally, or return 1 (success) otherwise. This function - * MUST leave the value lower than 1 even when the "1" assertion wasn't true. - * Additionally, if the value was < 0 originally, this function must not leave - * it to 0 on failure. - * - * If the architecture has no effective trylock variant, it should call the - * spinlock-based trylock variant unconditionally. - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - if (likely(atomic_read(count) == 1 && atomic_cmpxchg_acquire(count, 1, 0) == 1)) - return 1; - return 0; -} - -#endif diff --git a/include/asm-generic/mutex-null.h b/include/asm-generic/mutex-null.h deleted file mode 100644 index 61069ed334e2..000000000000 --- a/include/asm-generic/mutex-null.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * include/asm-generic/mutex-null.h - * - * Generic implementation of the mutex fastpath, based on NOP :-) - * - * This is used by the mutex-debugging infrastructure, but it can also - * be used by architectures that (for whatever reason) want to use the - * spinlock based slowpath. - */ -#ifndef _ASM_GENERIC_MUTEX_NULL_H -#define _ASM_GENERIC_MUTEX_NULL_H - -#define __mutex_fastpath_lock(count, fail_fn) fail_fn(count) -#define __mutex_fastpath_lock_retval(count) (-1) -#define __mutex_fastpath_unlock(count, fail_fn) fail_fn(count) -#define __mutex_fastpath_trylock(count, fail_fn) fail_fn(count) -#define __mutex_slowpath_needs_to_unlock() 1 - -#endif diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h deleted file mode 100644 index 3269ec4e195f..000000000000 --- a/include/asm-generic/mutex-xchg.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * include/asm-generic/mutex-xchg.h - * - * Generic implementation of the mutex fastpath, based on xchg(). - * - * NOTE: An xchg based implementation might be less optimal than an atomic - * decrement/increment based implementation. If your architecture - * has a reasonable atomic dec/inc then you should probably use - * asm-generic/mutex-dec.h instead, or you could open-code an - * optimized version in asm/mutex.h. - */ -#ifndef _ASM_GENERIC_MUTEX_XCHG_H -#define _ASM_GENERIC_MUTEX_XCHG_H - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call if it - * wasn't 1 originally. This function MUST leave the value lower than 1 - * even when the "1" assertion wasn't true. - */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(atomic_xchg(count, 0) != 1)) - /* - * We failed to acquire the lock, so mark it contended - * to ensure that any waiting tasks are woken up by the - * unlock slow path. - */ - if (likely(atomic_xchg_acquire(count, -1) != 1)) - fail_fn(count); -} - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(atomic_xchg_acquire(count, 0) != 1)) - if (likely(atomic_xchg(count, -1) != 1)) - return -1; - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the mutex from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * try to promote the mutex from 0 to 1. if it wasn't 0, call - * In the failure case, this function is allowed to either set the value to - * 1, or to set it to a value lower than one. - * If the implementation sets it to a value of lower than one, the - * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs - * to return 0 otherwise. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(atomic_xchg_release(count, 1) != 0)) - fail_fn(count); -} - -#define __mutex_slowpath_needs_to_unlock() 0 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: spinlock based trylock implementation - * - * Change the count from 1 to a value lower than 1, and return 0 (failure) - * if it wasn't 1 originally, or return 1 (success) otherwise. This function - * MUST leave the value lower than 1 even when the "1" assertion wasn't true. - * Additionally, if the value was < 0 originally, this function must not leave - * it to 0 on failure. - * - * If the architecture has no effective trylock variant, it should call the - * spinlock-based trylock variant unconditionally. - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - int prev; - - if (atomic_read(count) != 1) - return 0; - - prev = atomic_xchg_acquire(count, 0); - if (unlikely(prev < 0)) { - /* - * The lock was marked contended so we must restore that - * state. If while doing so we get back a prev value of 1 - * then we just own it. - * - * [ In the rare case of the mutex going to 1, to 0, to -1 - * and then back to 0 in this few-instructions window, - * this has the potential to trigger the slowpath for the - * owner's unlock path needlessly, but that's not a problem - * in practice. ] - */ - prev = atomic_xchg_acquire(count, prev); - if (prev < 0) - prev = 0; - } - - return prev; -} - -#endif diff --git a/include/asm-generic/mutex.h b/include/asm-generic/mutex.h deleted file mode 100644 index fe91ab502793..000000000000 --- a/include/asm-generic/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __ASM_GENERIC_MUTEX_H -#define __ASM_GENERIC_MUTEX_H -/* - * Pull in the generic implementation for the mutex fastpath, - * which is a reasonable default on many architectures. - */ - -#include -#endif /* __ASM_GENERIC_MUTEX_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 01c0b9cc3915..6f0023797b33 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -645,6 +645,8 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, unsigned long len); int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned long len); +int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, int offset, unsigned long len); int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h deleted file mode 100644 index 4ac8b1977b73..000000000000 --- a/include/linux/mutex-debug.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef __LINUX_MUTEX_DEBUG_H -#define __LINUX_MUTEX_DEBUG_H - -#include -#include -#include - -/* - * Mutexes - debugging helpers: - */ - -#define __DEBUG_MUTEX_INITIALIZER(lockname) \ - , .magic = &lockname - -#define mutex_init(mutex) \ -do { \ - static struct lock_class_key __key; \ - \ - __mutex_init((mutex), #mutex, &__key); \ -} while (0) - -extern void mutex_destroy(struct mutex *lock); - -#endif diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 2cb7531e7d7a..b97870f2debd 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -18,6 +18,7 @@ #include #include #include +#include /* * Simple, straightforward mutexes with strict semantics: @@ -48,16 +49,12 @@ * locks and tasks (and only those tasks) */ struct mutex { - /* 1: unlocked, 0: locked, negative: locked, possible waiters */ - atomic_t count; + atomic_long_t owner; spinlock_t wait_lock; - struct list_head wait_list; -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER) - struct task_struct *owner; -#endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER struct optimistic_spin_queue osq; /* Spinner MCS lock */ #endif + struct list_head wait_list; #ifdef CONFIG_DEBUG_MUTEXES void *magic; #endif @@ -66,6 +63,11 @@ struct mutex { #endif }; +static inline struct task_struct *__mutex_owner(struct mutex *lock) +{ + return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03); +} + /* * This is the control structure for tasks blocked on mutex, * which resides on the blocked task's kernel stack: @@ -79,9 +81,20 @@ struct mutex_waiter { }; #ifdef CONFIG_DEBUG_MUTEXES -# include + +#define __DEBUG_MUTEX_INITIALIZER(lockname) \ + , .magic = &lockname + +extern void mutex_destroy(struct mutex *lock); + #else + # define __DEBUG_MUTEX_INITIALIZER(lockname) + +static inline void mutex_destroy(struct mutex *lock) {} + +#endif + /** * mutex_init - initialize the mutex * @mutex: the mutex to be initialized @@ -90,14 +103,12 @@ struct mutex_waiter { * * It is not allowed to initialize an already locked mutex. */ -# define mutex_init(mutex) \ -do { \ - static struct lock_class_key __key; \ - \ - __mutex_init((mutex), #mutex, &__key); \ +#define mutex_init(mutex) \ +do { \ + static struct lock_class_key __key; \ + \ + __mutex_init((mutex), #mutex, &__key); \ } while (0) -static inline void mutex_destroy(struct mutex *lock) {} -#endif #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ @@ -107,7 +118,7 @@ static inline void mutex_destroy(struct mutex *lock) {} #endif #define __MUTEX_INITIALIZER(lockname) \ - { .count = ATOMIC_INIT(1) \ + { .owner = ATOMIC_LONG_INIT(0) \ , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \ __DEBUG_MUTEX_INITIALIZER(lockname) \ @@ -127,7 +138,10 @@ extern void __mutex_init(struct mutex *lock, const char *name, */ static inline int mutex_is_locked(struct mutex *lock) { - return atomic_read(&lock->count) != 1; + /* + * XXX think about spin_is_locked + */ + return __mutex_owner(lock) != NULL; } /* @@ -175,4 +189,35 @@ extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); +/* + * These values are chosen such that FAIL and SUCCESS match the + * values of the regular mutex_trylock(). + */ +enum mutex_trylock_recursive_enum { + MUTEX_TRYLOCK_FAILED = 0, + MUTEX_TRYLOCK_SUCCESS = 1, + MUTEX_TRYLOCK_RECURSIVE, +}; + +/** + * mutex_trylock_recursive - trylock variant that allows recursive locking + * @lock: mutex to be locked + * + * This function should not be used, _ever_. It is purely for hysterical GEM + * raisins, and once those are gone this will be removed. + * + * Returns: + * MUTEX_TRYLOCK_FAILED - trylock failed, + * MUTEX_TRYLOCK_SUCCESS - lock acquired, + * MUTEX_TRYLOCK_RECURSIVE - we already owned the lock. + */ +static inline /* __deprecated */ __must_check enum mutex_trylock_recursive_enum +mutex_trylock_recursive(struct mutex *lock) +{ + if (unlikely(__mutex_owner(lock) == current)) + return MUTEX_TRYLOCK_RECURSIVE; + + return mutex_trylock(lock); +} + #endif /* __LINUX_MUTEX_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index e9c009dc3a4a..8863bdf582d5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -989,7 +989,7 @@ enum cpu_idle_type { * already in a wake queue, the wakeup will happen soon and the second * waker can just skip it. * - * The WAKE_Q macro declares and initializes the list head. + * The DEFINE_WAKE_Q macro declares and initializes the list head. * wake_up_q() does NOT reinitialize the list; it's expected to be * called near the end of a function, where the fact that the queue is * not used again will be easy to see by inspection. @@ -1009,7 +1009,7 @@ struct wake_q_head { #define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) -#define WAKE_Q(name) \ +#define DEFINE_WAKE_Q(name) \ struct wake_q_head name = { WAKE_Q_TAIL, &name.first } extern void wake_q_add(struct wake_q_head *head, @@ -2444,6 +2444,10 @@ static inline void calc_load_enter_idle(void) { } static inline void calc_load_exit_idle(void) { } #endif /* CONFIG_NO_HZ_COMMON */ +#ifndef cpu_relax_yield +#define cpu_relax_yield() cpu_relax() +#endif + /* * Do not use outside of architecture code which knows its limitations. * @@ -3508,6 +3512,18 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ +/* + * In order to reduce various lock holder preemption latencies provide an + * interface to see if a vCPU is currently running or not. + * + * This allows us to terminate optimistic spin loops and block, analogous to + * the native optimistic spin heuristic of testing if the lock owner task is + * running or not. + */ +#ifndef vcpu_is_preempted +# define vcpu_is_preempted(cpu) false +#endif + extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 2bb5deb0012e..7b0066814fa0 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -120,7 +120,7 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, struct ww_class *ww_class) { ctx->task = current; - ctx->stamp = atomic_long_inc_return(&ww_class->stamp); + ctx->stamp = atomic_long_inc_return_relaxed(&ww_class->stamp); ctx->acquired = 0; #ifdef CONFIG_DEBUG_MUTEXES ctx->ww_class = ww_class; diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 8cbd6e6894d5..7a2d8f0c8ae5 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -967,7 +967,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, struct timespec ts; struct posix_msg_tree_node *new_leaf = NULL; int ret = 0; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); if (u_abs_timeout) { int res = prepare_timeout(u_abs_timeout, &expires, &ts); @@ -1151,7 +1151,7 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, msg_ptr = wait.msg; } } else { - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); msg_ptr = msg_get(info); diff --git a/ipc/msg.c b/ipc/msg.c index e12307d0c920..32e9bd837cde 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -235,7 +235,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) { struct msg_msg *msg, *t; struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); expunge_all(msq, -EIDRM, &wake_q); ss_wakeup(msq, &wake_q, true); @@ -397,7 +397,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, goto out_up; case IPC_SET: { - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); if (msqid64.msg_qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE)) { @@ -634,7 +634,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, struct msg_msg *msg; int err; struct ipc_namespace *ns; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); ns = current->nsproxy->ipc_ns; @@ -850,7 +850,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl struct msg_queue *msq; struct ipc_namespace *ns; struct msg_msg *msg, *copy = NULL; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); ns = current->nsproxy->ipc_ns; diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index ebdb0043203a..84d882f3e299 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -225,7 +225,7 @@ config ARCH_SUPPORTS_ATOMIC_RMW config MUTEX_SPIN_ON_OWNER def_bool y - depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW + depends on SMP && ARCH_SUPPORTS_ATOMIC_RMW config RWSEM_SPIN_ON_OWNER def_bool y diff --git a/kernel/futex.c b/kernel/futex.c index 2c4be467fecd..9246d9f593d1 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1298,7 +1298,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, struct task_struct *new_owner; struct futex_pi_state *pi_state = this->pi_state; u32 uninitialized_var(curval), newval; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); bool deboost; int ret = 0; @@ -1415,7 +1415,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) struct futex_q *this, *next; union futex_key key = FUTEX_KEY_INIT; int ret; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); if (!bitset) return -EINVAL; @@ -1469,7 +1469,7 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, struct futex_hash_bucket *hb1, *hb2; struct futex_q *this, *next; int ret, op_ret; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); retry: ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); @@ -1708,7 +1708,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, struct futex_pi_state *pi_state = NULL; struct futex_hash_bucket *hb1, *hb2; struct futex_q *this, *next; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); if (requeue_pi) { /* diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 4d7ffc0a0d00..7bd265f6b098 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -840,9 +840,9 @@ static struct lock_list *alloc_list_entry(void) /* * Add a new dependency to the head of the list: */ -static int add_lock_to_list(struct lock_class *class, struct lock_class *this, - struct list_head *head, unsigned long ip, - int distance, struct stack_trace *trace) +static int add_lock_to_list(struct lock_class *this, struct list_head *head, + unsigned long ip, int distance, + struct stack_trace *trace) { struct lock_list *entry; /* @@ -1868,14 +1868,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, * Ok, all validations passed, add the new lock * to the previous lock's dependency list: */ - ret = add_lock_to_list(hlock_class(prev), hlock_class(next), + ret = add_lock_to_list(hlock_class(next), &hlock_class(prev)->locks_after, next->acquire_ip, distance, &trace); if (!ret) return 0; - ret = add_lock_to_list(hlock_class(next), hlock_class(prev), + ret = add_lock_to_list(hlock_class(prev), &hlock_class(next)->locks_before, next->acquire_ip, distance, &trace); if (!ret) diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index c835270f0c2f..6a385aabcce7 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -28,7 +28,7 @@ struct mcs_spinlock { #define arch_mcs_spin_lock_contended(l) \ do { \ while (!(smp_load_acquire(l))) \ - cpu_relax_lowlatency(); \ + cpu_relax(); \ } while (0) #endif @@ -108,7 +108,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) return; /* Wait until the next pointer is set */ while (!(next = READ_ONCE(node->next))) - cpu_relax_lowlatency(); + cpu_relax(); } /* Pass lock to next waiter. */ diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 9c951fade415..9aa713629387 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -73,21 +73,8 @@ void debug_mutex_unlock(struct mutex *lock) { if (likely(debug_locks)) { DEBUG_LOCKS_WARN_ON(lock->magic != lock); - - if (!lock->owner) - DEBUG_LOCKS_WARN_ON(!lock->owner); - else - DEBUG_LOCKS_WARN_ON(lock->owner != current); - DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); } - - /* - * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug - * mutexes so that we can do it here after we've verified state. - */ - mutex_clear_owner(lock); - atomic_set(&lock->count, 1); } void debug_mutex_init(struct mutex *lock, const char *name, diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h index 57a871ae3c81..a459faa48987 100644 --- a/kernel/locking/mutex-debug.h +++ b/kernel/locking/mutex-debug.h @@ -27,16 +27,6 @@ extern void debug_mutex_unlock(struct mutex *lock); extern void debug_mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key); -static inline void mutex_set_owner(struct mutex *lock) -{ - WRITE_ONCE(lock->owner, current); -} - -static inline void mutex_clear_owner(struct mutex *lock) -{ - WRITE_ONCE(lock->owner, NULL); -} - #define spin_lock_mutex(lock, flags) \ do { \ struct mutex *l = container_of(lock, struct mutex, wait_lock); \ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index a70b90db3909..9b349619f431 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -27,41 +27,176 @@ #include #include -/* - * In the DEBUG case we are using the "NULL fastpath" for mutexes, - * which forces all calls into the slowpath: - */ #ifdef CONFIG_DEBUG_MUTEXES # include "mutex-debug.h" -# include -/* - * Must be 0 for the debug case so we do not do the unlock outside of the - * wait_lock region. debug_mutex_unlock() will do the actual unlock in this - * case. - */ -# undef __mutex_slowpath_needs_to_unlock -# define __mutex_slowpath_needs_to_unlock() 0 #else # include "mutex.h" -# include #endif void __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) { - atomic_set(&lock->count, 1); + atomic_long_set(&lock->owner, 0); spin_lock_init(&lock->wait_lock); INIT_LIST_HEAD(&lock->wait_list); - mutex_clear_owner(lock); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER osq_lock_init(&lock->osq); #endif debug_mutex_init(lock, name, key); } - EXPORT_SYMBOL(__mutex_init); +/* + * @owner: contains: 'struct task_struct *' to the current lock owner, + * NULL means not owned. Since task_struct pointers are aligned at + * ARCH_MIN_TASKALIGN (which is at least sizeof(void *)), we have low + * bits to store extra state. + * + * Bit0 indicates a non-empty waiter list; unlock must issue a wakeup. + * Bit1 indicates unlock needs to hand the lock to the top-waiter + */ +#define MUTEX_FLAG_WAITERS 0x01 +#define MUTEX_FLAG_HANDOFF 0x02 + +#define MUTEX_FLAGS 0x03 + +static inline struct task_struct *__owner_task(unsigned long owner) +{ + return (struct task_struct *)(owner & ~MUTEX_FLAGS); +} + +static inline unsigned long __owner_flags(unsigned long owner) +{ + return owner & MUTEX_FLAGS; +} + +/* + * Actual trylock that will work on any unlocked state. + * + * When setting the owner field, we must preserve the low flag bits. + * + * Be careful with @handoff, only set that in a wait-loop (where you set + * HANDOFF) to avoid recursive lock attempts. + */ +static inline bool __mutex_trylock(struct mutex *lock, const bool handoff) +{ + unsigned long owner, curr = (unsigned long)current; + + owner = atomic_long_read(&lock->owner); + for (;;) { /* must loop, can race against a flag */ + unsigned long old, flags = __owner_flags(owner); + + if (__owner_task(owner)) { + if (handoff && unlikely(__owner_task(owner) == current)) { + /* + * Provide ACQUIRE semantics for the lock-handoff. + * + * We cannot easily use load-acquire here, since + * the actual load is a failed cmpxchg, which + * doesn't imply any barriers. + * + * Also, this is a fairly unlikely scenario, and + * this contains the cost. + */ + smp_mb(); /* ACQUIRE */ + return true; + } + + return false; + } + + /* + * We set the HANDOFF bit, we must make sure it doesn't live + * past the point where we acquire it. This would be possible + * if we (accidentally) set the bit on an unlocked mutex. + */ + if (handoff) + flags &= ~MUTEX_FLAG_HANDOFF; + + old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr | flags); + if (old == owner) + return true; + + owner = old; + } +} + +#ifndef CONFIG_DEBUG_LOCK_ALLOC +/* + * Lockdep annotations are contained to the slow paths for simplicity. + * There is nothing that would stop spreading the lockdep annotations outwards + * except more code. + */ + +/* + * Optimistic trylock that only works in the uncontended case. Make sure to + * follow with a __mutex_trylock() before failing. + */ +static __always_inline bool __mutex_trylock_fast(struct mutex *lock) +{ + unsigned long curr = (unsigned long)current; + + if (!atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr)) + return true; + + return false; +} + +static __always_inline bool __mutex_unlock_fast(struct mutex *lock) +{ + unsigned long curr = (unsigned long)current; + + if (atomic_long_cmpxchg_release(&lock->owner, curr, 0UL) == curr) + return true; + + return false; +} +#endif + +static inline void __mutex_set_flag(struct mutex *lock, unsigned long flag) +{ + atomic_long_or(flag, &lock->owner); +} + +static inline void __mutex_clear_flag(struct mutex *lock, unsigned long flag) +{ + atomic_long_andnot(flag, &lock->owner); +} + +static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_waiter *waiter) +{ + return list_first_entry(&lock->wait_list, struct mutex_waiter, list) == waiter; +} + +/* + * Give up ownership to a specific task, when @task = NULL, this is equivalent + * to a regular unlock. Clears HANDOFF, preserves WAITERS. Provides RELEASE + * semantics like a regular unlock, the __mutex_trylock() provides matching + * ACQUIRE semantics for the handoff. + */ +static void __mutex_handoff(struct mutex *lock, struct task_struct *task) +{ + unsigned long owner = atomic_long_read(&lock->owner); + + for (;;) { + unsigned long old, new; + +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current); +#endif + + new = (owner & MUTEX_FLAG_WAITERS); + new |= (unsigned long)task; + + old = atomic_long_cmpxchg_release(&lock->owner, owner, new); + if (old == owner) + break; + + owner = old; + } +} + #ifndef CONFIG_DEBUG_LOCK_ALLOC /* * We split the mutex lock/unlock logic into separate fastpath and @@ -69,7 +204,7 @@ EXPORT_SYMBOL(__mutex_init); * We also put the fastpath first in the kernel image, to make sure the * branch is predicted by the CPU as default-untaken. */ -__visible void __sched __mutex_lock_slowpath(atomic_t *lock_count); +static void __sched __mutex_lock_slowpath(struct mutex *lock); /** * mutex_lock - acquire the mutex @@ -95,14 +230,10 @@ __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count); void __sched mutex_lock(struct mutex *lock) { might_sleep(); - /* - * The locking fastpath is the 1->0 transition from - * 'unlocked' into 'locked' state. - */ - __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath); - mutex_set_owner(lock); -} + if (!__mutex_trylock_fast(lock)) + __mutex_lock_slowpath(lock); +} EXPORT_SYMBOL(mutex_lock); #endif @@ -149,9 +280,6 @@ static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, /* * After acquiring lock with fastpath or when we lost out in contested * slowpath, set ctx and wake up any waiters so they can recheck. - * - * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set, - * as the fastpath and opportunistic spinning are disabled in that case. */ static __always_inline void ww_mutex_set_context_fastpath(struct ww_mutex *lock, @@ -176,7 +304,7 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, /* * Check if lock is contended, if not there is nobody to wake up */ - if (likely(atomic_read(&lock->base.count) == 0)) + if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS))) return; /* @@ -227,7 +355,7 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) bool ret = true; rcu_read_lock(); - while (lock->owner == owner) { + while (__mutex_owner(lock) == owner) { /* * Ensure we emit the owner->on_cpu, dereference _after_ * checking lock->owner still matches owner. If that fails, @@ -236,12 +364,16 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) */ barrier(); - if (!owner->on_cpu || need_resched()) { + /* + * Use vcpu_is_preempted to detect lock holder preemption issue. + */ + if (!owner->on_cpu || need_resched() || + vcpu_is_preempted(task_cpu(owner))) { ret = false; break; } - cpu_relax_lowlatency(); + cpu_relax(); } rcu_read_unlock(); @@ -260,26 +392,24 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) return 0; rcu_read_lock(); - owner = READ_ONCE(lock->owner); - if (owner) - retval = owner->on_cpu; - rcu_read_unlock(); + owner = __mutex_owner(lock); + /* - * if lock->owner is not set, the mutex owner may have just acquired - * it and not set the owner yet or the mutex has been released. + * As lock holder preemption issue, we both skip spinning if task is not + * on cpu or its cpu is preempted + */ + if (owner) + retval = owner->on_cpu && !vcpu_is_preempted(task_cpu(owner)); + rcu_read_unlock(); + + /* + * If lock->owner is not set, the mutex has been released. Return true + * such that we'll trylock in the spin path, which is a faster option + * than the blocking slow path. */ return retval; } -/* - * Atomically try to take the lock when it is available - */ -static inline bool mutex_try_to_acquire(struct mutex *lock) -{ - return !mutex_is_locked(lock) && - (atomic_cmpxchg_acquire(&lock->count, 1, 0) == 1); -} - /* * Optimistic spinning. * @@ -288,13 +418,6 @@ static inline bool mutex_try_to_acquire(struct mutex *lock) * need to reschedule. The rationale is that if the lock owner is * running, it is likely to release the lock soon. * - * Since this needs the lock owner, and this mutex implementation - * doesn't track the owner atomically in the lock field, we need to - * track it non-atomically. - * - * We can't do this for DEBUG_MUTEXES because that relies on wait_lock - * to serialize everything. - * * The mutex spinners are queued up using MCS lock so that only one * spinner can compete for the mutex. However, if mutex spinning isn't * going to happen, there is no point in going through the lock/unlock @@ -302,24 +425,39 @@ static inline bool mutex_try_to_acquire(struct mutex *lock) * * Returns true when the lock was taken, otherwise false, indicating * that we need to jump to the slowpath and sleep. + * + * The waiter flag is set to true if the spinner is a waiter in the wait + * queue. The waiter-spinner will spin on the lock directly and concurrently + * with the spinner at the head of the OSQ, if present, until the owner is + * changed to itself. */ static bool mutex_optimistic_spin(struct mutex *lock, - struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) + struct ww_acquire_ctx *ww_ctx, + const bool use_ww_ctx, const bool waiter) { struct task_struct *task = current; - if (!mutex_can_spin_on_owner(lock)) - goto done; + if (!waiter) { + /* + * The purpose of the mutex_can_spin_on_owner() function is + * to eliminate the overhead of osq_lock() and osq_unlock() + * in case spinning isn't possible. As a waiter-spinner + * is not going to take OSQ lock anyway, there is no need + * to call mutex_can_spin_on_owner(). + */ + if (!mutex_can_spin_on_owner(lock)) + goto fail; - /* - * In order to avoid a stampede of mutex spinners trying to - * acquire the mutex all at once, the spinners need to take a - * MCS (queued) lock first before spinning on the owner field. - */ - if (!osq_lock(&lock->osq)) - goto done; + /* + * In order to avoid a stampede of mutex spinners trying to + * acquire the mutex all at once, the spinners need to take a + * MCS (queued) lock first before spinning on the owner field. + */ + if (!osq_lock(&lock->osq)) + goto fail; + } - while (true) { + for (;;) { struct task_struct *owner; if (use_ww_ctx && ww_ctx->acquired > 0) { @@ -335,40 +473,26 @@ static bool mutex_optimistic_spin(struct mutex *lock, * performed the optimistic spinning cannot be done. */ if (READ_ONCE(ww->ctx)) - break; + goto fail_unlock; } /* * If there's an owner, wait for it to either * release the lock or go to sleep. */ - owner = READ_ONCE(lock->owner); - if (owner && !mutex_spin_on_owner(lock, owner)) - break; - - /* Try to acquire the mutex if it is unlocked. */ - if (mutex_try_to_acquire(lock)) { - lock_acquired(&lock->dep_map, ip); - - if (use_ww_ctx) { - struct ww_mutex *ww; - ww = container_of(lock, struct ww_mutex, base); - - ww_mutex_set_context_fastpath(ww, ww_ctx); + owner = __mutex_owner(lock); + if (owner) { + if (waiter && owner == task) { + smp_mb(); /* ACQUIRE */ + break; } - mutex_set_owner(lock); - osq_unlock(&lock->osq); - return true; + if (!mutex_spin_on_owner(lock, owner)) + goto fail_unlock; } - /* - * When there's no owner, we might have preempted between the - * owner acquiring the lock and setting the owner field. If - * we're an RT task that will live-lock because we won't let - * the owner complete. - */ - if (!owner && (need_resched() || rt_task(task))) + /* Try to acquire the mutex if it is unlocked. */ + if (__mutex_trylock(lock, waiter)) break; /* @@ -377,11 +501,20 @@ static bool mutex_optimistic_spin(struct mutex *lock, * memory barriers as we'll eventually observe the right * values at the cost of a few extra spins. */ - cpu_relax_lowlatency(); + cpu_relax(); } - osq_unlock(&lock->osq); -done: + if (!waiter) + osq_unlock(&lock->osq); + + return true; + + +fail_unlock: + if (!waiter) + osq_unlock(&lock->osq); + +fail: /* * If we fell out of the spin path because of need_resched(), * reschedule now, before we try-lock the mutex. This avoids getting @@ -400,14 +533,14 @@ done: } #else static bool mutex_optimistic_spin(struct mutex *lock, - struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) + struct ww_acquire_ctx *ww_ctx, + const bool use_ww_ctx, const bool waiter) { return false; } #endif -__visible __used noinline -void __sched __mutex_unlock_slowpath(atomic_t *lock_count); +static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip); /** * mutex_unlock - release the mutex @@ -422,21 +555,12 @@ void __sched __mutex_unlock_slowpath(atomic_t *lock_count); */ void __sched mutex_unlock(struct mutex *lock) { - /* - * The unlocking fastpath is the 0->1 transition from 'locked' - * into 'unlocked' state: - */ -#ifndef CONFIG_DEBUG_MUTEXES - /* - * When debugging is enabled we must not clear the owner before time, - * the slow path will always be taken, and that clears the owner field - * after verifying that it was indeed current. - */ - mutex_clear_owner(lock); +#ifndef CONFIG_DEBUG_LOCK_ALLOC + if (__mutex_unlock_fast(lock)) + return; #endif - __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath); + __mutex_unlock_slowpath(lock, _RET_IP_); } - EXPORT_SYMBOL(mutex_unlock); /** @@ -465,15 +589,7 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock) lock->ctx = NULL; } -#ifndef CONFIG_DEBUG_MUTEXES - /* - * When debugging is enabled we must not clear the owner before time, - * the slow path will always be taken, and that clears the owner field - * after verifying that it was indeed current. - */ - mutex_clear_owner(&lock->base); -#endif - __mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath); + mutex_unlock(&lock->base); } EXPORT_SYMBOL(ww_mutex_unlock); @@ -509,10 +625,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, struct task_struct *task = current; struct mutex_waiter waiter; unsigned long flags; + bool first = false; + struct ww_mutex *ww; int ret; if (use_ww_ctx) { - struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); + ww = container_of(lock, struct ww_mutex, base); if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) return -EALREADY; } @@ -520,20 +638,21 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, preempt_disable(); mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); - if (mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) { + if (__mutex_trylock(lock, false) || + mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, false)) { /* got the lock, yay! */ + lock_acquired(&lock->dep_map, ip); + if (use_ww_ctx) + ww_mutex_set_context_fastpath(ww, ww_ctx); preempt_enable(); return 0; } spin_lock_mutex(&lock->wait_lock, flags); - /* - * Once more, try to acquire the lock. Only try-lock the mutex if - * it is unlocked to reduce unnecessary xchg() operations. + * After waiting to acquire the wait_lock, try again. */ - if (!mutex_is_locked(lock) && - (atomic_xchg_acquire(&lock->count, 0) == 1)) + if (__mutex_trylock(lock, false)) goto skip_wait; debug_mutex_lock_common(lock, &waiter); @@ -543,26 +662,26 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, list_add_tail(&waiter.list, &lock->wait_list); waiter.task = task; + if (__mutex_waiter_is_first(lock, &waiter)) + __mutex_set_flag(lock, MUTEX_FLAG_WAITERS); + lock_contended(&lock->dep_map, ip); + set_task_state(task, state); for (;;) { /* - * Lets try to take the lock again - this is needed even if - * we get here for the first time (shortly after failing to - * acquire the lock), to make sure that we get a wakeup once - * it's unlocked. Later on, if we sleep, this is the - * operation that gives us the lock. We xchg it to -1, so - * that when we release the lock, we properly wake up the - * other waiters. We only attempt the xchg if the count is - * non-negative in order to avoid unnecessary xchg operations: + * Once we hold wait_lock, we're serialized against + * mutex_unlock() handing the lock off to us, do a trylock + * before testing the error conditions to make sure we pick up + * the handoff. */ - if (atomic_read(&lock->count) >= 0 && - (atomic_xchg_acquire(&lock->count, -1) == 1)) - break; + if (__mutex_trylock(lock, first)) + goto acquired; /* - * got a signal? (This code gets eliminated in the - * TASK_UNINTERRUPTIBLE case.) + * Check for signals and wound conditions while holding + * wait_lock. This ensures the lock cancellation is ordered + * against mutex_unlock() and wake-ups do not go missing. */ if (unlikely(signal_pending_state(state, task))) { ret = -EINTR; @@ -575,36 +694,49 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, goto err; } - __set_task_state(task, state); - - /* didn't get the lock, go to sleep: */ spin_unlock_mutex(&lock->wait_lock, flags); schedule_preempt_disabled(); + + if (!first && __mutex_waiter_is_first(lock, &waiter)) { + first = true; + __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF); + } + + set_task_state(task, state); + /* + * Here we order against unlock; we must either see it change + * state back to RUNNING and fall through the next schedule(), + * or we must see its unlock and acquire. + */ + if ((first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)) || + __mutex_trylock(lock, first)) + break; + spin_lock_mutex(&lock->wait_lock, flags); } + spin_lock_mutex(&lock->wait_lock, flags); +acquired: __set_task_state(task, TASK_RUNNING); mutex_remove_waiter(lock, &waiter, task); - /* set it to 0 if there are no waiters left: */ if (likely(list_empty(&lock->wait_list))) - atomic_set(&lock->count, 0); + __mutex_clear_flag(lock, MUTEX_FLAGS); + debug_mutex_free_waiter(&waiter); skip_wait: /* got the lock - cleanup and rejoice! */ lock_acquired(&lock->dep_map, ip); - mutex_set_owner(lock); - if (use_ww_ctx) { - struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); + if (use_ww_ctx) ww_mutex_set_context_slowpath(ww, ww_ctx); - } spin_unlock_mutex(&lock->wait_lock, flags); preempt_enable(); return 0; err: + __set_task_state(task, TASK_RUNNING); mutex_remove_waiter(lock, &waiter, task); spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); @@ -631,7 +763,6 @@ _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_, NULL, 0); } - EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); int __sched @@ -650,7 +781,6 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, NULL, _RET_IP_, NULL, 0); } - EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); static inline int @@ -715,56 +845,66 @@ EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); /* * Release the lock, slowpath: */ -static inline void -__mutex_unlock_common_slowpath(struct mutex *lock, int nested) +static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip) { - unsigned long flags; - WAKE_Q(wake_q); + struct task_struct *next = NULL; + unsigned long owner, flags; + DEFINE_WAKE_Q(wake_q); + + mutex_release(&lock->dep_map, 1, ip); /* - * As a performance measurement, release the lock before doing other - * wakeup related duties to follow. This allows other tasks to acquire - * the lock sooner, while still handling cleanups in past unlock calls. - * This can be done as we do not enforce strict equivalence between the - * mutex counter and wait_list. + * Release the lock before (potentially) taking the spinlock such that + * other contenders can get on with things ASAP. * - * - * Some architectures leave the lock unlocked in the fastpath failure - * case, others need to leave it locked. In the later case we have to - * unlock it here - as the lock counter is currently 0 or negative. + * Except when HANDOFF, in that case we must not clear the owner field, + * but instead set it to the top waiter. */ - if (__mutex_slowpath_needs_to_unlock()) - atomic_set(&lock->count, 1); + owner = atomic_long_read(&lock->owner); + for (;;) { + unsigned long old; + +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current); +#endif + + if (owner & MUTEX_FLAG_HANDOFF) + break; + + old = atomic_long_cmpxchg_release(&lock->owner, owner, + __owner_flags(owner)); + if (old == owner) { + if (owner & MUTEX_FLAG_WAITERS) + break; + + return; + } + + owner = old; + } spin_lock_mutex(&lock->wait_lock, flags); - mutex_release(&lock->dep_map, nested, _RET_IP_); debug_mutex_unlock(lock); - if (!list_empty(&lock->wait_list)) { /* get the first entry from the wait-list: */ struct mutex_waiter *waiter = - list_entry(lock->wait_list.next, - struct mutex_waiter, list); + list_first_entry(&lock->wait_list, + struct mutex_waiter, list); + + next = waiter->task; debug_mutex_wake_waiter(lock, waiter); - wake_q_add(&wake_q, waiter->task); + wake_q_add(&wake_q, next); } + if (owner & MUTEX_FLAG_HANDOFF) + __mutex_handoff(lock, next); + spin_unlock_mutex(&lock->wait_lock, flags); + wake_up_q(&wake_q); } -/* - * Release the lock, slowpath: - */ -__visible void -__mutex_unlock_slowpath(atomic_t *lock_count) -{ - struct mutex *lock = container_of(lock_count, struct mutex, count); - - __mutex_unlock_common_slowpath(lock, 1); -} - #ifndef CONFIG_DEBUG_LOCK_ALLOC /* * Here come the less common (and hence less performance-critical) APIs: @@ -789,38 +929,30 @@ __mutex_lock_interruptible_slowpath(struct mutex *lock); */ int __sched mutex_lock_interruptible(struct mutex *lock) { - int ret; - might_sleep(); - ret = __mutex_fastpath_lock_retval(&lock->count); - if (likely(!ret)) { - mutex_set_owner(lock); + + if (__mutex_trylock_fast(lock)) return 0; - } else - return __mutex_lock_interruptible_slowpath(lock); + + return __mutex_lock_interruptible_slowpath(lock); } EXPORT_SYMBOL(mutex_lock_interruptible); int __sched mutex_lock_killable(struct mutex *lock) { - int ret; - might_sleep(); - ret = __mutex_fastpath_lock_retval(&lock->count); - if (likely(!ret)) { - mutex_set_owner(lock); + + if (__mutex_trylock_fast(lock)) return 0; - } else - return __mutex_lock_killable_slowpath(lock); + + return __mutex_lock_killable_slowpath(lock); } EXPORT_SYMBOL(mutex_lock_killable); -__visible void __sched -__mutex_lock_slowpath(atomic_t *lock_count) +static noinline void __sched +__mutex_lock_slowpath(struct mutex *lock) { - struct mutex *lock = container_of(lock_count, struct mutex, count); - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_, NULL, 0); } @@ -856,37 +988,6 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, #endif -/* - * Spinlock based trylock, we take the spinlock and check whether we - * can get the lock: - */ -static inline int __mutex_trylock_slowpath(atomic_t *lock_count) -{ - struct mutex *lock = container_of(lock_count, struct mutex, count); - unsigned long flags; - int prev; - - /* No need to trylock if the mutex is locked. */ - if (mutex_is_locked(lock)) - return 0; - - spin_lock_mutex(&lock->wait_lock, flags); - - prev = atomic_xchg_acquire(&lock->count, -1); - if (likely(prev == 1)) { - mutex_set_owner(lock); - mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); - } - - /* Set it back to 0 if there are no waiters: */ - if (likely(list_empty(&lock->wait_list))) - atomic_set(&lock->count, 0); - - spin_unlock_mutex(&lock->wait_lock, flags); - - return prev == 1; -} - /** * mutex_trylock - try to acquire the mutex, without waiting * @lock: the mutex to be acquired @@ -903,13 +1004,12 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) */ int __sched mutex_trylock(struct mutex *lock) { - int ret; + bool locked = __mutex_trylock(lock, false); - ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath); - if (ret) - mutex_set_owner(lock); + if (locked) + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); - return ret; + return locked; } EXPORT_SYMBOL(mutex_trylock); @@ -917,36 +1017,28 @@ EXPORT_SYMBOL(mutex_trylock); int __sched __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - int ret; - might_sleep(); - ret = __mutex_fastpath_lock_retval(&lock->base.count); - - if (likely(!ret)) { + if (__mutex_trylock_fast(&lock->base)) { ww_mutex_set_context_fastpath(lock, ctx); - mutex_set_owner(&lock->base); - } else - ret = __ww_mutex_lock_slowpath(lock, ctx); - return ret; + return 0; + } + + return __ww_mutex_lock_slowpath(lock, ctx); } EXPORT_SYMBOL(__ww_mutex_lock); int __sched __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - int ret; - might_sleep(); - ret = __mutex_fastpath_lock_retval(&lock->base.count); - - if (likely(!ret)) { + if (__mutex_trylock_fast(&lock->base)) { ww_mutex_set_context_fastpath(lock, ctx); - mutex_set_owner(&lock->base); - } else - ret = __ww_mutex_lock_interruptible_slowpath(lock, ctx); - return ret; + return 0; + } + + return __ww_mutex_lock_interruptible_slowpath(lock, ctx); } EXPORT_SYMBOL(__ww_mutex_lock_interruptible); diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 6cd6b8e9efd7..4410a4af42a3 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -16,32 +16,6 @@ #define mutex_remove_waiter(lock, waiter, task) \ __list_del((waiter)->list.prev, (waiter)->list.next) -#ifdef CONFIG_MUTEX_SPIN_ON_OWNER -/* - * The mutex owner can get read and written to locklessly. - * We should use WRITE_ONCE when writing the owner value to - * avoid store tearing, otherwise, a thread could potentially - * read a partially written and incomplete owner value. - */ -static inline void mutex_set_owner(struct mutex *lock) -{ - WRITE_ONCE(lock->owner, current); -} - -static inline void mutex_clear_owner(struct mutex *lock) -{ - WRITE_ONCE(lock->owner, NULL); -} -#else -static inline void mutex_set_owner(struct mutex *lock) -{ -} - -static inline void mutex_clear_owner(struct mutex *lock) -{ -} -#endif - #define debug_mutex_wake_waiter(lock, waiter) do { } while (0) #define debug_mutex_free_waiter(waiter) do { } while (0) #define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index 05a37857ab55..a3167941093b 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c @@ -21,6 +21,11 @@ static inline int encode_cpu(int cpu_nr) return cpu_nr + 1; } +static inline int node_cpu(struct optimistic_spin_node *node) +{ + return node->cpu - 1; +} + static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val) { int cpu_nr = encoded_cpu_val - 1; @@ -75,7 +80,7 @@ osq_wait_next(struct optimistic_spin_queue *lock, break; } - cpu_relax_lowlatency(); + cpu_relax(); } return next; @@ -118,11 +123,13 @@ bool osq_lock(struct optimistic_spin_queue *lock) while (!READ_ONCE(node->locked)) { /* * If we need to reschedule bail... so we can block. + * Use vcpu_is_preempted() to avoid waiting for a preempted + * lock holder: */ - if (need_resched()) + if (need_resched() || vcpu_is_preempted(node_cpu(node->prev))) goto unqueue; - cpu_relax_lowlatency(); + cpu_relax(); } return true; @@ -148,7 +155,7 @@ unqueue: if (smp_load_acquire(&node->locked)) return true; - cpu_relax_lowlatency(); + cpu_relax(); /* * Or we race against a concurrent unqueue()'s step-B, in which diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index 19248ddf37ce..cc3ed0ccdfa2 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -54,7 +54,7 @@ static __always_inline void rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts) { while ((cnts & _QW_WMASK) == _QW_LOCKED) { - cpu_relax_lowlatency(); + cpu_relax(); cnts = atomic_read_acquire(&lock->cnts); } } @@ -130,7 +130,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock) (cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING) == 0)) break; - cpu_relax_lowlatency(); + cpu_relax(); } /* When no more readers, set the locked flag */ @@ -141,7 +141,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock) _QW_LOCKED) == _QW_WAITING)) break; - cpu_relax_lowlatency(); + cpu_relax(); } unlock: arch_spin_unlock(&lock->wait_lock); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 2c49d76f96c3..2f443ed2320a 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1446,7 +1446,7 @@ rt_mutex_fastunlock(struct rt_mutex *lock, bool (*slowfn)(struct rt_mutex *lock, struct wake_q_head *wqh)) { - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { rt_mutex_deadlock_account_unlock(current); @@ -1619,11 +1619,15 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init); * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a * proxy owner * - * @lock: the rt_mutex to be locked + * @lock: the rt_mutex to be locked * @proxy_owner:the task to set as owner * * No locking. Caller has to do serializing itself - * Special API call for PI-futex support + * + * Special API call for PI-futex support. This initializes the rtmutex and + * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not + * possible at this point because the pi_state which contains the rtmutex + * is not yet visible to other tasks. */ void rt_mutex_init_proxy_locked(struct rt_mutex *lock, struct task_struct *proxy_owner) @@ -1637,10 +1641,14 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock, /** * rt_mutex_proxy_unlock - release a lock on behalf of owner * - * @lock: the rt_mutex to be locked + * @lock: the rt_mutex to be locked * * No locking. Caller has to do serializing itself - * Special API call for PI-futex support + * + * Special API call for PI-futex support. This merrily cleans up the rtmutex + * (debugging) state. Concurrent operations on this rt_mutex are not + * possible because it belongs to the pi_state which is about to be freed + * and it is not longer visible to other tasks. */ void rt_mutex_proxy_unlock(struct rt_mutex *lock, struct task_struct *proxy_owner) diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index e317e1cbb3eb..990134617b4c 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -71,13 +71,12 @@ task_top_pi_waiter(struct task_struct *p) * lock->owner state tracking: */ #define RT_MUTEX_HAS_WAITERS 1UL -#define RT_MUTEX_OWNER_MASKALL 1UL static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) { unsigned long owner = (unsigned long) READ_ONCE(lock->owner); - return (struct task_struct *) (owner & ~RT_MUTEX_OWNER_MASKALL); + return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS); } /* diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 2337b4bb2366..631506004f9e 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -225,7 +225,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; struct rwsem_waiter waiter; struct task_struct *tsk = current; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); waiter.task = tsk; waiter.type = RWSEM_WAITING_FOR_READ; @@ -336,7 +336,11 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) goto done; } - ret = owner->on_cpu; + /* + * As lock holder preemption issue, we both skip spinning if task is not + * on cpu or its cpu is preempted + */ + ret = owner->on_cpu && !vcpu_is_preempted(task_cpu(owner)); done: rcu_read_unlock(); return ret; @@ -362,13 +366,17 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem) */ barrier(); - /* abort spinning when need_resched or owner is not running */ - if (!owner->on_cpu || need_resched()) { + /* + * abort spinning when need_resched or owner is not running or + * owner's cpu is preempted. + */ + if (!owner->on_cpu || need_resched() || + vcpu_is_preempted(task_cpu(owner))) { rcu_read_unlock(); return false; } - cpu_relax_lowlatency(); + cpu_relax(); } rcu_read_unlock(); out: @@ -423,7 +431,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) * memory barriers as we'll eventually observe the right * values at the cost of a few extra spins. */ - cpu_relax_lowlatency(); + cpu_relax(); } osq_unlock(&sem->osq); done: @@ -461,7 +469,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) bool waiting = true; /* any queued threads before us */ struct rwsem_waiter waiter; struct rw_semaphore *ret = sem; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); /* undo write bias from down_write operation, stop active locking */ count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count); @@ -495,7 +503,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) * wake any read locks that were queued ahead of us. */ if (count > RWSEM_WAITING_BIAS) { - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q); /* @@ -571,7 +579,7 @@ __visible struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) { unsigned long flags; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); /* * If a spinner is present, it is not necessary to do the wakeup. @@ -625,7 +633,7 @@ __visible struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) { unsigned long flags; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); raw_spin_lock_irqsave(&sem->wait_lock, flags); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 154fd689fe02..8b08fb257856 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -75,11 +75,11 @@ #include #include #include +#include #include #include #include -#include #ifdef CONFIG_PARAVIRT #include #endif diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index ec9ab2f01489..1eb82661ecdb 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -194,7 +194,7 @@ static int multi_cpu_stop(void *data) /* Simple state machine */ do { /* Chill out and ensure we re-read multi_stop_state. */ - cpu_relax(); + cpu_relax_yield(); if (msdata->state != curstate) { curstate = msdata->state; switch (curstate) { diff --git a/lib/lockref.c b/lib/lockref.c index 5a92189ad711..c4bfcb8836cd 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -20,7 +20,7 @@ if (likely(old.lock_count == prev.lock_count)) { \ SUCCESS; \ } \ - cpu_relax_lowlatency(); \ + cpu_relax(); \ } \ } while (0) diff --git a/net/core/dev.c b/net/core/dev.c index 1d33ce03365f..3fec23cdeb80 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5064,7 +5064,7 @@ count: return rc; goto restart; } - cpu_relax_lowlatency(); + cpu_relax(); } if (napi_poll) busy_poll_stop(napi, have_poll_lock); diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index a8368d1c4348..23f462f64a3f 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -6076,6 +6076,12 @@ sub process { } } +# check for mutex_trylock_recursive usage + if ($line =~ /mutex_trylock_recursive/) { + ERROR("LOCKING", + "recursive locking is bad, do not use this ever.\n" . $herecurr); + } + # check for lockdep_set_novalidate_class if ($line =~ /^.\s*lockdep_set_novalidate_class\s*\(/ || $line =~ /__lockdep_no_validate__\s*\)/ ) { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7f9ee2929cfe..c55f5d6a7767 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1972,30 +1972,38 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, } EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); -int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len) +int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, int offset, unsigned long len) { struct kvm_memslots *slots = kvm_memslots(kvm); int r; + gpa_t gpa = ghc->gpa + offset; - BUG_ON(len > ghc->len); + BUG_ON(len + offset > ghc->len); if (slots->generation != ghc->generation) kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); if (unlikely(!ghc->memslot)) - return kvm_write_guest(kvm, ghc->gpa, data, len); + return kvm_write_guest(kvm, gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; - r = __copy_to_user((void __user *)ghc->hva, data, len); + r = __copy_to_user((void __user *)ghc->hva + offset, data, len); if (r) return -EFAULT; - mark_page_dirty_in_slot(ghc->memslot, ghc->gpa >> PAGE_SHIFT); + mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT); return 0; } +EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached); + +int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len) +{ + return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len); +} EXPORT_SYMBOL_GPL(kvm_write_guest_cached); int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,