Locking changes for v6.10:
- Over a dozen code generation micro-optimizations for the atomic and spinlock code. - Add more __ro_after_init attributes - Robustify the lockdevent_*() macros Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmZBrMMRHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1gSuA//YyLRTCGtH6d/fCudlzzoa14MHO/QiCv7 lgmq3Vqif/m+MW7LwQJbLrxDPJPT1mE9Ol9woOc133Cj1QZhF/HQvDAKT9ZpMoXU d8U3kuZ7tN41TJuQx6vNSCv3w5ToKeXaQJGxiT6od2Y/0QlhUKhVBSBQVtyc/ma6 o1Uhq1Qp5KPj928jiqwI0JCZJFqqLvzq/rIT38V05phHEPet4GbLMbz9ZTsw70pm xmLzGLXJQ9maziuVcmRUrctsAkbk+VhChQ9p4HrH6AcYPwyQoF+zJr7iocyzIMG2 xQqhEYShI72lcRft8hZwlrLTKZJWSAkDIxIxaQ2egzsNBwBPbRpP0mUIz3qbwJxQ fqzKGxwDmxjiX1Ib4gIVje66hp2QpPX5G1ARoeKvbrHkXxzqVuFlaQBn1+OAQ/GV mNzKADxrjalhyiMksHXbEbUNEvXCGqC2N9AOWT6XNvpLDqTJBz/wB+f9cbx3gYEO 9rXwVicWXLzUnEfbRaEjCrDeMEHMLqhaZIndgCx07JpFkkTtKLD1N9tBxFPNH+SP XK7SAsXrxwhBjGbWItfF4eOaPCey+/+kGhOPadfTg3g9zDjEBvX/YNBBw9q2CUWc JWd/gct+/Jnnkh1jdIj9yRF2xciVY+iOshHRzG+clo/PhRTwv+DwfMJ/uzn+oaSF vOT+exKA8bg= =rT48 -----END PGP SIGNATURE----- Merge tag 'locking-core-2024-05-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull locking updates from Ingo Molnar: - Over a dozen code generation micro-optimizations for the atomic and spinlock code - Add more __ro_after_init attributes - Robustify the lockdevent_*() macros * tag 'locking-core-2024-05-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: locking/pvqspinlock/x86: Use _Q_LOCKED_VAL in PV_UNLOCK_ASM macro locking/qspinlock/x86: Micro-optimize virt_spin_lock() locking/atomic/x86: Merge __arch{,_try}_cmpxchg64_emu_local() with __arch{,_try}_cmpxchg64_emu() locking/atomic/x86: Introduce arch_try_cmpxchg64_local() locking/pvqspinlock/x86: Remove redundant CMP after CMPXCHG in __raw_callee_save___pv_queued_spin_unlock() locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() locking/qspinlock: Use 
atomic_try_cmpxchg_relaxed() in xchg_tail() locking/atomic/x86: Define arch_atomic_sub() family using arch_atomic_add() functions locking/atomic/x86: Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions locking/atomic/x86: Introduce arch_atomic64_read_nonatomic() to x86_32 locking/atomic/x86: Introduce arch_atomic64_try_cmpxchg() to x86_32 locking/atomic/x86: Introduce arch_try_cmpxchg64() for !CONFIG_X86_CMPXCHG64 locking/atomic/x86: Modernize x86_32 arch_{,try_}_cmpxchg64{,_local}() locking/atomic/x86: Correct the definition of __arch_try_cmpxchg128() x86/tsc: Make __use_tsc __ro_after_init x86/kvm: Make kvm_async_pf_enabled __ro_after_init context_tracking: Make context_tracking_key __ro_after_init jump_label,module: Don't alloc static_key_mod for __ro_after_init keys locking/qspinlock: Always evaluate lockevent* non-event parameter once
This commit is contained in:
commit
48fc82c40b
@ -86,11 +86,7 @@ static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
|
||||
}
|
||||
#define arch_atomic_add_return arch_atomic_add_return
|
||||
|
||||
static __always_inline int arch_atomic_sub_return(int i, atomic_t *v)
|
||||
{
|
||||
return arch_atomic_add_return(-i, v);
|
||||
}
|
||||
#define arch_atomic_sub_return arch_atomic_sub_return
|
||||
#define arch_atomic_sub_return(i, v) arch_atomic_add_return(-(i), v)
|
||||
|
||||
static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
|
||||
{
|
||||
@ -98,11 +94,7 @@ static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
|
||||
}
|
||||
#define arch_atomic_fetch_add arch_atomic_fetch_add
|
||||
|
||||
static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v)
|
||||
{
|
||||
return xadd(&v->counter, -i);
|
||||
}
|
||||
#define arch_atomic_fetch_sub arch_atomic_fetch_sub
|
||||
#define arch_atomic_fetch_sub(i, v) arch_atomic_fetch_add(-(i), v)
|
||||
|
||||
static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
|
||||
{
|
||||
|
@ -14,6 +14,32 @@ typedef struct {
|
||||
|
||||
#define ATOMIC64_INIT(val) { (val) }
|
||||
|
||||
/*
|
||||
* Read an atomic64_t non-atomically.
|
||||
*
|
||||
* This is intended to be used in cases where a subsequent atomic operation
|
||||
* will handle the torn value, and can be used to prime the first iteration
|
||||
* of unconditional try_cmpxchg() loops, e.g.:
|
||||
*
|
||||
* s64 val = arch_atomic64_read_nonatomic(v);
|
||||
* do { } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i));
|
||||
*
|
||||
* This is NOT safe to use where the value is not always checked by a
|
||||
* subsequent atomic operation, such as in conditional try_cmpxchg() loops
|
||||
* that can break before the atomic operation, e.g.:
|
||||
*
|
||||
* s64 val = arch_atomic64_read_nonatomic(v);
|
||||
* do {
|
||||
* if (condition(val))
|
||||
* break;
|
||||
* } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i));
|
||||
*/
|
||||
static __always_inline s64 arch_atomic64_read_nonatomic(const atomic64_t *v)
|
||||
{
|
||||
/* See comment in arch_atomic_read(). */
|
||||
return __READ_ONCE(v->counter);
|
||||
}
|
||||
|
||||
#define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...)
|
||||
#ifndef ATOMIC64_EXPORT
|
||||
#define ATOMIC64_DECL_ONE __ATOMIC64_DECL
|
||||
@ -61,12 +87,18 @@ ATOMIC64_DECL(add_unless);
|
||||
#undef __ATOMIC64_DECL
|
||||
#undef ATOMIC64_EXPORT
|
||||
|
||||
static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
|
||||
static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
|
||||
{
|
||||
return arch_cmpxchg64(&v->counter, o, n);
|
||||
return arch_cmpxchg64(&v->counter, old, new);
|
||||
}
|
||||
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
|
||||
|
||||
static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
|
||||
{
|
||||
return arch_try_cmpxchg64(&v->counter, old, new);
|
||||
}
|
||||
#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
|
||||
|
||||
static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
|
||||
{
|
||||
s64 o;
|
||||
@ -195,69 +227,62 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
|
||||
|
||||
static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v)
|
||||
{
|
||||
s64 old, c = 0;
|
||||
s64 val = arch_atomic64_read_nonatomic(v);
|
||||
|
||||
while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
|
||||
c = old;
|
||||
do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
|
||||
}
|
||||
|
||||
static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
|
||||
{
|
||||
s64 old, c = 0;
|
||||
s64 val = arch_atomic64_read_nonatomic(v);
|
||||
|
||||
while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
|
||||
c = old;
|
||||
do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
|
||||
|
||||
return old;
|
||||
return val;
|
||||
}
|
||||
#define arch_atomic64_fetch_and arch_atomic64_fetch_and
|
||||
|
||||
static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v)
|
||||
{
|
||||
s64 old, c = 0;
|
||||
s64 val = arch_atomic64_read_nonatomic(v);
|
||||
|
||||
while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
|
||||
c = old;
|
||||
do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
|
||||
}
|
||||
|
||||
static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
|
||||
{
|
||||
s64 old, c = 0;
|
||||
s64 val = arch_atomic64_read_nonatomic(v);
|
||||
|
||||
while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
|
||||
c = old;
|
||||
do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
|
||||
|
||||
return old;
|
||||
return val;
|
||||
}
|
||||
#define arch_atomic64_fetch_or arch_atomic64_fetch_or
|
||||
|
||||
static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v)
|
||||
{
|
||||
s64 old, c = 0;
|
||||
s64 val = arch_atomic64_read_nonatomic(v);
|
||||
|
||||
while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
|
||||
c = old;
|
||||
do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
|
||||
}
|
||||
|
||||
static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
|
||||
{
|
||||
s64 old, c = 0;
|
||||
s64 val = arch_atomic64_read_nonatomic(v);
|
||||
|
||||
while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
|
||||
c = old;
|
||||
do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
|
||||
|
||||
return old;
|
||||
return val;
|
||||
}
|
||||
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
|
||||
|
||||
static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
|
||||
{
|
||||
s64 old, c = 0;
|
||||
s64 val = arch_atomic64_read_nonatomic(v);
|
||||
|
||||
while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c)
|
||||
c = old;
|
||||
do { } while (!arch_atomic64_try_cmpxchg(v, &val, val + i));
|
||||
|
||||
return old;
|
||||
return val;
|
||||
}
|
||||
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
|
||||
|
||||
|
@ -80,11 +80,7 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
|
||||
}
|
||||
#define arch_atomic64_add_return arch_atomic64_add_return
|
||||
|
||||
static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
|
||||
{
|
||||
return arch_atomic64_add_return(-i, v);
|
||||
}
|
||||
#define arch_atomic64_sub_return arch_atomic64_sub_return
|
||||
#define arch_atomic64_sub_return(i, v) arch_atomic64_add_return(-(i), v)
|
||||
|
||||
static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
|
||||
{
|
||||
@ -92,11 +88,7 @@ static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
|
||||
}
|
||||
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
|
||||
|
||||
static __always_inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v)
|
||||
{
|
||||
return xadd(&v->counter, -i);
|
||||
}
|
||||
#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
|
||||
#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), v)
|
||||
|
||||
static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
|
||||
{
|
||||
|
@ -3,103 +3,150 @@
|
||||
#define _ASM_X86_CMPXCHG_32_H
|
||||
|
||||
/*
|
||||
* Note: if you use set64_bit(), __cmpxchg64(), or their variants,
|
||||
* Note: if you use __cmpxchg64() or its variants,
|
||||
* you need to test for the feature in boot_cpu_data.
|
||||
*/
|
||||
|
||||
union __u64_halves {
|
||||
u64 full;
|
||||
struct {
|
||||
u32 low, high;
|
||||
};
|
||||
};
|
||||
|
||||
#define __arch_cmpxchg64(_ptr, _old, _new, _lock) \
|
||||
({ \
|
||||
union __u64_halves o = { .full = (_old), }, \
|
||||
n = { .full = (_new), }; \
|
||||
\
|
||||
asm volatile(_lock "cmpxchg8b %[ptr]" \
|
||||
: [ptr] "+m" (*(_ptr)), \
|
||||
"+a" (o.low), "+d" (o.high) \
|
||||
: "b" (n.low), "c" (n.high) \
|
||||
: "memory"); \
|
||||
\
|
||||
o.full; \
|
||||
})
|
||||
|
||||
|
||||
static __always_inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
|
||||
{
|
||||
return __arch_cmpxchg64(ptr, old, new, LOCK_PREFIX);
|
||||
}
|
||||
|
||||
static __always_inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
|
||||
{
|
||||
return __arch_cmpxchg64(ptr, old, new,);
|
||||
}
|
||||
|
||||
#define __arch_try_cmpxchg64(_ptr, _oldp, _new, _lock) \
|
||||
({ \
|
||||
union __u64_halves o = { .full = *(_oldp), }, \
|
||||
n = { .full = (_new), }; \
|
||||
bool ret; \
|
||||
\
|
||||
asm volatile(_lock "cmpxchg8b %[ptr]" \
|
||||
CC_SET(e) \
|
||||
: CC_OUT(e) (ret), \
|
||||
[ptr] "+m" (*(_ptr)), \
|
||||
"+a" (o.low), "+d" (o.high) \
|
||||
: "b" (n.low), "c" (n.high) \
|
||||
: "memory"); \
|
||||
\
|
||||
if (unlikely(!ret)) \
|
||||
*(_oldp) = o.full; \
|
||||
\
|
||||
likely(ret); \
|
||||
})
|
||||
|
||||
static __always_inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
|
||||
{
|
||||
return __arch_try_cmpxchg64(ptr, oldp, new, LOCK_PREFIX);
|
||||
}
|
||||
|
||||
static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
|
||||
{
|
||||
return __arch_try_cmpxchg64(ptr, oldp, new,);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_CMPXCHG64
|
||||
#define arch_cmpxchg64(ptr, o, n) \
|
||||
((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
|
||||
(unsigned long long)(n)))
|
||||
#define arch_cmpxchg64_local(ptr, o, n) \
|
||||
((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \
|
||||
(unsigned long long)(n)))
|
||||
#define arch_try_cmpxchg64(ptr, po, n) \
|
||||
__try_cmpxchg64((ptr), (unsigned long long *)(po), \
|
||||
(unsigned long long)(n))
|
||||
#endif
|
||||
|
||||
static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
|
||||
{
|
||||
u64 prev;
|
||||
asm volatile(LOCK_PREFIX "cmpxchg8b %1"
|
||||
: "=A" (prev),
|
||||
"+m" (*ptr)
|
||||
: "b" ((u32)new),
|
||||
"c" ((u32)(new >> 32)),
|
||||
"0" (old)
|
||||
: "memory");
|
||||
return prev;
|
||||
}
|
||||
#define arch_cmpxchg64 __cmpxchg64
|
||||
|
||||
static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
|
||||
{
|
||||
u64 prev;
|
||||
asm volatile("cmpxchg8b %1"
|
||||
: "=A" (prev),
|
||||
"+m" (*ptr)
|
||||
: "b" ((u32)new),
|
||||
"c" ((u32)(new >> 32)),
|
||||
"0" (old)
|
||||
: "memory");
|
||||
return prev;
|
||||
}
|
||||
#define arch_cmpxchg64_local __cmpxchg64_local
|
||||
|
||||
static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new)
|
||||
{
|
||||
bool success;
|
||||
u64 old = *pold;
|
||||
asm volatile(LOCK_PREFIX "cmpxchg8b %[ptr]"
|
||||
CC_SET(z)
|
||||
: CC_OUT(z) (success),
|
||||
[ptr] "+m" (*ptr),
|
||||
"+A" (old)
|
||||
: "b" ((u32)new),
|
||||
"c" ((u32)(new >> 32))
|
||||
: "memory");
|
||||
#define arch_try_cmpxchg64 __try_cmpxchg64
|
||||
|
||||
if (unlikely(!success))
|
||||
*pold = old;
|
||||
return success;
|
||||
}
|
||||
#define arch_try_cmpxchg64_local __try_cmpxchg64_local
|
||||
|
||||
#else
|
||||
|
||||
#ifndef CONFIG_X86_CMPXCHG64
|
||||
/*
|
||||
* Building a kernel capable of running on 80386 and 80486. It may be necessary
|
||||
* to simulate the cmpxchg8b on the 80386 and 80486 CPU.
|
||||
*/
|
||||
|
||||
#define arch_cmpxchg64(ptr, o, n) \
|
||||
({ \
|
||||
__typeof__(*(ptr)) __ret; \
|
||||
__typeof__(*(ptr)) __old = (o); \
|
||||
__typeof__(*(ptr)) __new = (n); \
|
||||
alternative_io(LOCK_PREFIX_HERE \
|
||||
"call cmpxchg8b_emu", \
|
||||
"lock; cmpxchg8b (%%esi)" , \
|
||||
X86_FEATURE_CX8, \
|
||||
"=A" (__ret), \
|
||||
"S" ((ptr)), "0" (__old), \
|
||||
"b" ((unsigned int)__new), \
|
||||
"c" ((unsigned int)(__new>>32)) \
|
||||
: "memory"); \
|
||||
__ret; })
|
||||
#define __arch_cmpxchg64_emu(_ptr, _old, _new, _lock_loc, _lock) \
|
||||
({ \
|
||||
union __u64_halves o = { .full = (_old), }, \
|
||||
n = { .full = (_new), }; \
|
||||
\
|
||||
asm volatile(ALTERNATIVE(_lock_loc \
|
||||
"call cmpxchg8b_emu", \
|
||||
_lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \
|
||||
: [ptr] "+m" (*(_ptr)), \
|
||||
"+a" (o.low), "+d" (o.high) \
|
||||
: "b" (n.low), "c" (n.high), "S" (_ptr) \
|
||||
: "memory"); \
|
||||
\
|
||||
o.full; \
|
||||
})
|
||||
|
||||
static __always_inline u64 arch_cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
|
||||
{
|
||||
return __arch_cmpxchg64_emu(ptr, old, new, LOCK_PREFIX_HERE, "lock; ");
|
||||
}
|
||||
#define arch_cmpxchg64 arch_cmpxchg64
|
||||
|
||||
#define arch_cmpxchg64_local(ptr, o, n) \
|
||||
({ \
|
||||
__typeof__(*(ptr)) __ret; \
|
||||
__typeof__(*(ptr)) __old = (o); \
|
||||
__typeof__(*(ptr)) __new = (n); \
|
||||
alternative_io("call cmpxchg8b_emu", \
|
||||
"cmpxchg8b (%%esi)" , \
|
||||
X86_FEATURE_CX8, \
|
||||
"=A" (__ret), \
|
||||
"S" ((ptr)), "0" (__old), \
|
||||
"b" ((unsigned int)__new), \
|
||||
"c" ((unsigned int)(__new>>32)) \
|
||||
: "memory"); \
|
||||
__ret; })
|
||||
static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
|
||||
{
|
||||
return __arch_cmpxchg64_emu(ptr, old, new, ,);
|
||||
}
|
||||
#define arch_cmpxchg64_local arch_cmpxchg64_local
|
||||
|
||||
#define __arch_try_cmpxchg64_emu(_ptr, _oldp, _new, _lock_loc, _lock) \
|
||||
({ \
|
||||
union __u64_halves o = { .full = *(_oldp), }, \
|
||||
n = { .full = (_new), }; \
|
||||
bool ret; \
|
||||
\
|
||||
asm volatile(ALTERNATIVE(_lock_loc \
|
||||
"call cmpxchg8b_emu", \
|
||||
_lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \
|
||||
CC_SET(e) \
|
||||
: CC_OUT(e) (ret), \
|
||||
[ptr] "+m" (*(_ptr)), \
|
||||
"+a" (o.low), "+d" (o.high) \
|
||||
: "b" (n.low), "c" (n.high), "S" (_ptr) \
|
||||
: "memory"); \
|
||||
\
|
||||
if (unlikely(!ret)) \
|
||||
*(_oldp) = o.full; \
|
||||
\
|
||||
likely(ret); \
|
||||
})
|
||||
|
||||
static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
|
||||
{
|
||||
return __arch_try_cmpxchg64_emu(ptr, oldp, new, LOCK_PREFIX_HERE, "lock; ");
|
||||
}
|
||||
#define arch_try_cmpxchg64 arch_try_cmpxchg64
|
||||
|
||||
static __always_inline bool arch_try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
|
||||
{
|
||||
return __arch_try_cmpxchg64_emu(ptr, oldp, new, ,);
|
||||
}
|
||||
#define arch_try_cmpxchg64_local arch_try_cmpxchg64_local
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -20,6 +20,12 @@
|
||||
arch_try_cmpxchg((ptr), (po), (n)); \
|
||||
})
|
||||
|
||||
#define arch_try_cmpxchg64_local(ptr, po, n) \
|
||||
({ \
|
||||
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
|
||||
arch_try_cmpxchg_local((ptr), (po), (n)); \
|
||||
})
|
||||
|
||||
union __u128_halves {
|
||||
u128 full;
|
||||
struct {
|
||||
@ -62,7 +68,7 @@ static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old,
|
||||
asm volatile(_lock "cmpxchg16b %[ptr]" \
|
||||
CC_SET(e) \
|
||||
: CC_OUT(e) (ret), \
|
||||
[ptr] "+m" (*ptr), \
|
||||
[ptr] "+m" (*(_ptr)), \
|
||||
"+a" (o.low), "+d" (o.high) \
|
||||
: "b" (n.low), "c" (n.high) \
|
||||
: "memory"); \
|
||||
|
@ -85,6 +85,8 @@ DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
|
||||
#define virt_spin_lock virt_spin_lock
|
||||
static inline bool virt_spin_lock(struct qspinlock *lock)
|
||||
{
|
||||
int val;
|
||||
|
||||
if (!static_branch_likely(&virt_spin_lock_key))
|
||||
return false;
|
||||
|
||||
@ -94,10 +96,13 @@ static inline bool virt_spin_lock(struct qspinlock *lock)
|
||||
* horrible lock 'holder' preemption issues.
|
||||
*/
|
||||
|
||||
do {
|
||||
while (atomic_read(&lock->val) != 0)
|
||||
cpu_relax();
|
||||
} while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);
|
||||
__retry:
|
||||
val = atomic_read(&lock->val);
|
||||
|
||||
if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) {
|
||||
cpu_relax();
|
||||
goto __retry;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -25,9 +25,9 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
|
||||
*
|
||||
* void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock)
|
||||
* {
|
||||
* u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
|
||||
* u8 lockval = _Q_LOCKED_VAL;
|
||||
*
|
||||
* if (likely(lockval == _Q_LOCKED_VAL))
|
||||
* if (try_cmpxchg(&lock->locked, &lockval, 0))
|
||||
* return;
|
||||
* pv_queued_spin_unlock_slowpath(lock, lockval);
|
||||
* }
|
||||
@ -40,10 +40,9 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
|
||||
#define PV_UNLOCK_ASM \
|
||||
FRAME_BEGIN \
|
||||
"push %rdx\n\t" \
|
||||
"mov $0x1,%eax\n\t" \
|
||||
"mov $" __stringify(_Q_LOCKED_VAL) ",%eax\n\t" \
|
||||
"xor %edx,%edx\n\t" \
|
||||
LOCK_PREFIX "cmpxchg %dl,(%rdi)\n\t" \
|
||||
"cmp $0x1,%al\n\t" \
|
||||
"jne .slowpath\n\t" \
|
||||
"pop %rdx\n\t" \
|
||||
FRAME_END \
|
||||
|
@ -44,7 +44,7 @@
|
||||
#include <asm/svm.h>
|
||||
#include <asm/e820/api.h>
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
|
||||
DEFINE_STATIC_KEY_FALSE_RO(kvm_async_pf_enabled);
|
||||
|
||||
static int kvmapf = 1;
|
||||
|
||||
|
@ -44,7 +44,7 @@ EXPORT_SYMBOL(tsc_khz);
|
||||
static int __read_mostly tsc_unstable;
|
||||
static unsigned int __initdata tsc_early_khz;
|
||||
|
||||
static DEFINE_STATIC_KEY_FALSE(__use_tsc);
|
||||
static DEFINE_STATIC_KEY_FALSE_RO(__use_tsc);
|
||||
|
||||
int tsc_clocksource_reliable;
|
||||
|
||||
|
@ -180,6 +180,11 @@ static inline bool is_kernel_rodata(unsigned long addr)
|
||||
addr < (unsigned long)__end_rodata;
|
||||
}
|
||||
|
||||
static inline bool is_kernel_ro_after_init(unsigned long addr)
|
||||
{
|
||||
return addr >= (unsigned long)__start_ro_after_init &&
|
||||
addr < (unsigned long)__end_ro_after_init;
|
||||
}
|
||||
/**
|
||||
* is_kernel_inittext - checks if the pointer address is located in the
|
||||
* .init.text section
|
||||
|
@ -216,6 +216,7 @@ extern struct jump_entry __start___jump_table[];
|
||||
extern struct jump_entry __stop___jump_table[];
|
||||
|
||||
extern void jump_label_init(void);
|
||||
extern void jump_label_init_ro(void);
|
||||
extern void jump_label_lock(void);
|
||||
extern void jump_label_unlock(void);
|
||||
extern void arch_jump_label_transform(struct jump_entry *entry,
|
||||
@ -265,6 +266,8 @@ static __always_inline void jump_label_init(void)
|
||||
static_key_initialized = true;
|
||||
}
|
||||
|
||||
static __always_inline void jump_label_init_ro(void) { }
|
||||
|
||||
static __always_inline bool static_key_false(struct static_key *key)
|
||||
{
|
||||
if (unlikely_notrace(static_key_count(key) > 0))
|
||||
|
@ -1415,6 +1415,7 @@ static void mark_readonly(void)
|
||||
* insecure pages which are W+X.
|
||||
*/
|
||||
flush_module_init_free_work();
|
||||
jump_label_init_ro();
|
||||
mark_rodata_ro();
|
||||
debug_checkwx();
|
||||
rodata_test();
|
||||
|
@ -432,7 +432,7 @@ static __always_inline void ct_kernel_enter(bool user, int offset) { }
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/context_tracking.h>
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(context_tracking_key);
|
||||
DEFINE_STATIC_KEY_FALSE_RO(context_tracking_key);
|
||||
EXPORT_SYMBOL_GPL(context_tracking_key);
|
||||
|
||||
static noinstr bool context_tracking_recursion_enter(void)
|
||||
|
@ -530,6 +530,45 @@ void __init jump_label_init(void)
|
||||
cpus_read_unlock();
|
||||
}
|
||||
|
||||
static inline bool static_key_sealed(struct static_key *key)
|
||||
{
|
||||
return (key->type & JUMP_TYPE_LINKED) && !(key->type & ~JUMP_TYPE_MASK);
|
||||
}
|
||||
|
||||
static inline void static_key_seal(struct static_key *key)
|
||||
{
|
||||
unsigned long type = key->type & JUMP_TYPE_TRUE;
|
||||
key->type = JUMP_TYPE_LINKED | type;
|
||||
}
|
||||
|
||||
void jump_label_init_ro(void)
|
||||
{
|
||||
struct jump_entry *iter_start = __start___jump_table;
|
||||
struct jump_entry *iter_stop = __stop___jump_table;
|
||||
struct jump_entry *iter;
|
||||
|
||||
if (WARN_ON_ONCE(!static_key_initialized))
|
||||
return;
|
||||
|
||||
cpus_read_lock();
|
||||
jump_label_lock();
|
||||
|
||||
for (iter = iter_start; iter < iter_stop; iter++) {
|
||||
struct static_key *iterk = jump_entry_key(iter);
|
||||
|
||||
if (!is_kernel_ro_after_init((unsigned long)iterk))
|
||||
continue;
|
||||
|
||||
if (static_key_sealed(iterk))
|
||||
continue;
|
||||
|
||||
static_key_seal(iterk);
|
||||
}
|
||||
|
||||
jump_label_unlock();
|
||||
cpus_read_unlock();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MODULES
|
||||
|
||||
enum jump_label_type jump_label_init_type(struct jump_entry *entry)
|
||||
@ -650,6 +689,15 @@ static int jump_label_add_module(struct module *mod)
|
||||
static_key_set_entries(key, iter);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the key was sealed at init, then there's no need to keep a
|
||||
* reference to its module entries - just patch them now and be
|
||||
* done with it.
|
||||
*/
|
||||
if (static_key_sealed(key))
|
||||
goto do_poke;
|
||||
|
||||
jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL);
|
||||
if (!jlm)
|
||||
return -ENOMEM;
|
||||
@ -675,6 +723,7 @@ static int jump_label_add_module(struct module *mod)
|
||||
static_key_set_linked(key);
|
||||
|
||||
/* Only update if we've changed from our initial state */
|
||||
do_poke:
|
||||
if (jump_label_type(iter) != jump_label_init_type(iter))
|
||||
__jump_label_update(key, iter, iter_stop, true);
|
||||
}
|
||||
@ -699,6 +748,10 @@ static void jump_label_del_module(struct module *mod)
|
||||
if (within_module((unsigned long)key, mod))
|
||||
continue;
|
||||
|
||||
/* No @jlm allocated because key was sealed at init. */
|
||||
if (static_key_sealed(key))
|
||||
continue;
|
||||
|
||||
/* No memory during module load */
|
||||
if (WARN_ON(!static_key_linked(key)))
|
||||
continue;
|
||||
|
@ -53,8 +53,8 @@ static inline void __lockevent_add(enum lock_events event, int inc)
|
||||
#else /* CONFIG_LOCK_EVENT_COUNTS */
|
||||
|
||||
#define lockevent_inc(ev)
|
||||
#define lockevent_add(ev, c)
|
||||
#define lockevent_cond_inc(ev, c)
|
||||
#define lockevent_add(ev, c) do { (void)(c); } while (0)
|
||||
#define lockevent_cond_inc(ev, c) do { (void)(c); } while (0)
|
||||
|
||||
#endif /* CONFIG_LOCK_EVENT_COUNTS */
|
||||
|
||||
|
@ -220,21 +220,18 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
|
||||
*/
|
||||
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
|
||||
{
|
||||
u32 old, new, val = atomic_read(&lock->val);
|
||||
u32 old, new;
|
||||
|
||||
for (;;) {
|
||||
new = (val & _Q_LOCKED_PENDING_MASK) | tail;
|
||||
old = atomic_read(&lock->val);
|
||||
do {
|
||||
new = (old & _Q_LOCKED_PENDING_MASK) | tail;
|
||||
/*
|
||||
* We can use relaxed semantics since the caller ensures that
|
||||
* the MCS node is properly initialized before updating the
|
||||
* tail.
|
||||
*/
|
||||
old = atomic_cmpxchg_relaxed(&lock->val, val, new);
|
||||
if (old == val)
|
||||
break;
|
||||
} while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
|
||||
|
||||
val = old;
|
||||
}
|
||||
return old;
|
||||
}
|
||||
#endif /* _Q_PENDING_BITS == 8 */
|
||||
|
@ -86,9 +86,10 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
|
||||
*/
|
||||
for (;;) {
|
||||
int val = atomic_read(&lock->val);
|
||||
u8 old = 0;
|
||||
|
||||
if (!(val & _Q_LOCKED_PENDING_MASK) &&
|
||||
(cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
|
||||
try_cmpxchg_acquire(&lock->locked, &old, _Q_LOCKED_VAL)) {
|
||||
lockevent_inc(pv_lock_stealing);
|
||||
return true;
|
||||
}
|
||||
@ -116,11 +117,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
|
||||
* barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
|
||||
* lock just to be sure that it will get it.
|
||||
*/
|
||||
static __always_inline int trylock_clear_pending(struct qspinlock *lock)
|
||||
static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
|
||||
{
|
||||
u16 old = _Q_PENDING_VAL;
|
||||
|
||||
return !READ_ONCE(lock->locked) &&
|
||||
(cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
|
||||
_Q_LOCKED_VAL) == _Q_PENDING_VAL);
|
||||
try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
|
||||
}
|
||||
#else /* _Q_PENDING_BITS == 8 */
|
||||
static __always_inline void set_pending(struct qspinlock *lock)
|
||||
@ -128,27 +130,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
|
||||
atomic_or(_Q_PENDING_VAL, &lock->val);
|
||||
}
|
||||
|
||||
static __always_inline int trylock_clear_pending(struct qspinlock *lock)
|
||||
static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
|
||||
{
|
||||
int val = atomic_read(&lock->val);
|
||||
|
||||
for (;;) {
|
||||
int old, new;
|
||||
|
||||
if (val & _Q_LOCKED_MASK)
|
||||
break;
|
||||
int old, new;
|
||||
|
||||
old = atomic_read(&lock->val);
|
||||
do {
|
||||
if (old & _Q_LOCKED_MASK)
|
||||
return false;
|
||||
/*
|
||||
* Try to clear pending bit & set locked bit
|
||||
*/
|
||||
old = val;
|
||||
new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
|
||||
val = atomic_cmpxchg_acquire(&lock->val, old, new);
|
||||
new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
|
||||
} while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));
|
||||
|
||||
if (val == old)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
return true;
|
||||
}
|
||||
#endif /* _Q_PENDING_BITS == 8 */
|
||||
|
||||
@ -216,8 +212,9 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
|
||||
int hopcnt = 0;
|
||||
|
||||
for_each_hash_entry(he, offset, hash) {
|
||||
struct qspinlock *old = NULL;
|
||||
hopcnt++;
|
||||
if (!cmpxchg(&he->lock, NULL, lock)) {
|
||||
if (try_cmpxchg(&he->lock, &old, lock)) {
|
||||
WRITE_ONCE(he->node, node);
|
||||
lockevent_pv_hop(hopcnt);
|
||||
return &he->lock;
|
||||
@ -294,7 +291,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
|
||||
{
|
||||
struct pv_node *pn = (struct pv_node *)node;
|
||||
struct pv_node *pp = (struct pv_node *)prev;
|
||||
bool __maybe_unused wait_early;
|
||||
bool wait_early;
|
||||
int loop;
|
||||
|
||||
for (;;) {
|
||||
@ -360,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
|
||||
static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
|
||||
{
|
||||
struct pv_node *pn = (struct pv_node *)node;
|
||||
|
||||
enum vcpu_state old = vcpu_halted;
|
||||
/*
|
||||
* If the vCPU is indeed halted, advance its state to match that of
|
||||
* pv_wait_node(). If OTOH this fails, the vCPU was running and will
|
||||
@ -377,8 +374,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
|
||||
* subsequent writes.
|
||||
*/
|
||||
smp_mb__before_atomic();
|
||||
if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
|
||||
!= vcpu_halted)
|
||||
if (!try_cmpxchg_relaxed(&pn->state, &old, vcpu_hashed))
|
||||
return;
|
||||
|
||||
/*
|
||||
@ -546,15 +542,14 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
|
||||
#ifndef __pv_queued_spin_unlock
|
||||
__visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock)
|
||||
{
|
||||
u8 locked;
|
||||
u8 locked = _Q_LOCKED_VAL;
|
||||
|
||||
/*
|
||||
* We must not unlock if SLOW, because in that case we must first
|
||||
* unhash. Otherwise it would be possible to have multiple @lock
|
||||
* entries, which would be BAD.
|
||||
*/
|
||||
locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
|
||||
if (likely(locked == _Q_LOCKED_VAL))
|
||||
if (try_cmpxchg_release(&lock->locked, &locked, 0))
|
||||
return;
|
||||
|
||||
__pv_queued_spin_unlock_slowpath(lock, locked);
|
||||
|
Loading…
x
Reference in New Issue
Block a user