// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/sched/clock.h>
#include <asm/qspinlock.h>
#include <asm/paravirt.h>
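
/*
 * Each CPU provides a small array of MCS queue nodes. Four nodes per CPU
 * (as in the generic qspinlock) allows lock acquisition to nest across
 * contexts, e.g. task, soft IRQ, hard IRQ and NMI.
 */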
#define MAX_NODES	4

struct qnode {
	struct qnode	*next;
	struct qspinlock *lock;
	int		cpu;
	int		yield_cpu;
	u8		locked; /* 1 if lock acquired */
};

struct qnodes {
	int		count;
	struct qnode nodes[MAX_NODES];
};

/* Tuning parameters */
static int steal_spins __read_mostly = (1 << 5);
static int remote_steal_spins __read_mostly = (1 << 2);
#if _Q_SPIN_TRY_LOCK_STEAL == 1
static const bool maybe_stealers = true;
#else
static bool maybe_stealers __read_mostly = true;
#endif
static int head_spins __read_mostly = (1 << 8);

static bool pv_yield_owner __read_mostly = true;
static bool pv_yield_allow_steal __read_mostly = false;
static bool pv_spin_on_preempted_owner __read_mostly = false;
static bool pv_sleepy_lock __read_mostly = true;
static bool pv_sleepy_lock_sticky __read_mostly = false;
static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
static int pv_sleepy_lock_factor __read_mostly = 256;
static bool pv_yield_prev __read_mostly = true;
static bool pv_yield_propagate_owner __read_mostly = true;
static bool pv_prod_head __read_mostly = false;

static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);

#if _Q_SPIN_SPEC_BARRIER == 1
#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
#else
#define spec_barrier() do { } while (0)
#endif
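
/*
 * Returns true if this CPU has recently (within pv_sleepy_lock_interval_ns)
 * observed a lock held by a preempted vCPU; waiters then scale up their
 * spin budgets by pv_sleepy_lock_factor.
 */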
static __always_inline bool recently_sleepy(void)
{
	/* pv_sleepy_lock is true when this is called */
	if (pv_sleepy_lock_interval_ns) {
		u64 seen = this_cpu_read(sleepy_lock_seen_clock);

		if (seen) {
			u64 delta = sched_clock() - seen;
			if (delta < pv_sleepy_lock_interval_ns)
				return true;
			this_cpu_write(sleepy_lock_seen_clock, 0);
		}
	}

	return false;
}
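
/*
 * Spin budgets. The limits are multiplied by pv_sleepy_lock_factor when
 * running paravirtualized and the lock has recently been held by a
 * preempted ("sleepy") vCPU.
 */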
static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return steal_spins * pv_sleepy_lock_factor;
	else
		return steal_spins;
}

static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return remote_steal_spins * pv_sleepy_lock_factor;
	else
		return remote_steal_spins;
}

static __always_inline int get_head_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return head_spins * pv_sleepy_lock_factor;
	else
		return head_spins;
}
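
/*
 * The lock word encodes the queue tail as CPU number + 1, so that an
 * encoded value of 0 means "no tail". These helpers convert between a CPU
 * number and the tail/owner fields of the lock word.
 */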
static inline u32 encode_tail_cpu(int cpu)
{
	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
}

static inline int decode_tail_cpu(u32 val)
{
	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
}

static inline int get_owner_cpu(u32 val)
{
	return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
}

/*
 * Try to acquire the lock if it was not already locked. If the tail matches
 * mytail then clear it, otherwise leave it unchanged. Return previous value.
 *
 * This is used by the head of the queue to acquire the lock and clean up
 * its tail if it was the last one queued.
 */
static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
{
	u32 newval = queued_spin_encode_locked_val();
	u32 prev, tmp;

	asm volatile(
"1:	lwarx	%0,0,%2,%7	# trylock_clean_tail		\n"
	/* This test is necessary if there could be stealers */
"	andi.	%1,%0,%5					\n"
"	bne	3f						\n"
	/* Test whether the lock tail == mytail */
"	and	%1,%0,%6					\n"
"	cmpw	0,%1,%3						\n"
	/* Merge the new locked value */
"	or	%1,%1,%4					\n"
"	bne	2f						\n"
	/* If the lock tail matched, then clear it, otherwise leave it. */
"	andc	%1,%1,%6					\n"
"2:	stwcx.	%1,0,%2						\n"
"	bne-	1b						\n"
"\t"	PPC_ACQUIRE_BARRIER "					\n"
"3:								\n"
	: "=&r" (prev), "=&r" (tmp)
	: "r" (&lock->val), "r" (tail), "r" (newval),
	  "i" (_Q_LOCKED_VAL),
	  "r" (_Q_TAIL_CPU_MASK),
	  "i" (_Q_SPIN_EH_HINT)
	: "cr0", "memory");

	return prev;
}

/*
 * Publish our tail, replacing the previous tail. Return previous value.
 *
 * This provides a release barrier for publishing the node; it pairs with
 * the acquire barrier in get_tail_qnode() when the next CPU finds this
 * tail value.
 */
static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
{
	u32 prev, tmp;

	asm volatile(
"\t"	PPC_RELEASE_BARRIER "				\n"
"1:	lwarx	%0,0,%2		# publish_tail_cpu	\n"
"	andc	%1,%0,%4				\n"
"	or	%1,%1,%3				\n"
"	stwcx.	%1,0,%2					\n"
"	bne-	1b					\n"
	: "=&r" (prev), "=&r" (tmp)
	: "r" (&lock->val), "r" (tail), "r" (_Q_TAIL_CPU_MASK)
	: "cr0", "memory");

	return prev;
}
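
/*
 * Set or clear the "must queue" bit in the lock word. While it is set,
 * trylock stealing is not permitted, which guarantees forward progress for
 * the waiter at the head of the queue.
 */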
static __always_inline u32 set_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# set_mustq	\n"
"	or	%0,%0,%2			\n"
"	stwcx.	%0,0,%1				\n"
"	bne-	1b				\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}

static __always_inline u32 clear_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# clear_mustq	\n"
"	andc	%0,%0,%2			\n"
"	stwcx.	%0,0,%1				\n"
"	bne-	1b				\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}
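
/*
 * "Sleepy" tracking: when a waiter observes the lock owner or a queued
 * predecessor vCPU to be preempted, it records that fact in the lock word
 * (_Q_SLEEPY_VAL) and/or a per-CPU timestamp; later waiters then scale up
 * their spin budgets.
 */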
static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
{
	u32 prev;
	u32 new = old | _Q_SLEEPY_VAL;

	BUG_ON(!(old & _Q_LOCKED_VAL));
	BUG_ON(old & _Q_SLEEPY_VAL);

	asm volatile(
"1:	lwarx	%0,0,%1		# try_set_sleepy	\n"
"	cmpw	0,%0,%2					\n"
"	bne-	2f					\n"
"	stwcx.	%3,0,%1					\n"
"	bne-	1b					\n"
"2:							\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (old), "r" (new)
	: "cr0", "memory");

	return likely(prev == old);
}

static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		if (!(val & _Q_SLEEPY_VAL))
			try_set_sleepy(lock, val);
	}
}

static __always_inline void seen_sleepy_lock(void)
{
	if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
}

static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		if (val & _Q_LOCKED_VAL) {
			if (!(val & _Q_SLEEPY_VAL))
				try_set_sleepy(lock, val);
		}
	}
}
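
/*
 * Find the qnode published by the CPU encoded in the tail of @val, by
 * matching the lock pointer among that CPU's (at most MAX_NODES) active
 * nodes.
 */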
static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
{
	int cpu = decode_tail_cpu(val);
	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
	int idx;

	/*
	 * After publishing the new tail and finding a previous tail in the
	 * previous val (which is the control dependency), this barrier
	 * orders the release barrier in publish_tail_cpu performed by the
	 * last CPU, with subsequently looking at its qnode structures
	 * after the barrier.
	 */
	smp_acquire__after_ctrl_dep();

	for (idx = 0; idx < MAX_NODES; idx++) {
		struct qnode *qnode = &qnodesp->nodes[idx];
		if (qnode->lock == lock)
			return qnode;
	}

	BUG();
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
{
	int owner;
	u32 yield_count;
	bool preempted = false;

	BUG_ON(!(val & _Q_LOCKED_VAL));

	if (!paravirt)
		goto relax;

	if (!pv_yield_owner)
		goto relax;

	owner = get_owner_cpu(val);
	yield_count = yield_count_of(owner);

	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	seen_sleepy_owner(lock, val);
	preempted = true;

	/*
	 * Read the lock word after sampling the yield count. On the other side
	 * there may be a wmb because the yield count update is done by the
	 * hypervisor preemption and the value update by the OS, however this
	 * ordering might reduce the chance of out of order accesses and
	 * improve the heuristic.
	 */
	smp_rmb();

	if (READ_ONCE(lock->val) == val) {
		if (mustq)
			clear_mustq(lock);
		yield_to_preempted(owner, yield_count);
		if (mustq)
			set_mustq(lock);
		spin_begin();

		/* Don't relax if we yielded. Maybe we should? */
		return preempted;
	}
	spin_begin();
relax:
	spin_cpu_relax();

	return preempted;
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	return __yield_to_locked_owner(lock, val, paravirt, false);
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	bool mustq = false;

	if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
		mustq = true;

	return __yield_to_locked_owner(lock, val, paravirt, mustq);
}
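
/*
 * Propagate the preempted lock owner's CPU number down the queue, so that
 * queued waiters can yield directly to the owner rather than only to their
 * immediate predecessor.
 */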
static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
{
	struct qnode *next;
	int owner;

	if (!paravirt)
		return;
	if (!pv_yield_propagate_owner)
		return;

	owner = get_owner_cpu(val);
	if (*set_yield_cpu == owner)
		return;

	next = READ_ONCE(node->next);
	if (!next)
		return;

	if (vcpu_is_preempted(owner)) {
		next->yield_cpu = owner;
		*set_yield_cpu = owner;
	} else if (*set_yield_cpu != -1) {
		next->yield_cpu = owner;
		*set_yield_cpu = owner;
	}
}

/* Called inside spin_begin() */
static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
{
	int prev_cpu = decode_tail_cpu(val);
	u32 yield_count;
	int yield_cpu;
	bool preempted = false;

	if (!paravirt)
		goto relax;

	if (!pv_yield_propagate_owner)
		goto yield_prev;

	yield_cpu = READ_ONCE(node->yield_cpu);
	if (yield_cpu == -1) {
		/* Propagate back the -1 CPU */
		if (node->next && node->next->yield_cpu != -1)
			node->next->yield_cpu = yield_cpu;
		goto yield_prev;
	}

	yield_count = yield_count_of(yield_cpu);
	if ((yield_count & 1) == 0)
		goto yield_prev; /* owner vcpu is running */

	spin_end();

	preempted = true;
	seen_sleepy_node(lock, val);

	smp_rmb();

	if (yield_cpu == node->yield_cpu) {
		if (node->next && node->next->yield_cpu != yield_cpu)
			node->next->yield_cpu = yield_cpu;
		yield_to_preempted(yield_cpu, yield_count);
		spin_begin();
		return preempted;
	}
	spin_begin();

yield_prev:
	if (!pv_yield_prev)
		goto relax;

	yield_count = yield_count_of(prev_cpu);
	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	preempted = true;
	seen_sleepy_node(lock, val);

	smp_rmb(); /* See __yield_to_locked_owner comment */

	if (!node->locked) {
		yield_to_preempted(prev_cpu, yield_count);
		spin_begin();
		return preempted;
	}
	spin_begin();

relax:
	spin_cpu_relax();

	return preempted;
}
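
/*
 * Decide whether to give up on stealing: either the steal budget is
 * exhausted, or (with NUMA) the smaller remote-steal budget is exhausted
 * and the owner is on a different node.
 */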
static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
{
	if (iters >= get_steal_spins(paravirt, sleepy))
		return true;

	if (IS_ENABLED(CONFIG_NUMA) &&
	    (iters >= get_remote_steal_spins(paravirt, sleepy))) {
		int cpu = get_owner_cpu(val);
		if (numa_node_id() != cpu_to_node(cpu))
			return true;
	}
	return false;
}
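
/*
 * Spin trying to take the lock without queueing, yielding to a preempted
 * owner where appropriate, until the steal budget runs out. Returns true
 * if the lock was acquired.
 */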
static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
{
	bool seen_preempted = false;
	bool sleepy = false;
	int iters = 0;
	u32 val;

	if (!steal_spins) {
		/* XXX: should spin_on_preempted_owner do anything here? */
		return false;
	}

	/* Attempt to steal the lock */
	spin_begin();
	do {
		bool preempted = false;

		val = READ_ONCE(lock->val);
		if (val & _Q_MUST_Q_VAL)
			break;
		spec_barrier();

		if (unlikely(!(val & _Q_LOCKED_VAL))) {
			spin_end();
			if (__queued_spin_trylock_steal(lock))
				return true;
			spin_begin();
		} else {
			preempted = yield_to_locked_owner(lock, val, paravirt);
		}

		if (paravirt && pv_sleepy_lock) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		if (preempted) {
			seen_preempted = true;
			sleepy = true;
			if (!pv_spin_on_preempted_owner)
				iters++;
			/*
			 * pv_spin_on_preempted_owner doesn't increase iters
			 * while the owner is preempted -- we won't interfere
			 * with it by definition. This could introduce some
			 * latency issue if we continually observe preempted
			 * owners, but hopefully that's a rare corner case of
			 * a badly oversubscribed system.
			 */
		} else {
			iters++;
		}
	} while (!steal_break(val, iters, paravirt, sleepy));

	spin_end();

	return false;
}
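
/*
 * Slow path proper: join the MCS queue, wait to become the queue head,
 * then spin (yielding to a preempted owner as needed) until the lock is
 * acquired, and finally pass the MCS lock on to the next waiter, if any.
 */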
static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
{
	struct qnodes *qnodesp;
	struct qnode *next, *node;
	u32 val, old, tail;
	bool seen_preempted = false;
	bool sleepy = false;
	bool mustq = false;
	int idx;
	int set_yield_cpu = -1;
	int iters = 0;

	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

	qnodesp = this_cpu_ptr(&qnodes);
	if (unlikely(qnodesp->count >= MAX_NODES)) {
		spec_barrier();
		while (!queued_spin_trylock(lock))
			cpu_relax();
		return;
	}

	idx = qnodesp->count++;
	/*
	 * Ensure that we increment the head node->count before initialising
	 * the actual node. If the compiler is kind enough to reorder these
	 * stores, then an IRQ could overwrite our assignments.
	 */
	barrier();
	node = &qnodesp->nodes[idx];
	node->next = NULL;
	node->lock = lock;
	node->cpu = smp_processor_id();
	node->yield_cpu = -1;
	node->locked = 0;

	tail = encode_tail_cpu(node->cpu);

	old = publish_tail_cpu(lock, tail);

	/*
	 * If there was a previous node, link it and wait until reaching the
	 * head of the waitqueue.
	 */
	if (old & _Q_TAIL_CPU_MASK) {
		struct qnode *prev = get_tail_qnode(lock, old);

		/* Link @node into the waitqueue. */
		WRITE_ONCE(prev->next, node);

		/* Wait for mcs node lock to be released */
		spin_begin();
		while (!node->locked) {
			spec_barrier();

			if (yield_to_prev(lock, node, old, paravirt))
				seen_preempted = true;
		}
		spec_barrier();
		spin_end();

		/* Clear out stale propagated yield_cpu */
		if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
			node->yield_cpu = -1;

		smp_rmb(); /* acquire barrier for the mcs lock */

		/*
		 * Generic qspinlocks have this prefetch here, but it seems
		 * like it could cause additional line transitions because
		 * the waiter will keep loading from it.
		 */
		if (_Q_SPIN_PREFETCH_NEXT) {
			next = READ_ONCE(node->next);
			if (next)
				prefetchw(next);
		}
	}

	/* We're at the head of the waitqueue, wait for the lock. */
again:
	spin_begin();
	for (;;) {
		bool preempted;

		val = READ_ONCE(lock->val);
		if (!(val & _Q_LOCKED_VAL))
			break;
		spec_barrier();

		if (paravirt && pv_sleepy_lock && maybe_stealers) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
		preempted = yield_head_to_locked_owner(lock, val, paravirt);
		if (!maybe_stealers)
			continue;

		if (preempted)
			seen_preempted = true;

		if (paravirt && preempted) {
			sleepy = true;
			if (!pv_spin_on_preempted_owner)
				iters++;
		} else {
			iters++;
		}

		if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
			mustq = true;
			set_mustq(lock);
			val |= _Q_MUST_Q_VAL;
		}
	}
	spec_barrier();
	spin_end();

	/* If we're the last queued, must clean up the tail. */
	old = trylock_clean_tail(lock, tail);
	if (unlikely(old & _Q_LOCKED_VAL)) {
		BUG_ON(!maybe_stealers);
		goto again; /* Can only be true if maybe_stealers. */
	}

	if ((old & _Q_TAIL_CPU_MASK) == tail)
		goto release; /* We were the tail, no next. */

	/* There is a next, must wait for node->next != NULL (MCS protocol) */
	next = READ_ONCE(node->next);
	if (!next) {
		spin_begin();
		while (!(next = READ_ONCE(node->next)))
			cpu_relax();
		spin_end();
	}
	spec_barrier();

	/*
	 * Unlock the next mcs waiter node. Release barrier is not required
	 * here because the acquirer is only accessing the lock word, and
	 * the acquire barrier we took the lock with orders that update vs
	 * this store to locked. The corresponding barrier is the smp_rmb()
	 * acquire barrier for mcs lock, above.
	 */
	if (paravirt && pv_prod_head) {
		int next_cpu = next->cpu;

		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
		if (vcpu_is_preempted(next_cpu))
			prod_cpu(next_cpu);
	} else {
		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
	}

release:
	qnodesp->count--; /* release the node */
}

void queued_spin_lock_slowpath(struct qspinlock *lock)
{
	/*
	 * This looks funny, but it induces the compiler to inline both
	 * sides of the branch rather than share code as when the condition
	 * is passed as the paravirt argument to the functions.
	 */
	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
		if (try_to_steal_lock(lock, true)) {
			spec_barrier();
			return;
		}
		queued_spin_lock_mcs_queue(lock, true);
	} else {
		if (try_to_steal_lock(lock, false)) {
			spec_barrier();
			return;
		}
		queued_spin_lock_mcs_queue(lock, false);
	}
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);

#ifdef CONFIG_PARAVIRT_SPINLOCKS
void pv_spinlocks_init(void)
{
}
#endif
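
/*
 * The tuning parameters above are exported through debugfs (qspl_* files
 * under arch_debugfs_dir) so the spin and paravirt yield heuristics can be
 * adjusted at runtime.
 */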
#include <linux/debugfs.h>

static int steal_spins_set(void *data, u64 val)
{
#if _Q_SPIN_TRY_LOCK_STEAL == 1
	/* MAYBE_STEAL remains true */
	steal_spins = val;
#else
	static DEFINE_MUTEX(lock);

	/*
	 * The lock slow path has a !maybe_stealers case that can assume
	 * the head of queue will not see concurrent waiters. That waiter
	 * is unsafe in the presence of stealers, so must keep them away
	 * from one another.
	 */

	mutex_lock(&lock);
	if (val && !steal_spins) {
		maybe_stealers = true;
		/* wait for queue head waiter to go away */
		synchronize_rcu();
		steal_spins = val;
	} else if (!val && steal_spins) {
		steal_spins = val;
		/* wait for all possible stealers to go away */
		synchronize_rcu();
		maybe_stealers = false;
	} else {
		steal_spins = val;
	}
	mutex_unlock(&lock);
#endif

	return 0;
}

static int steal_spins_get(void *data, u64 *val)
{
	*val = steal_spins;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");

static int remote_steal_spins_set(void *data, u64 val)
{
	remote_steal_spins = val;
	return 0;
}

static int remote_steal_spins_get(void *data, u64 *val)
{
	*val = remote_steal_spins;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");

static int head_spins_set(void *data, u64 val)
{
	head_spins = val;
	return 0;
}

static int head_spins_get(void *data, u64 *val)
{
	*val = head_spins;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");

static int pv_yield_owner_set(void *data, u64 val)
{
	pv_yield_owner = !!val;
	return 0;
}

static int pv_yield_owner_get(void *data, u64 *val)
{
	*val = pv_yield_owner;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");

static int pv_yield_allow_steal_set(void *data, u64 val)
{
	pv_yield_allow_steal = !!val;
	return 0;
}

static int pv_yield_allow_steal_get(void *data, u64 *val)
{
	*val = pv_yield_allow_steal;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");

static int pv_spin_on_preempted_owner_set(void *data, u64 val)
{
	pv_spin_on_preempted_owner = !!val;
	return 0;
}

static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
{
	*val = pv_spin_on_preempted_owner;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");

static int pv_sleepy_lock_set(void *data, u64 val)
{
	pv_sleepy_lock = !!val;
	return 0;
}

static int pv_sleepy_lock_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");

static int pv_sleepy_lock_sticky_set(void *data, u64 val)
{
	pv_sleepy_lock_sticky = !!val;
	return 0;
}

static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_sticky;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");

static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
{
	pv_sleepy_lock_interval_ns = val;
	return 0;
}

static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_interval_ns;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");

static int pv_sleepy_lock_factor_set(void *data, u64 val)
{
	pv_sleepy_lock_factor = val;
	return 0;
}

static int pv_sleepy_lock_factor_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_factor;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");

static int pv_yield_prev_set(void *data, u64 val)
{
	pv_yield_prev = !!val;
	return 0;
}

static int pv_yield_prev_get(void *data, u64 *val)
{
	*val = pv_yield_prev;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");

static int pv_yield_propagate_owner_set(void *data, u64 val)
{
	pv_yield_propagate_owner = !!val;
	return 0;
}

static int pv_yield_propagate_owner_get(void *data, u64 *val)
{
	*val = pv_yield_propagate_owner;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");

static int pv_prod_head_set(void *data, u64 val)
{
	pv_prod_head = !!val;
	return 0;
}

static int pv_prod_head_get(void *data, u64 *val)
{
	*val = pv_prod_head;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");

static __init int spinlock_debugfs_init(void)
{
	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
	debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
	if (is_shared_processor()) {
		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
		debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
		debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
		debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
		debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
		debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
		debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
		debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
	}

	return 0;
}
device_initcall(spinlock_debugfs_init);