s390/rwlock: introduce rwlock wait queueing

Like the common queued rwlock code the s390 implementation uses the
queued spinlock code on a spinlock_t embedded in the rwlock_t to achieve
the queueing. The encoding of the rwlock_t differs though, the counter
field in the rwlock_t is split into two parts. The upper two bytes hold
the write bit and the write wait counter, the lower two bytes hold the
read counter.

The arch_read_lock operation works exactly like the common qrwlock but
the enqueue operation for a writer follows a diffent logic. After the
failed inline try to get the rwlock in write, the writer first increases
the write wait counter, acquires the wait spin_lock for the queueing,
and then loops until there are no readers and the write bit is zero.
Without the write wait counter a CPU that just released the rwlock
could immediately reacquire the lock in the inline code, bypassing all
outstanding read and write waiters. For s390 this would cause massive
imbalances in favour of writers in case of a contended rwlock.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
Martin Schwidefsky 2017-03-24 17:32:23 +01:00
parent b96f7d881a
commit eb3b7b848f
4 changed files with 75 additions and 247 deletions

View File

@ -39,19 +39,24 @@ __ATOMIC_OPS(__atomic64_xor, long, "laxg")
#undef __ATOMIC_OPS
#undef __ATOMIC_OP
static inline void __atomic_add_const(int val, int *ptr)
{
asm volatile(
" asi %[ptr],%[val]\n"
: [ptr] "+Q" (*ptr) : [val] "i" (val) : "cc");
#define __ATOMIC_CONST_OP(op_name, op_type, op_string, op_barrier) \
static inline void op_name(op_type val, op_type *ptr) \
{ \
asm volatile( \
op_string " %[ptr],%[val]\n" \
op_barrier \
: [ptr] "+Q" (*ptr) : [val] "i" (val) : "cc", "memory");\
}
static inline void __atomic64_add_const(long val, long *ptr)
{
asm volatile(
" agsi %[ptr],%[val]\n"
: [ptr] "+Q" (*ptr) : [val] "i" (val) : "cc");
}
#define __ATOMIC_CONST_OPS(op_name, op_type, op_string) \
__ATOMIC_CONST_OP(op_name, op_type, op_string, "\n") \
__ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
__ATOMIC_CONST_OPS(__atomic_add_const, int, "asi")
__ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi")
#undef __ATOMIC_CONST_OPS
#undef __ATOMIC_CONST_OP
#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
@ -107,6 +112,11 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr")
#undef __ATOMIC64_OPS
#define __atomic_add_const(val, ptr) __atomic_add(val, ptr)
#define __atomic_add_const_barrier(val, ptr) __atomic_add(val, ptr)
#define __atomic64_add_const(val, ptr) __atomic64_add(val, ptr)
#define __atomic64_add_const_barrier(val, ptr) __atomic64_add(val, ptr)
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
static inline int __atomic_cmpxchg(int *ptr, int old, int new)

View File

@ -35,7 +35,6 @@ bool arch_vcpu_is_preempted(int cpu);
* (the type definitions are in asm/spinlock_types.h)
*/
void arch_lock_relax(int cpu);
void arch_spin_relax(arch_spinlock_t *lock);
void arch_spin_lock_wait(arch_spinlock_t *);
@ -110,164 +109,63 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
* read_can_lock - would read_trylock() succeed?
* @lock: the rwlock in question.
*/
#define arch_read_can_lock(x) ((int)(x)->lock >= 0)
#define arch_read_can_lock(x) (((x)->cnts & 0xffff0000) == 0)
/**
* write_can_lock - would write_trylock() succeed?
* @lock: the rwlock in question.
*/
#define arch_write_can_lock(x) ((x)->lock == 0)
extern int _raw_read_trylock_retry(arch_rwlock_t *lp);
extern int _raw_write_trylock_retry(arch_rwlock_t *lp);
#define arch_write_can_lock(x) ((x)->cnts == 0)
#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
#define arch_read_relax(rw) barrier()
#define arch_write_relax(rw) barrier()
static inline int arch_read_trylock_once(arch_rwlock_t *rw)
{
int old = ACCESS_ONCE(rw->lock);
return likely(old >= 0 &&
__atomic_cmpxchg_bool(&rw->lock, old, old + 1));
}
static inline int arch_write_trylock_once(arch_rwlock_t *rw)
{
int old = ACCESS_ONCE(rw->lock);
return likely(old == 0 &&
__atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000));
}
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
#define __RAW_OP_OR "lao"
#define __RAW_OP_AND "lan"
#define __RAW_OP_ADD "laa"
#define __RAW_LOCK(ptr, op_val, op_string) \
({ \
int old_val; \
\
typecheck(int *, ptr); \
asm volatile( \
op_string " %0,%2,%1\n" \
"bcr 14,0\n" \
: "=d" (old_val), "+Q" (*ptr) \
: "d" (op_val) \
: "cc", "memory"); \
old_val; \
})
#define __RAW_UNLOCK(ptr, op_val, op_string) \
({ \
int old_val; \
\
typecheck(int *, ptr); \
asm volatile( \
op_string " %0,%2,%1\n" \
: "=d" (old_val), "+Q" (*ptr) \
: "d" (op_val) \
: "cc", "memory"); \
old_val; \
})
extern void _raw_read_lock_wait(arch_rwlock_t *lp);
extern void _raw_write_lock_wait(arch_rwlock_t *lp, int prev);
void arch_read_lock_wait(arch_rwlock_t *lp);
void arch_write_lock_wait(arch_rwlock_t *lp);
static inline void arch_read_lock(arch_rwlock_t *rw)
{
int old;
old = __RAW_LOCK(&rw->lock, 1, __RAW_OP_ADD);
if (old < 0)
_raw_read_lock_wait(rw);
old = __atomic_add(1, &rw->cnts);
if (old & 0xffff0000)
arch_read_lock_wait(rw);
}
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
__RAW_UNLOCK(&rw->lock, -1, __RAW_OP_ADD);
__atomic_add_const_barrier(-1, &rw->cnts);
}
static inline void arch_write_lock(arch_rwlock_t *rw)
{
int old;
old = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
if (old != 0)
_raw_write_lock_wait(rw, old);
rw->owner = SPINLOCK_LOCKVAL;
if (!__atomic_cmpxchg_bool(&rw->cnts, 0, 0x30000))
arch_write_lock_wait(rw);
}
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
rw->owner = 0;
__RAW_UNLOCK(&rw->lock, 0x7fffffff, __RAW_OP_AND);
__atomic_add_barrier(-0x30000, &rw->cnts);
}
#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
extern void _raw_read_lock_wait(arch_rwlock_t *lp);
extern void _raw_write_lock_wait(arch_rwlock_t *lp);
static inline void arch_read_lock(arch_rwlock_t *rw)
{
if (!arch_read_trylock_once(rw))
_raw_read_lock_wait(rw);
}
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
int old;
do {
old = ACCESS_ONCE(rw->lock);
} while (!__atomic_cmpxchg_bool(&rw->lock, old, old - 1));
}
static inline void arch_write_lock(arch_rwlock_t *rw)
{
if (!arch_write_trylock_once(rw))
_raw_write_lock_wait(rw);
rw->owner = SPINLOCK_LOCKVAL;
}
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
typecheck(int, rw->lock);
rw->owner = 0;
asm volatile(
"st %1,%0\n"
: "+Q" (rw->lock)
: "d" (0)
: "cc", "memory");
}
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
if (!arch_read_trylock_once(rw))
return _raw_read_trylock_retry(rw);
return 1;
int old;
old = READ_ONCE(rw->cnts);
return (!(old & 0xffff0000) &&
__atomic_cmpxchg_bool(&rw->cnts, old, old + 1));
}
static inline int arch_write_trylock(arch_rwlock_t *rw)
{
if (!arch_write_trylock_once(rw) && !_raw_write_trylock_retry(rw))
return 0;
rw->owner = SPINLOCK_LOCKVAL;
return 1;
}
int old;
static inline void arch_read_relax(arch_rwlock_t *rw)
{
arch_lock_relax(rw->owner);
}
static inline void arch_write_relax(arch_rwlock_t *rw)
{
arch_lock_relax(rw->owner);
old = READ_ONCE(rw->cnts);
return !old && __atomic_cmpxchg_bool(&rw->cnts, 0, 0x30000);
}
#endif /* __ASM_SPINLOCK_H */

View File

@ -12,8 +12,8 @@ typedef struct {
#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, }
typedef struct {
int lock;
int owner;
int cnts;
arch_spinlock_t wait;
} arch_rwlock_t;
#define __ARCH_RW_LOCK_UNLOCKED { 0 }

View File

@ -268,129 +268,49 @@ int arch_spin_trylock_retry(arch_spinlock_t *lp)
}
EXPORT_SYMBOL(arch_spin_trylock_retry);
void _raw_read_lock_wait(arch_rwlock_t *rw)
void arch_read_lock_wait(arch_rwlock_t *rw)
{
int count = spin_retry;
int owner, old;
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
__RAW_LOCK(&rw->lock, -1, __RAW_OP_ADD);
#endif
owner = 0;
while (1) {
if (count-- <= 0) {
if (owner && arch_vcpu_is_preempted(owner - 1))
smp_yield_cpu(owner - 1);
count = spin_retry;
}
old = ACCESS_ONCE(rw->lock);
owner = ACCESS_ONCE(rw->owner);
if (old < 0)
continue;
if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1))
return;
if (unlikely(in_interrupt())) {
while (READ_ONCE(rw->cnts) & 0x10000)
barrier();
return;
}
}
EXPORT_SYMBOL(_raw_read_lock_wait);
int _raw_read_trylock_retry(arch_rwlock_t *rw)
/* Remove this reader again to allow recursive read locking */
__atomic_add_const(-1, &rw->cnts);
/* Put the reader into the wait queue */
arch_spin_lock(&rw->wait);
/* Now add this reader to the count value again */
__atomic_add_const(1, &rw->cnts);
/* Loop until the writer is done */
while (READ_ONCE(rw->cnts) & 0x10000)
barrier();
arch_spin_unlock(&rw->wait);
}
EXPORT_SYMBOL(arch_read_lock_wait);
void arch_write_lock_wait(arch_rwlock_t *rw)
{
int count = spin_retry;
int old;
while (count-- > 0) {
old = ACCESS_ONCE(rw->lock);
if (old < 0)
continue;
if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1))
return 1;
}
return 0;
}
EXPORT_SYMBOL(_raw_read_trylock_retry);
/* Add this CPU to the write waiters */
__atomic_add(0x20000, &rw->cnts);
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
/* Put the writer into the wait queue */
arch_spin_lock(&rw->wait);
void _raw_write_lock_wait(arch_rwlock_t *rw, int prev)
{
int count = spin_retry;
int owner, old;
owner = 0;
while (1) {
if (count-- <= 0) {
if (owner && arch_vcpu_is_preempted(owner - 1))
smp_yield_cpu(owner - 1);
count = spin_retry;
}
old = ACCESS_ONCE(rw->lock);
owner = ACCESS_ONCE(rw->owner);
smp_mb();
if (old >= 0) {
prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
old = prev;
}
if ((old & 0x7fffffff) == 0 && prev >= 0)
old = READ_ONCE(rw->cnts);
if ((old & 0x1ffff) == 0 &&
__atomic_cmpxchg_bool(&rw->cnts, old, old | 0x10000))
/* Got the lock */
break;
barrier();
}
arch_spin_unlock(&rw->wait);
}
EXPORT_SYMBOL(_raw_write_lock_wait);
#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
void _raw_write_lock_wait(arch_rwlock_t *rw)
{
int count = spin_retry;
int owner, old, prev;
prev = 0x80000000;
owner = 0;
while (1) {
if (count-- <= 0) {
if (owner && arch_vcpu_is_preempted(owner - 1))
smp_yield_cpu(owner - 1);
count = spin_retry;
}
old = ACCESS_ONCE(rw->lock);
owner = ACCESS_ONCE(rw->owner);
if (old >= 0 &&
__atomic_cmpxchg_bool(&rw->lock, old, old | 0x80000000))
prev = old;
else
smp_mb();
if ((old & 0x7fffffff) == 0 && prev >= 0)
break;
}
}
EXPORT_SYMBOL(_raw_write_lock_wait);
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
int _raw_write_trylock_retry(arch_rwlock_t *rw)
{
int count = spin_retry;
int old;
while (count-- > 0) {
old = ACCESS_ONCE(rw->lock);
if (old)
continue;
if (__atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000))
return 1;
}
return 0;
}
EXPORT_SYMBOL(_raw_write_trylock_retry);
void arch_lock_relax(int cpu)
{
if (!cpu)
return;
if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(cpu - 1))
return;
smp_yield_cpu(cpu - 1);
}
EXPORT_SYMBOL(arch_lock_relax);
EXPORT_SYMBOL(arch_write_lock_wait);
void arch_spin_relax(arch_spinlock_t *lp)
{