powerpc/qspinlock: use a half-word store to unlock to avoid larx/stcx.

The first 16 bits of the lock are only modified by the owner, and other modifications always use atomic operations on the entire 32 bits, so unlocks can use plain stores on the 16 bits. This is the same kind of optimisation done by core qspinlock code. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20221126095932.1234527-3-npiggin@gmail.com
2022-11-26 19:59:17 +10:00 · 2022-11-26 19:59:17 +10:00 · 4c93c2e4b9
commit 4c93c2e4b9
parent 84990b1695
2 changed files with 18 additions and 7 deletions
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@ -37,11 +37,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)

 static inline void queued_spin_unlock(struct qspinlock *lock)
 {
-	for (;;) {
-		int val = atomic_read(&lock->val);
-		if (atomic_cmpxchg_release(&lock->val, val, val & ~_Q_LOCKED_VAL) == val)
-			return;
-	}
+	smp_store_release(&lock->locked, 0);
 }

 #define arch_spin_is_locked(l)		queued_spin_is_locked(l)
--- a/arch/powerpc/include/asm/qspinlock_types.h
+++ b/arch/powerpc/include/asm/qspinlock_types.h
@ -3,12 +3,27 @@
 #define _ASM_POWERPC_QSPINLOCK_TYPES_H

 #include <linux/types.h>
+#include <asm/byteorder.h>

 typedef struct qspinlock {
-	atomic_t val;
+	union {
+		atomic_t val;
+
+#ifdef __LITTLE_ENDIAN
+		struct {
+			u16	locked;
+			u8	reserved[2];
+		};
+#else
+		struct {
+			u8	reserved[2];
+			u16	locked;
+		};
+#endif
+	};
 } arch_spinlock_t;

-#define	__ARCH_SPIN_LOCK_UNLOCKED	{ .val = ATOMIC_INIT(0) }
+#define	__ARCH_SPIN_LOCK_UNLOCKED	{ { .val = ATOMIC_INIT(0) } }

 /*
 * Bitfields in the lock word: