linux/arch/csky/include/asm/barrier.h
Guo Ren 186f69b64c csky: atomic: Optimize cmpxchg with acquire & release
Optimize cmpxchg with ASM acquire/release fence ASM instructions
instead of previous generic based. Prevent a fence when cmxchg's
first load != old.

Comments by Rutland:

8e86f0b409a4 ("arm64: atomics: fix use of acquire + release for
full barrier semantics")

Comments by Boqun:

FWIW, you probably need to make sure that a barrier instruction inside
an lr/sc loop is a good thing. IIUC, the execution time of a barrier
instruction is determined by the status of store buffers and invalidate
queues (and probably other stuffs), so it may increase the execution
time of the lr/sc loop, and make it unlikely to succeed. But this really
depends on how the arch executes these instructions.

Link: https://lore.kernel.org/linux-riscv/CAJF2gTSAxpAi=LbAdu7jntZRUa=-dJwL0VfmDfBV5MHB=rcZ-w@mail.gmail.com/T/#m27a0f1342995deae49ce1d0e1f2683f8a181d6c3
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
2022-04-25 13:51:42 +08:00

89 lines
2.5 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_CSKY_BARRIER_H
#define __ASM_CSKY_BARRIER_H
#ifndef __ASSEMBLY__
#define nop() asm volatile ("nop\n":::"memory")
#ifdef CONFIG_SMP
/*
* bar.brwarws: ordering barrier for all load/store instructions
* before/after
*
* |31|30 26|25 21|20 16|15 10|9 5|4 0|
* 1 10000 00000 00000 100001 00001 0 bw br aw ar
*
* b: before
* a: after
* r: read
* w: write
*
* Here are all combinations:
*
* bar.brw
* bar.br
* bar.bw
* bar.arw
* bar.ar
* bar.aw
* bar.brwarw
* bar.brarw
* bar.bwarw
* bar.brwar
* bar.brwaw
* bar.brar
* bar.bwaw
*/
#define FULL_FENCE ".long 0x842fc000\n"
#define ACQUIRE_FENCE ".long 0x8427c000\n"
#define RELEASE_FENCE ".long 0x842ec000\n"
#define __bar_brw() asm volatile (".long 0x842cc000\n":::"memory")
#define __bar_br() asm volatile (".long 0x8424c000\n":::"memory")
#define __bar_bw() asm volatile (".long 0x8428c000\n":::"memory")
#define __bar_arw() asm volatile (".long 0x8423c000\n":::"memory")
#define __bar_ar() asm volatile (".long 0x8421c000\n":::"memory")
#define __bar_aw() asm volatile (".long 0x8422c000\n":::"memory")
#define __bar_brwarw() asm volatile (FULL_FENCE:::"memory")
#define __bar_brarw() asm volatile (ACQUIRE_FENCE:::"memory")
#define __bar_bwarw() asm volatile (".long 0x842bc000\n":::"memory")
#define __bar_brwar() asm volatile (".long 0x842dc000\n":::"memory")
#define __bar_brwaw() asm volatile (RELEASE_FENCE:::"memory")
#define __bar_brar() asm volatile (".long 0x8425c000\n":::"memory")
#define __bar_brar() asm volatile (".long 0x8425c000\n":::"memory")
#define __bar_bwaw() asm volatile (".long 0x842ac000\n":::"memory")
#define __smp_mb() __bar_brwarw()
#define __smp_rmb() __bar_brar()
#define __smp_wmb() __bar_bwaw()
#define __smp_acquire_fence() __bar_brarw()
#define __smp_release_fence() __bar_brwaw()
#endif /* CONFIG_SMP */
/*
* sync: completion barrier, all sync.xx instructions
* guarantee the last response received by bus transaction
* made by ld/st instructions before sync.s
* sync.s: inherit from sync, but also shareable to other cores
* sync.i: inherit from sync, but also flush cpu pipeline
* sync.is: the same with sync.i + sync.s
*/
#define mb() asm volatile ("sync\n":::"memory")
#ifdef CONFIG_CPU_HAS_CACHEV2
/*
* Using three sync.is to prevent speculative PTW
*/
#define sync_is() asm volatile ("sync.is\nsync.is\nsync.is\n":::"memory")
#endif
#include <asm-generic/barrier.h>
#endif /* __ASSEMBLY__ */
#endif /* __ASM_CSKY_BARRIER_H */