Heiko Carstens 0ccc8b7ac8 s390/bitops,atomic: add missing memory barriers
When reworking the bitops and atomic ops I missed that those instructions
that got atomic behaviour only perform a "specific-operand-serialization"
instead of a full "serialization".
The compare-and-swap instruction used before performs a full serialization
before and after the instruction is executed, which means it has full
memory barrier semantics.
In order to give the new bitops and atomic ops functions also full memory
barrier semantics add a "bcr 14,0" before and after each of those new
instructions which performs full serialization as well.

This restores memory barrier semantics for bitops and atomic ops functions
which return values, like e.g. atomic_add_return(), but not for functions
which do not return a value, like e.g. atomic_add().
This is consistent with other architectures and what common code requires.

Cc: stable@vger.kernel.org # v3.13+
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2014-04-01 09:23:35 +02:00

483 lines
12 KiB
C

/*
* Copyright IBM Corp. 1999,2013
*
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
*
* The description below was taken in large parts from the powerpc
* bitops header file:
* Within a word, bits are numbered LSB first. Lots of places make
* this assumption by directly testing bits with (val & (1<<nr)).
* This can cause confusion for large (> 1 word) bitmaps on a
* big-endian system because, unlike little endian, the number of each
* bit depends on the word size.
*
* The bitop functions are defined to work on unsigned longs, so for an
* s390x system the bits end up numbered:
* |63..............0|127............64|191...........128|255...........192|
* and on s390:
* |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224|
*
* There are a few little-endian macros used mostly for filesystem
* bitmaps, these work on similar bit array layouts, but
* byte-oriented:
* |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56|
*
* The main difference is that bits 3-5 (64b) or 3-4 (32b) of the bit
* number need to be inverted compared to the big-endian bit
* fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b).
*
* We also have special functions which work with an MSB0 encoding:
* on an s390x system the bits are numbered:
* |0..............63|64............127|128...........191|192...........255|
* and on s390:
* |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
*
* The main difference is that the bit numbers 0-63 (64b) or 0-31 (32b)
* within each word are reversed compared to the LSB0 encoded bit
* fields. This can be achieved by XOR with 0x3f (64b) or 0x1f (32b).
*
*/
#ifndef _S390_BITOPS_H
#define _S390_BITOPS_H
#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
#endif
#include <linux/typecheck.h>
#include <linux/compiler.h>
#include <asm/barrier.h>
#define __BITOPS_NO_BARRIER "\n"
#ifndef CONFIG_64BIT
#define __BITOPS_OR "or"
#define __BITOPS_AND "nr"
#define __BITOPS_XOR "xr"
#define __BITOPS_BARRIER "\n"
#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \
({ \
unsigned long __old, __new; \
\
typecheck(unsigned long *, (__addr)); \
asm volatile( \
" l %0,%2\n" \
"0: lr %1,%0\n" \
__op_string " %1,%3\n" \
" cs %0,%1,%2\n" \
" jl 0b" \
: "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\
: "d" (__val) \
: "cc", "memory"); \
__old; \
})
#else /* CONFIG_64BIT */
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
#define __BITOPS_OR "laog"
#define __BITOPS_AND "lang"
#define __BITOPS_XOR "laxg"
#define __BITOPS_BARRIER "bcr 14,0\n"
#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \
({ \
unsigned long __old; \
\
typecheck(unsigned long *, (__addr)); \
asm volatile( \
__barrier \
__op_string " %0,%2,%1\n" \
__barrier \
: "=d" (__old), "+Q" (*(__addr)) \
: "d" (__val) \
: "cc", "memory"); \
__old; \
})
#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
#define __BITOPS_OR "ogr"
#define __BITOPS_AND "ngr"
#define __BITOPS_XOR "xgr"
#define __BITOPS_BARRIER "\n"
#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \
({ \
unsigned long __old, __new; \
\
typecheck(unsigned long *, (__addr)); \
asm volatile( \
" lg %0,%2\n" \
"0: lgr %1,%0\n" \
__op_string " %1,%3\n" \
" csg %0,%1,%2\n" \
" jl 0b" \
: "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\
: "d" (__val) \
: "cc", "memory"); \
__old; \
})
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
#endif /* CONFIG_64BIT */
/* Number of unsigned longs needed to hold @bits bits, rounded up. */
#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
/*
 * __bitops_word - address of the unsigned long that contains bit @nr
 * @nr: bit number, counted from the start of the bitmap
 * @ptr: start of the bitmap
 *
 * The bit-within-word part of @nr is masked off, leaving a multiple of
 * BITS_PER_LONG, which is then converted from bits to bytes and added to
 * the base address.
 */
static inline unsigned long *
__bitops_word(unsigned long nr, volatile unsigned long *ptr)
{
	unsigned long word_bits = nr & ~(unsigned long)(BITS_PER_LONG - 1);
	unsigned long base = (unsigned long)ptr;

	return (unsigned long *)(base + (word_bits >> 3));
}
/*
 * __bitops_byte - address of the byte that contains bit @nr
 * @nr: bit number, counted from the start of the bitmap
 * @ptr: start of the bitmap
 *
 * XOR with (BITS_PER_LONG - 8) inverts the byte-within-word bits of @nr,
 * so that low bit numbers select the last byte of a word (bit 0 lives in
 * byte BITS_PER_LONG / 8 - 1 of the first word).
 */
static inline unsigned char *
__bitops_byte(unsigned long nr, volatile unsigned long *ptr)
{
	unsigned long byte_index = (nr ^ (BITS_PER_LONG - 8)) >> 3;
	unsigned char *bytes = (unsigned char *)ptr;

	return bytes + byte_index;
}
/*
 * set_bit - set bit @nr in the bitmap at @ptr
 * @nr: bit number (LSB0 numbering within each unsigned long)
 * @ptr: start of the bitmap
 *
 * The update goes through __BITOPS_LOOP with __BITOPS_NO_BARRIER, i.e. no
 * memory barrier is requested. With zEC12 features and a compile-time
 * constant @nr, a single "oi" on the containing byte is used instead.
 */
static inline void set_bit(unsigned long nr, volatile unsigned long *ptr)
{
	unsigned long *word;
	unsigned long bit;

#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
	if (__builtin_constant_p(nr)) {
		unsigned char *byte = __bitops_byte(nr, ptr);

		asm volatile(
			"oi %0,%b1\n"
			: "+Q" (*byte)
			: "i" (1 << (nr & 7))
			: "cc", "memory");
		return;
	}
#endif
	word = __bitops_word(nr, ptr);
	bit = 1UL << (nr & (BITS_PER_LONG - 1));
	__BITOPS_LOOP(word, bit, __BITOPS_OR, __BITOPS_NO_BARRIER);
}
/*
 * clear_bit - clear bit @nr in the bitmap at @ptr
 * @nr: bit number (LSB0 numbering within each unsigned long)
 * @ptr: start of the bitmap
 *
 * The update goes through __BITOPS_LOOP with __BITOPS_NO_BARRIER, i.e. no
 * memory barrier is requested. With zEC12 features and a compile-time
 * constant @nr, a single "ni" on the containing byte is used instead.
 */
static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr)
{
	unsigned long *word;
	unsigned long keep;

#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
	if (__builtin_constant_p(nr)) {
		unsigned char *byte = __bitops_byte(nr, ptr);

		asm volatile(
			"ni %0,%b1\n"
			: "+Q" (*byte)
			: "i" (~(1 << (nr & 7)))
			: "cc", "memory");
		return;
	}
#endif
	word = __bitops_word(nr, ptr);
	/* AND mask with every bit set except the one being cleared. */
	keep = ~(1UL << (nr & (BITS_PER_LONG - 1)));
	__BITOPS_LOOP(word, keep, __BITOPS_AND, __BITOPS_NO_BARRIER);
}
/*
 * change_bit - toggle bit @nr in the bitmap at @ptr
 * @nr: bit number (LSB0 numbering within each unsigned long)
 * @ptr: start of the bitmap
 *
 * The update goes through __BITOPS_LOOP with __BITOPS_NO_BARRIER, i.e. no
 * memory barrier is requested. With zEC12 features and a compile-time
 * constant @nr, a single "xi" on the containing byte is used instead.
 */
static inline void change_bit(unsigned long nr, volatile unsigned long *ptr)
{
	unsigned long *word;
	unsigned long bit;

#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
	if (__builtin_constant_p(nr)) {
		unsigned char *byte = __bitops_byte(nr, ptr);

		asm volatile(
			"xi %0,%b1\n"
			: "+Q" (*byte)
			: "i" (1 << (nr & 7))
			: "cc", "memory");
		return;
	}
#endif
	word = __bitops_word(nr, ptr);
	bit = 1UL << (nr & (BITS_PER_LONG - 1));
	__BITOPS_LOOP(word, bit, __BITOPS_XOR, __BITOPS_NO_BARRIER);
}
/*
 * test_and_set_bit - set bit @nr and report its previous state
 * @nr: bit number (LSB0 numbering within each unsigned long)
 * @ptr: start of the bitmap
 *
 * Uses __BITOPS_LOOP with __BITOPS_BARRIER, the barrier variant used by
 * the value-returning bitops. Returns 1 if the bit was set in the word
 * value handed back by __BITOPS_LOOP, 0 otherwise.
 */
static inline int
test_and_set_bit(unsigned long nr, volatile unsigned long *ptr)
{
	unsigned long bit = 1UL << (nr & (BITS_PER_LONG - 1));
	unsigned long *word = __bitops_word(nr, ptr);
	unsigned long prev;

	prev = __BITOPS_LOOP(word, bit, __BITOPS_OR, __BITOPS_BARRIER);
	return !!(prev & bit);
}
/*
 * test_and_clear_bit - clear bit @nr and report its previous state
 * @nr: bit number (LSB0 numbering within each unsigned long)
 * @ptr: start of the bitmap
 *
 * Uses __BITOPS_LOOP with __BITOPS_BARRIER, the barrier variant used by
 * the value-returning bitops. Returns 1 if the bit was set in the word
 * value handed back by __BITOPS_LOOP, 0 otherwise.
 */
static inline int
test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr)
{
	unsigned long bit = 1UL << (nr & (BITS_PER_LONG - 1));
	unsigned long *word = __bitops_word(nr, ptr);
	unsigned long prev;

	/* AND with the complement of the bit to clear it. */
	prev = __BITOPS_LOOP(word, ~bit, __BITOPS_AND, __BITOPS_BARRIER);
	return !!(prev & bit);
}
/*
 * test_and_change_bit - toggle bit @nr and report its previous state
 * @nr: bit number (LSB0 numbering within each unsigned long)
 * @ptr: start of the bitmap
 *
 * Uses __BITOPS_LOOP with __BITOPS_BARRIER, the barrier variant used by
 * the value-returning bitops. Returns 1 if the bit was set in the word
 * value handed back by __BITOPS_LOOP, 0 otherwise.
 */
static inline int
test_and_change_bit(unsigned long nr, volatile unsigned long *ptr)
{
	unsigned long bit = 1UL << (nr & (BITS_PER_LONG - 1));
	unsigned long *word = __bitops_word(nr, ptr);
	unsigned long prev;

	prev = __BITOPS_LOOP(word, bit, __BITOPS_XOR, __BITOPS_BARRIER);
	return !!(prev & bit);
}
/*
 * __set_bit - set bit @nr with a plain read-modify-write
 * @nr: bit number (LSB0 numbering within each unsigned long)
 * @ptr: start of the bitmap
 *
 * Operates on the containing byte with an ordinary C assignment; no
 * atomic instruction or barrier is involved.
 */
static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr)
{
	unsigned char *byte = __bitops_byte(nr, ptr);
	unsigned int shift = nr & 7;

	*byte |= 1 << shift;
}
/*
 * __clear_bit - clear bit @nr with a plain read-modify-write
 * @nr: bit number (LSB0 numbering within each unsigned long)
 * @ptr: start of the bitmap
 *
 * Operates on the containing byte with an ordinary C assignment; no
 * atomic instruction or barrier is involved.
 */
static inline void
__clear_bit(unsigned long nr, volatile unsigned long *ptr)
{
	unsigned char *byte = __bitops_byte(nr, ptr);
	unsigned int shift = nr & 7;

	*byte &= ~(1 << shift);
}
/*
 * __change_bit - toggle bit @nr with a plain read-modify-write
 * @nr: bit number (LSB0 numbering within each unsigned long)
 * @ptr: start of the bitmap
 *
 * Operates on the containing byte with an ordinary C assignment; no
 * atomic instruction or barrier is involved.
 */
static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr)
{
	unsigned char *byte = __bitops_byte(nr, ptr);
	unsigned int shift = nr & 7;

	*byte ^= 1 << shift;
}
/*
 * __test_and_set_bit - set bit @nr (plain read-modify-write), return old state
 * @nr: bit number (LSB0 numbering within each unsigned long)
 * @ptr: start of the bitmap
 *
 * Returns 1 if the bit was set before the update, 0 otherwise. No atomic
 * instruction or barrier is involved.
 */
static inline int
__test_and_set_bit(unsigned long nr, volatile unsigned long *ptr)
{
	unsigned char *byte = __bitops_byte(nr, ptr);
	unsigned int shift = nr & 7;
	unsigned char before = *byte;

	*byte |= 1 << shift;
	return (before >> shift) & 1;
}
/*
 * __test_and_clear_bit - clear bit @nr (plain read-modify-write), return old state
 * @nr: bit number (LSB0 numbering within each unsigned long)
 * @ptr: start of the bitmap
 *
 * Returns 1 if the bit was set before the update, 0 otherwise. No atomic
 * instruction or barrier is involved.
 */
static inline int
__test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr)
{
	unsigned char *byte = __bitops_byte(nr, ptr);
	unsigned int shift = nr & 7;
	unsigned char before = *byte;

	*byte &= ~(1 << shift);
	return (before >> shift) & 1;
}
/*
 * __test_and_change_bit - toggle bit @nr (plain read-modify-write), return old state
 * @nr: bit number (LSB0 numbering within each unsigned long)
 * @ptr: start of the bitmap
 *
 * Returns 1 if the bit was set before the update, 0 otherwise. No atomic
 * instruction or barrier is involved.
 */
static inline int
__test_and_change_bit(unsigned long nr, volatile unsigned long *ptr)
{
	unsigned char *byte = __bitops_byte(nr, ptr);
	unsigned int shift = nr & 7;
	unsigned char before = *byte;

	*byte ^= 1 << shift;
	return (before >> shift) & 1;
}
/*
 * test_bit - report whether bit @nr is set
 * @nr: bit number (LSB0 numbering within each unsigned long)
 * @ptr: start of the bitmap
 *
 * Reads only the byte that holds the bit, through a volatile-qualified
 * pointer. The byte index uses the same XOR with (BITS_PER_LONG - 8) as
 * __bitops_byte(). Returns 1 if the bit is set, 0 otherwise.
 */
static inline int test_bit(unsigned long nr, const volatile unsigned long *ptr)
{
	const volatile unsigned char *bytes = (const volatile unsigned char *)ptr;
	unsigned long byte_index = (nr ^ (BITS_PER_LONG - 8)) >> 3;

	return (bytes[byte_index] >> (nr & 7)) & 1;
}
/*
* Functions which use MSB0 bit numbering.
* On an s390x system the bits are numbered:
* |0..............63|64............127|128...........191|192...........255|
* and on s390:
* |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
*/
/*
* Declarations only; the implementations are not part of this header.
* NOTE(review): presumably these search for a set bit using the MSB0
* numbering described above, in the first @size bits at @addr
* (find_next_bit_inv starting at @offset) - confirm against the
* implementation.
*/
unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size);
unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
unsigned long offset);
/*
 * set_bit_inv - set_bit() for a bit number given in MSB0 numbering
 * @nr: bit number, MSB0 numbering within each unsigned long
 * @ptr: start of the bitmap
 *
 * XOR with (BITS_PER_LONG - 1) converts the bit-within-word part of @nr
 * from MSB0 to LSB0 numbering before forwarding to set_bit().
 */
static inline void set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
{
	/* No "return <expr>;" here: ISO C forbids it in a void function. */
	set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
/*
 * clear_bit_inv - clear_bit() for a bit number given in MSB0 numbering
 * @nr: bit number, MSB0 numbering within each unsigned long
 * @ptr: start of the bitmap
 *
 * XOR with (BITS_PER_LONG - 1) converts the bit-within-word part of @nr
 * from MSB0 to LSB0 numbering before forwarding to clear_bit().
 */
static inline void clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
{
	/* No "return <expr>;" here: ISO C forbids it in a void function. */
	clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
/*
 * __set_bit_inv - __set_bit() for a bit number given in MSB0 numbering
 * @nr: bit number, MSB0 numbering within each unsigned long
 * @ptr: start of the bitmap
 *
 * XOR with (BITS_PER_LONG - 1) converts the bit-within-word part of @nr
 * from MSB0 to LSB0 numbering before forwarding to __set_bit().
 */
static inline void __set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
{
	/* No "return <expr>;" here: ISO C forbids it in a void function. */
	__set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
/*
 * __clear_bit_inv - __clear_bit() for a bit number given in MSB0 numbering
 * @nr: bit number, MSB0 numbering within each unsigned long
 * @ptr: start of the bitmap
 *
 * XOR with (BITS_PER_LONG - 1) converts the bit-within-word part of @nr
 * from MSB0 to LSB0 numbering before forwarding to __clear_bit().
 */
static inline void __clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
{
	/* No "return <expr>;" here: ISO C forbids it in a void function. */
	__clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
/*
 * test_bit_inv - test_bit() for a bit number given in MSB0 numbering
 * @nr: bit number, MSB0 numbering within each unsigned long
 * @ptr: start of the bitmap
 *
 * Returns whatever test_bit() returns for the converted bit number.
 */
static inline int test_bit_inv(unsigned long nr,
			       const volatile unsigned long *ptr)
{
	/* Flip the bit-within-word part of nr: MSB0 -> LSB0 numbering. */
	unsigned long lsb0_nr = nr ^ (BITS_PER_LONG - 1);

	return test_bit(lsb0_nr, ptr);
}
#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
/**
* __flogr - find leftmost one
* @word: The word to search
*
* Returns the bit number of the most significant bit set,
* where the most significant bit has bit number 0.
* If no bit is set this function returns 64.
*
* Compile-time constant arguments are handled in plain C so the
* result can be computed by the compiler; all other arguments are
* handled by the flogr instruction. The C path uses 64-bit masks,
* i.e. it assumes a 64-bit unsigned long.
*/
static inline unsigned char __flogr(unsigned long word)
{
if (__builtin_constant_p(word)) {
/* Constant input: binary search for the leftmost one bit. */
unsigned long bit = 0;
if (!word)
return 64;
/*
* Each step tests the upper half of the remaining window; if it is
* empty, shift the word left by that amount and add the skipped
* bits to the result.
*/
if (!(word & 0xffffffff00000000UL)) {
word <<= 32;
bit += 32;
}
if (!(word & 0xffff000000000000UL)) {
word <<= 16;
bit += 16;
}
if (!(word & 0xff00000000000000UL)) {
word <<= 8;
bit += 8;
}
if (!(word & 0xf000000000000000UL)) {
word <<= 4;
bit += 4;
}
if (!(word & 0xc000000000000000UL)) {
word <<= 2;
bit += 2;
}
if (!(word & 0x8000000000000000UL)) {
word <<= 1;
bit += 1;
}
return bit;
} else {
/*
* The operand/result is pinned to register 4 and a second output,
* never read afterwards, is pinned to register 5.
* NOTE(review): presumably flogr writes an even/odd register pair
* and "out" only exists to tell the compiler that register 5 is
* overwritten - confirm against the z/Architecture Principles of
* Operation.
*/
register unsigned long bit asm("4") = word;
register unsigned long out asm("5");
asm volatile(
" flogr %[bit],%[bit]\n"
: [bit] "+d" (bit), [out] "=d" (out) : : "cc");
return bit;
}
}
/**
 * __ffs - find first (least-significant) set bit in a word
 * @word: The word to search
 *
 * Isolates the lowest set bit with (word & -word) and converts the MSB0
 * position reported by __flogr() to LSB0 numbering.
 *
 * Undefined if no bit exists, so code should check against 0 first.
 */
static inline unsigned long __ffs(unsigned long word)
{
	unsigned long lowest = word & -word;
	unsigned char msb0_pos = __flogr(lowest);

	return msb0_pos ^ (BITS_PER_LONG - 1);
}
/**
 * ffs - find first bit set
 * @word: the word to search
 *
 * Returns the 1-based position of the least significant set bit. This is
 * defined the same way as the libc and compiler builtin ffs routines
 * (man ffs).
 *
 * The final AND with (2 * BITS_PER_LONG - 1) wraps the value produced
 * when __flogr() reports "no bit set" back to 0.
 */
static inline int ffs(int word)
{
	unsigned int val = (unsigned int)word;
	unsigned int lowest = val & -val;
	unsigned char msb0_pos = __flogr(lowest);
	unsigned long wrap = 2 * BITS_PER_LONG - 1;

	return (1 + (msb0_pos ^ (BITS_PER_LONG - 1))) & wrap;
}
/**
 * __fls - find last (most-significant) set bit in a long word
 * @word: the word to search
 *
 * Converts the MSB0 position reported by __flogr() to LSB0 numbering.
 *
 * Undefined if no set bit exists, so code should check against 0 first.
 */
static inline unsigned long __fls(unsigned long word)
{
	unsigned char msb0_pos = __flogr(word);

	return msb0_pos ^ (BITS_PER_LONG - 1);
}
/**
 * fls64 - find last set bit in a 64-bit word
 * @word: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffsll, but returns the position of the most significant set bit.
 *
 * fls64(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 64.
 *
 * The final AND with (2 * BITS_PER_LONG - 1) wraps the value produced
 * when __flogr() reports "no bit set" back to 0.
 */
static inline int fls64(unsigned long word)
{
	unsigned char msb0_pos = __flogr(word);
	unsigned long wrap = 2 * BITS_PER_LONG - 1;

	return (1 + (msb0_pos ^ (BITS_PER_LONG - 1))) & wrap;
}
/**
 * fls - find last (most-significant) bit set
 * @word: the word to search
 *
 * The argument is converted to unsigned int first, so it is zero-extended
 * (not sign-extended) before being handed to fls64().
 *
 * This is defined the same way as ffs.
 * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
 */
static inline int fls(int word)
{
	unsigned int val = (unsigned int)word;

	return fls64(val);
}
#else /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */
#include <asm-generic/bitops/__ffs.h>
#include <asm-generic/bitops/ffs.h>
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/fls64.h>
#endif /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */
#include <asm-generic/bitops/ffz.h>
#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/lock.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
#endif /* _S390_BITOPS_H */