kernel: Provide READ_ONCE and ASSIGN_ONCE
As discussed on LKML http://marc.info/?i=54611D86.4040306%40de.ibm.com ACCESS_ONCE might fail with specific compilers for non-scalar accesses. Here is a set of patches to tackle that problem. The first patch introduces READ_ONCE and ASSIGN_ONCE. If the data structure is larger than the machine word size, memcpy is used and a warning is emitted. The next patches fix up several in-tree users of ACCESS_ONCE on non-scalar types. This merge does not yet contain a patch that forces ACCESS_ONCE to work only on scalar types. This is targeted for the next merge window as linux-next already contains new offenders regarding ACCESS_ONCE vs. non-scalar types. -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.14 (GNU/Linux) iQIcBAABAgAGBQJUkrVGAAoJEBF7vIC1phx8stkP/2LmN5y6LOseoEW06xa5MX4m cbIKsZNtsGHl7EDcTzzuWs6Sq5/Cj7V3yzeBF7QGbUKOqvFWU3jvpUBCCfjMg37C 77/Vf0ZPrxTXXxeJ4Ykdy2CGvuMtuYY9TWkrRNKmLU0xex7lGblEzCt9z6+mZviw 26/DN8ctjkHRvIUAi+7RfQBBc3oSMYAC1mzxYKBAsAFLV+LyFmsGU/4iofZMAsdt XFyVXlrLn0Bjx/MeceGkOlMDiVx4FnfccfFaD4hhuTLBJXWitkUK/MRa4JBiXWzH agY8942A8/j9wkI2DFp/pqZYqA/sTXLndyOWlhE//ZSti0n0BSJaOx3S27rTLkAc 5VmZEVyIrS3hyOpyyAi0sSoPkDnjeCHmQg9Rqn34/poKLd7JDrW2UkERNCf/T3eh GI2rbhAlZz3v5mIShn8RrxzslWYmOObpMr3HYNUdRk8YUfTf6d6aZ3txHp2nP4mD VBAEzsvP9rcVT2caVhU2dnBzeaZAj3zeDxBtjcb3X2osY9tI7qgLc9Fa/fWKgILk 2evkLcctsae2mlLNGHyaK3Dm/ZmYJv+57MyaQQEZNfZZgeB1y4k0DkxH4w1CFmCi s8XlH5voEHgnyjSQXXgc/PNVlkPAKr78ZyTiAfiKmh8rpe41/W4hGcgao7L9Lgiu SI0uSwKibuZt4dHGxQuG =IQ5o -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/borntraeger/linux Pull ACCESS_ONCE cleanup preparation from Christian Borntraeger: "kernel: Provide READ_ONCE and ASSIGN_ONCE As discussed on LKML http://marc.info/?i=54611D86.4040306%40de.ibm.com ACCESS_ONCE might fail with specific compilers for non-scalar accesses. Here is a set of patches to tackle that problem. The first patch introduces READ_ONCE and ASSIGN_ONCE. 
If the data structure is larger than the machine word size, memcpy is used and a warning is emitted. The next patches fix up several in-tree users of ACCESS_ONCE on non-scalar types. This does not yet contain a patch that forces ACCESS_ONCE to work only on scalar types. This is targeted for the next merge window as linux-next already contains new offenders regarding ACCESS_ONCE vs. non-scalar types" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/borntraeger/linux: s390/kvm: REPLACE barrier fixup with READ_ONCE arm/spinlock: Replace ACCESS_ONCE with READ_ONCE arm64/spinlock: Replace ACCESS_ONCE READ_ONCE mips/gup: Replace ACCESS_ONCE with READ_ONCE x86/gup: Replace ACCESS_ONCE with READ_ONCE x86/spinlock: Replace ACCESS_ONCE with READ_ONCE mm: replace ACCESS_ONCE with READ_ONCE or barriers kernel: Provide READ_ONCE and ASSIGN_ONCE
This commit is contained in:
commit
60815cf2e0
@ -120,12 +120,12 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
|
|||||||
|
|
||||||
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
|
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
|
||||||
{
|
{
|
||||||
return !arch_spin_value_unlocked(ACCESS_ONCE(*lock));
|
return !arch_spin_value_unlocked(READ_ONCE(*lock));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
|
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
|
||||||
{
|
{
|
||||||
struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
|
struct __raw_tickets tickets = READ_ONCE(lock->tickets);
|
||||||
return (tickets.next - tickets.owner) > 1;
|
return (tickets.next - tickets.owner) > 1;
|
||||||
}
|
}
|
||||||
#define arch_spin_is_contended arch_spin_is_contended
|
#define arch_spin_is_contended arch_spin_is_contended
|
||||||
|
@ -99,12 +99,12 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
|
|||||||
|
|
||||||
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
|
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
|
||||||
{
|
{
|
||||||
return !arch_spin_value_unlocked(ACCESS_ONCE(*lock));
|
return !arch_spin_value_unlocked(READ_ONCE(*lock));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
|
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
|
||||||
{
|
{
|
||||||
arch_spinlock_t lockval = ACCESS_ONCE(*lock);
|
arch_spinlock_t lockval = READ_ONCE(*lock);
|
||||||
return (lockval.next - lockval.owner) > 1;
|
return (lockval.next - lockval.owner) > 1;
|
||||||
}
|
}
|
||||||
#define arch_spin_is_contended arch_spin_is_contended
|
#define arch_spin_is_contended arch_spin_is_contended
|
||||||
|
@ -30,7 +30,7 @@ retry:
|
|||||||
|
|
||||||
return pte;
|
return pte;
|
||||||
#else
|
#else
|
||||||
return ACCESS_ONCE(*ptep);
|
return READ_ONCE(*ptep);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -227,12 +227,10 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu)
|
|||||||
goto out;
|
goto out;
|
||||||
ic = &vcpu->kvm->arch.sca->ipte_control;
|
ic = &vcpu->kvm->arch.sca->ipte_control;
|
||||||
do {
|
do {
|
||||||
old = *ic;
|
old = READ_ONCE(*ic);
|
||||||
barrier();
|
|
||||||
while (old.k) {
|
while (old.k) {
|
||||||
cond_resched();
|
cond_resched();
|
||||||
old = *ic;
|
old = READ_ONCE(*ic);
|
||||||
barrier();
|
|
||||||
}
|
}
|
||||||
new = old;
|
new = old;
|
||||||
new.k = 1;
|
new.k = 1;
|
||||||
@ -251,8 +249,7 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
|
|||||||
goto out;
|
goto out;
|
||||||
ic = &vcpu->kvm->arch.sca->ipte_control;
|
ic = &vcpu->kvm->arch.sca->ipte_control;
|
||||||
do {
|
do {
|
||||||
old = *ic;
|
old = READ_ONCE(*ic);
|
||||||
barrier();
|
|
||||||
new = old;
|
new = old;
|
||||||
new.k = 0;
|
new.k = 0;
|
||||||
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
|
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
|
||||||
@ -267,12 +264,10 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu)
|
|||||||
|
|
||||||
ic = &vcpu->kvm->arch.sca->ipte_control;
|
ic = &vcpu->kvm->arch.sca->ipte_control;
|
||||||
do {
|
do {
|
||||||
old = *ic;
|
old = READ_ONCE(*ic);
|
||||||
barrier();
|
|
||||||
while (old.kg) {
|
while (old.kg) {
|
||||||
cond_resched();
|
cond_resched();
|
||||||
old = *ic;
|
old = READ_ONCE(*ic);
|
||||||
barrier();
|
|
||||||
}
|
}
|
||||||
new = old;
|
new = old;
|
||||||
new.k = 1;
|
new.k = 1;
|
||||||
@ -286,8 +281,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
|
|||||||
|
|
||||||
ic = &vcpu->kvm->arch.sca->ipte_control;
|
ic = &vcpu->kvm->arch.sca->ipte_control;
|
||||||
do {
|
do {
|
||||||
old = *ic;
|
old = READ_ONCE(*ic);
|
||||||
barrier();
|
|
||||||
new = old;
|
new = old;
|
||||||
new.kh--;
|
new.kh--;
|
||||||
if (!new.kh)
|
if (!new.kh)
|
||||||
|
@ -92,7 +92,7 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
|
|||||||
unsigned count = SPIN_THRESHOLD;
|
unsigned count = SPIN_THRESHOLD;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
if (ACCESS_ONCE(lock->tickets.head) == inc.tail)
|
if (READ_ONCE(lock->tickets.head) == inc.tail)
|
||||||
goto out;
|
goto out;
|
||||||
cpu_relax();
|
cpu_relax();
|
||||||
} while (--count);
|
} while (--count);
|
||||||
@ -105,7 +105,7 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
|
|||||||
{
|
{
|
||||||
arch_spinlock_t old, new;
|
arch_spinlock_t old, new;
|
||||||
|
|
||||||
old.tickets = ACCESS_ONCE(lock->tickets);
|
old.tickets = READ_ONCE(lock->tickets);
|
||||||
if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
|
if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
@ -162,14 +162,14 @@ static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
|
|||||||
|
|
||||||
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
|
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
|
||||||
{
|
{
|
||||||
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
|
struct __raw_tickets tmp = READ_ONCE(lock->tickets);
|
||||||
|
|
||||||
return tmp.tail != tmp.head;
|
return tmp.tail != tmp.head;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
|
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
|
||||||
{
|
{
|
||||||
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
|
struct __raw_tickets tmp = READ_ONCE(lock->tickets);
|
||||||
|
|
||||||
return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
|
return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
|
||||||
}
|
}
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
static inline pte_t gup_get_pte(pte_t *ptep)
|
static inline pte_t gup_get_pte(pte_t *ptep)
|
||||||
{
|
{
|
||||||
#ifndef CONFIG_X86_PAE
|
#ifndef CONFIG_X86_PAE
|
||||||
return ACCESS_ONCE(*ptep);
|
return READ_ONCE(*ptep);
|
||||||
#else
|
#else
|
||||||
/*
|
/*
|
||||||
* With get_user_pages_fast, we walk down the pagetables without taking
|
* With get_user_pages_fast, we walk down the pagetables without taking
|
||||||
|
@ -186,6 +186,80 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
|
|||||||
# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
|
# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <uapi/linux/types.h>
|
||||||
|
|
||||||
|
static __always_inline void data_access_exceeds_word_size(void)
|
||||||
|
#ifdef __compiletime_warning
|
||||||
|
__compiletime_warning("data access exceeds word size and won't be atomic")
|
||||||
|
#endif
|
||||||
|
;
|
||||||
|
|
||||||
|
static __always_inline void data_access_exceeds_word_size(void)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
static __always_inline void __read_once_size(volatile void *p, void *res, int size)
|
||||||
|
{
|
||||||
|
switch (size) {
|
||||||
|
case 1: *(__u8 *)res = *(volatile __u8 *)p; break;
|
||||||
|
case 2: *(__u16 *)res = *(volatile __u16 *)p; break;
|
||||||
|
case 4: *(__u32 *)res = *(volatile __u32 *)p; break;
|
||||||
|
#ifdef CONFIG_64BIT
|
||||||
|
case 8: *(__u64 *)res = *(volatile __u64 *)p; break;
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
barrier();
|
||||||
|
__builtin_memcpy((void *)res, (const void *)p, size);
|
||||||
|
data_access_exceeds_word_size();
|
||||||
|
barrier();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static __always_inline void __assign_once_size(volatile void *p, void *res, int size)
|
||||||
|
{
|
||||||
|
switch (size) {
|
||||||
|
case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
|
||||||
|
case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
|
||||||
|
case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
|
||||||
|
#ifdef CONFIG_64BIT
|
||||||
|
case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
barrier();
|
||||||
|
__builtin_memcpy((void *)p, (const void *)res, size);
|
||||||
|
data_access_exceeds_word_size();
|
||||||
|
barrier();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Prevent the compiler from merging or refetching reads or writes. The
|
||||||
|
* compiler is also forbidden from reordering successive instances of
|
||||||
|
* READ_ONCE, ASSIGN_ONCE and ACCESS_ONCE (see below), but only when the
|
||||||
|
* compiler is aware of some particular ordering. One way to make the
|
||||||
|
* compiler aware of ordering is to put the two invocations of READ_ONCE,
|
||||||
|
* ASSIGN_ONCE or ACCESS_ONCE() in different C statements.
|
||||||
|
*
|
||||||
|
* In contrast to ACCESS_ONCE these two macros will also work on aggregate
|
||||||
|
* data types like structs or unions. If the size of the accessed data
|
||||||
|
* type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
|
||||||
|
* READ_ONCE() and ASSIGN_ONCE() will fall back to memcpy and print a
|
||||||
|
* compile-time warning.
|
||||||
|
*
|
||||||
|
* Their two major use cases are: (1) Mediating communication between
|
||||||
|
* process-level code and irq/NMI handlers, all running on the same CPU,
|
||||||
|
* and (2) Ensuring that the compiler does not fold, spindle, or otherwise
|
||||||
|
* mutilate accesses that either do not require ordering or that interact
|
||||||
|
* with an explicit memory barrier or atomic instruction that provides the
|
||||||
|
* required ordering.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define READ_ONCE(x) \
|
||||||
|
({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; })
|
||||||
|
|
||||||
|
#define ASSIGN_ONCE(val, x) \
|
||||||
|
({ typeof(x) __val; __val = val; __assign_once_size(&x, &__val, sizeof(__val)); __val; })
|
||||||
|
|
||||||
#endif /* __KERNEL__ */
|
#endif /* __KERNEL__ */
|
||||||
|
|
||||||
#endif /* __ASSEMBLY__ */
|
#endif /* __ASSEMBLY__ */
|
||||||
|
2
mm/gup.c
2
mm/gup.c
@ -968,7 +968,7 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
|
|||||||
|
|
||||||
pudp = pud_offset(&pgd, addr);
|
pudp = pud_offset(&pgd, addr);
|
||||||
do {
|
do {
|
||||||
pud_t pud = ACCESS_ONCE(*pudp);
|
pud_t pud = READ_ONCE(*pudp);
|
||||||
|
|
||||||
next = pud_addr_end(addr, end);
|
next = pud_addr_end(addr, end);
|
||||||
if (pud_none(pud))
|
if (pud_none(pud))
|
||||||
|
11
mm/memory.c
11
mm/memory.c
@ -3195,7 +3195,16 @@ static int handle_pte_fault(struct mm_struct *mm,
|
|||||||
pte_t entry;
|
pte_t entry;
|
||||||
spinlock_t *ptl;
|
spinlock_t *ptl;
|
||||||
|
|
||||||
entry = ACCESS_ONCE(*pte);
|
/*
|
||||||
|
* some architectures can have larger ptes than wordsize,
|
||||||
|
* e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and CONFIG_32BIT=y,
|
||||||
|
* so READ_ONCE or ACCESS_ONCE cannot guarantee atomic accesses.
|
||||||
|
* The code below just needs a consistent view for the ifs and
|
||||||
|
* we later double check anyway with the ptl lock held. So here
|
||||||
|
* a barrier will do.
|
||||||
|
*/
|
||||||
|
entry = *pte;
|
||||||
|
barrier();
|
||||||
if (!pte_present(entry)) {
|
if (!pte_present(entry)) {
|
||||||
if (pte_none(entry)) {
|
if (pte_none(entry)) {
|
||||||
if (vma->vm_ops) {
|
if (vma->vm_ops) {
|
||||||
|
@ -583,7 +583,8 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
|
|||||||
* without holding anon_vma lock for write. So when looking for a
|
* without holding anon_vma lock for write. So when looking for a
|
||||||
* genuine pmde (in which to find pte), test present and !THP together.
|
* genuine pmde (in which to find pte), test present and !THP together.
|
||||||
*/
|
*/
|
||||||
pmde = ACCESS_ONCE(*pmd);
|
pmde = *pmd;
|
||||||
|
barrier();
|
||||||
if (!pmd_present(pmde) || pmd_trans_huge(pmde))
|
if (!pmd_present(pmde) || pmd_trans_huge(pmde))
|
||||||
pmd = NULL;
|
pmd = NULL;
|
||||||
out:
|
out:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user