2021-09-23 14:10:51 -03:00
/* SPDX-License-Identifier: GPL-2.0 */
# ifndef _FUTEX_H
# define _FUTEX_H
2021-09-23 14:10:58 -03:00
# include <linux/futex.h>
2022-04-07 13:43:15 +02:00
# include <linux/rtmutex.h>
2021-09-23 14:11:02 -03:00
# include <linux/sched/wake_q.h>
2023-09-21 12:45:08 +02:00
# include <linux/compat.h>
2021-09-23 14:11:02 -03:00
2021-10-15 12:05:59 +02:00
# ifdef CONFIG_PREEMPT_RT
# include <linux/rcuwait.h>
# endif
2021-09-23 14:10:51 -03:00
# include <asm/futex.h>
/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */

/* Operand size selector: occupies the low two bits, see FLAGS_SIZE_MASK. */
#define FLAGS_SIZE_8		0x0000
#define FLAGS_SIZE_16		0x0001
#define FLAGS_SIZE_32		0x0002
#define FLAGS_SIZE_64		0x0003

#define FLAGS_SIZE_MASK		0x0003

#ifdef CONFIG_MMU
# define FLAGS_SHARED		0x0010
#else
/*
 * NOMMU does not have per process address space. Let the compiler optimize
 * code away.
 */
# define FLAGS_SHARED		0x0000
#endif

/* Option bits, disjoint from FLAGS_SIZE_MASK and from each other. */
#define FLAGS_CLOCKRT		0x0020
#define FLAGS_HAS_TIMEOUT	0x0040
#define FLAGS_NUMA		0x0080
#define FLAGS_STRICT		0x0100
2023-09-21 12:45:08 +02:00
/**
 * futex_to_flags() - Translate a FUTEX_ operation word into FLAGS_
 * @op:	futex operation word; only FUTEX_PRIVATE_FLAG and
 *	FUTEX_CLOCK_REALTIME are inspected here
 *
 * The size is always FLAGS_SIZE_32. FLAGS_SHARED is set when
 * FUTEX_PRIVATE_FLAG is absent from @op.
 *
 * Return: the FLAGS_ encoding of @op.
 */
static inline unsigned int futex_to_flags(unsigned int op)
{
	unsigned int flags = FLAGS_SIZE_32;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	if (op & FUTEX_CLOCK_REALTIME)
		flags |= FLAGS_CLOCKRT;

	return flags;
}
2023-07-21 08:41:27 -06:00
# define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_PRIVATE)
2023-09-21 12:45:08 +02:00
/* FUTEX2_ to FLAGS_ */
static inline unsigned int futex2_to_flags ( unsigned int flags2 )
{
unsigned int flags = flags2 & FUTEX2_SIZE_MASK ;
if ( ! ( flags2 & FUTEX2_PRIVATE ) )
flags | = FLAGS_SHARED ;
if ( flags2 & FUTEX2_NUMA )
flags | = FLAGS_NUMA ;
return flags ;
}
static inline unsigned int futex_size ( unsigned int flags )
{
return 1 < < ( flags & FLAGS_SIZE_MASK ) ;
}
static inline bool futex_flags_valid ( unsigned int flags )
{
/* Only 64bit futexes for 64bit code */
if ( ! IS_ENABLED ( CONFIG_64BIT ) | | in_compat_syscall ( ) ) {
if ( ( flags & FLAGS_SIZE_MASK ) = = FLAGS_SIZE_64 )
return false ;
}
/* Only 32bit futexes are implemented -- for now */
if ( ( flags & FLAGS_SIZE_MASK ) ! = FLAGS_SIZE_32 )
return false ;
return true ;
}
2021-09-23 14:10:51 -03:00
2023-09-21 12:45:09 +02:00
/**
 * futex_validate_input() - Check that a value fits the futex size
 * @flags:	FLAGS_ word selecting the futex size
 * @val:	value to check
 *
 * Return: false if @val has any bit set above the width given by
 * futex_size(@flags), true otherwise.
 */
static inline bool futex_validate_input(unsigned int flags, u64 val)
{
	int bits = 8 * futex_size(flags);

	/* bits < 64 also keeps the shift count below the width of u64. */
	if (bits < 64 && (val >> bits))
		return false;

	return true;
}
2021-09-23 14:10:51 -03:00
/*
 * should_fail_futex() is only declared here when CONFIG_FAIL_FUTEX is set;
 * otherwise it is a stub that never reports a failure.
 * NOTE(review): presumably a fault-injection hook -- confirm at the definition.
 */
#ifndef CONFIG_FAIL_FUTEX
static inline bool should_fail_futex(bool fshared)
{
	return false;
}
#else
extern bool should_fail_futex(bool fshared);
#endif
2021-09-23 14:10:58 -03:00
/*
 * A hash bucket is shared by every futex_key that hashes to the same
 * location. A single key may have several futex_q structures queued, one for
 * each task waiting on that futex.
 */
struct futex_hash_bucket {
	/* Waiter count, maintained by the futex_hb_waiters_*() helpers. */
	atomic_t		waiters;
	/* Held by the caller of futex_queue(), which releases it. */
	spinlock_t		lock;
	/* NOTE(review): presumably where futex_q::list nodes are linked -- confirm. */
	struct plist_head	chain;
} ____cacheline_aligned_in_smp;
/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * Entry in the list of 'owned' pi_state instances. These have to be
	 * cleaned up in do_exit() if the task exits prematurely.
	 */
	struct list_head	list;

	/* The PI object. */
	struct rt_mutex_base	pi_mutex;

	struct task_struct	*owner;
	refcount_t		refcount;

	union futex_key		key;
} __randomize_layout;
2023-06-08 11:56:06 -06:00
struct futex_q;

/*
 * Signature of a per-queue wake handler (stored in futex_q::wake): it is given
 * the wake queue and the futex_q being woken.
 */
typedef void futex_wake_fn(struct wake_q_head *wake_q, struct futex_q *q);
2021-09-23 14:10:58 -03:00
/**
* struct futex_q - The hashed futex queue entry , one per waiting task
* @ list : priority - sorted list of tasks waiting on this futex
* @ task : the task waiting on the futex
* @ lock_ptr : the hash bucket lock
2023-06-08 11:56:06 -06:00
* @ wake : the wake handler for this queue
2023-06-13 15:44:42 -06:00
* @ wake_data : data associated with the wake handler
2021-09-23 14:10:58 -03:00
* @ key : the key the futex is hashed on
* @ pi_state : optional priority inheritance state
* @ rt_waiter : rt_waiter storage for use with requeue_pi
* @ requeue_pi_key : the requeue_pi target futex key
* @ bitset : bitset for the optional bitmasked wakeup
* @ requeue_state : State field for futex_requeue_pi ( )
* @ requeue_wait : RCU wait for futex_requeue_pi ( ) ( RT only )
*
* We use this hashed waitqueue , instead of a normal wait_queue_entry_t , so
* we can wake only the relevant ones ( hashed queues may be shared ) .
*
* A futex_q has a woken state , just like tasks have TASK_RUNNING .
* It is considered woken when plist_node_empty ( & q - > list ) | | q - > lock_ptr = = 0.
* The order of wakeup is always to make the first condition true , then
* the second .
*
* PI futexes are typically woken before they are removed from the hash list via
* the rt_mutex code . See futex_unqueue_pi ( ) .
*/
struct futex_q {
struct plist_node list ;
struct task_struct * task ;
spinlock_t * lock_ptr ;
2023-06-08 11:56:06 -06:00
futex_wake_fn * wake ;
2023-06-13 15:44:42 -06:00
void * wake_data ;
2021-09-23 14:10:58 -03:00
union futex_key key ;
struct futex_pi_state * pi_state ;
struct rt_mutex_waiter * rt_waiter ;
union futex_key * requeue_pi_key ;
u32 bitset ;
atomic_t requeue_state ;
# ifdef CONFIG_PREEMPT_RT
struct rcuwait requeue_wait ;
# endif
} __randomize_layout ;
/*
 * Constant futex_q instance defined outside this header.
 * NOTE(review): presumably the template used to initialize new futex_q
 * entries -- confirm at the definition.
 */
extern const struct futex_q futex_q_init ;
/* Kind of access requested on a futex word; passed to get_futex_key() as @rw. */
enum futex_access {
	FUTEX_READ = 0,
	FUTEX_WRITE = 1
};
2023-09-21 12:45:13 +02:00
/* Key lookup, timer setup and hashing helpers; implemented outside this header. */
extern int get_futex_key(u32 __user *uaddr, unsigned int flags,
			 union futex_key *key, enum futex_access rw);

extern struct hrtimer_sleeper *
futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
		  int flags, u64 range_ns);

extern struct futex_hash_bucket *futex_hash(union futex_key *key);
/**
* futex_match - Check whether two futex keys are equal
* @ key1 : Pointer to key1
* @ key2 : Pointer to key2
*
* Return 1 if two futex_keys are equal , 0 otherwise .
*/
static inline int futex_match ( union futex_key * key1 , union futex_key * key2 )
{
return ( key1 & & key2
& & key1 - > both . word = = key2 - > both . word
& & key1 - > both . ptr = = key2 - > both . ptr
& & key1 - > both . offset = = key2 - > both . offset ) ;
}
extern int futex_wait_setup ( u32 __user * uaddr , u32 val , unsigned int flags ,
struct futex_q * q , struct futex_hash_bucket * * hb ) ;
extern void futex_wait_queue ( struct futex_hash_bucket * hb , struct futex_q * q ,
struct hrtimer_sleeper * timeout ) ;
2023-07-12 09:14:52 -06:00
extern bool __futex_wake_mark ( struct futex_q * q ) ;
2021-09-23 14:11:02 -03:00
extern void futex_wake_mark ( struct wake_q_head * wake_q , struct futex_q * q ) ;
2021-09-23 14:10:58 -03:00
extern int fault_in_user_writeable ( u32 __user * uaddr ) ;
extern int futex_cmpxchg_value_locked ( u32 * curval , u32 __user * uaddr , u32 uval , u32 newval ) ;
extern int futex_get_value_locked ( u32 * dest , u32 __user * from ) ;
extern struct futex_q * futex_top_waiter ( struct futex_hash_bucket * hb , union futex_key * key ) ;
2021-09-23 14:11:02 -03:00
extern void __futex_unqueue ( struct futex_q * q ) ;
2021-09-23 14:10:58 -03:00
extern void __futex_queue ( struct futex_q * q , struct futex_hash_bucket * hb ) ;
2021-09-23 14:11:03 -03:00
extern int futex_unqueue ( struct futex_q * q ) ;
/**
 * futex_queue() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * futex_queue() is typically paired with exactly one call to futex_unqueue().
 * The exceptions involve the PI related operations, which may use
 * futex_unqueue_pi() or nothing if the unqueue is done as part of the wake
 * process and the unqueue state is implicit in the state of woken task (see
 * futex_wait_requeue_pi() for an example).
 */
static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	__futex_queue(q, hb);
	spin_unlock(&hb->lock);
}
2021-09-23 14:10:58 -03:00
/* PI unqueue and owner-exit helpers; implemented outside this header. */
extern void futex_unqueue_pi(struct futex_q *q);

extern void wait_for_owner_exiting(int ret, struct task_struct *exiting);
2021-09-23 14:11:02 -03:00
/*
 * Reflects a new waiter being added to the waitqueue.
 *
 * Compiles to nothing unless CONFIG_SMP is set.
 */
static inline void futex_hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier (A), see the ordering comment.
	 * NOTE(review): that comment is not in this header; presumably it
	 * lives with the wait/wake implementation -- confirm.
	 */
	smp_mb__after_atomic();
#endif
}
/*
 * Reflects a waiter being removed from the waitqueue by wakeup
 * paths.
 *
 * Compiles to nothing unless CONFIG_SMP is set.
 */
static inline void futex_hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}
2021-09-23 14:11:03 -03:00
/*
 * Report whether the bucket may have waiters.
 *
 * With CONFIG_SMP this returns the current hb->waiters count, read after a
 * full barrier. Without CONFIG_SMP it always returns 1.
 */
static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	/*
	 * Full barrier (B), see the ordering comment.
	 * NOTE(review): that comment is not in this header; presumably it
	 * lives with the wait/wake implementation -- confirm.
	 */
	smp_mb();
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}
2021-09-23 14:10:58 -03:00
/* Bucket locking for a futex_q; implemented outside this header. */
extern struct futex_hash_bucket *futex_q_lock(struct futex_q *q);
extern void futex_q_unlock(struct futex_hash_bucket *hb);

/* PI state handling; implemented outside this header. */
extern int futex_lock_pi_atomic(u32 __user *uaddr,
				struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task,
				struct task_struct **exiting,
				int set_waiters);

extern int refill_pi_state_cache(void);
extern void get_pi_state(struct futex_pi_state *pi_state);
extern void put_pi_state(struct futex_pi_state *pi_state);
extern int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked);
2021-09-23 14:11:02 -03:00
/*
* Express the locking dependencies for lockdep :
*/
static inline void
double_lock_hb ( struct futex_hash_bucket * hb1 , struct futex_hash_bucket * hb2 )
{
2021-09-23 14:11:04 -03:00
if ( hb1 > hb2 )
swap ( hb1 , hb2 ) ;
spin_lock ( & hb1 - > lock ) ;
if ( hb1 ! = hb2 )
spin_lock_nested ( & hb2 - > lock , SINGLE_DEPTH_NESTING ) ;
2021-09-23 14:11:02 -03:00
}
static inline void
double_unlock_hb ( struct futex_hash_bucket * hb1 , struct futex_hash_bucket * hb2 )
{
spin_unlock ( & hb1 - > lock ) ;
if ( hb1 ! = hb2 )
spin_unlock ( & hb2 - > lock ) ;
}
2021-09-23 14:10:58 -03:00
/* syscalls */
2021-09-23 14:10:51 -03:00
extern int futex_wait_requeue_pi ( u32 __user * uaddr , unsigned int flags , u32
val , ktime_t * abs_time , u32 bitset , u32 __user
* uaddr2 ) ;
2023-09-21 12:45:14 +02:00
extern int futex_requeue ( u32 __user * uaddr1 , unsigned int flags1 ,
u32 __user * uaddr2 , unsigned int flags2 ,
int nr_wake , int nr_requeue ,
2021-09-23 14:10:51 -03:00
u32 * cmpval , int requeue_pi ) ;
2023-09-21 12:45:12 +02:00
extern int __futex_wait ( u32 __user * uaddr , unsigned int flags , u32 val ,
struct hrtimer_sleeper * to , u32 bitset ) ;
2021-09-23 14:10:51 -03:00
extern int futex_wait ( u32 __user * uaddr , unsigned int flags , u32 val ,
ktime_t * abs_time , u32 bitset ) ;
/*
 * futex: Implement sys_futex_waitv()
 *
 * Add support to wait on multiple futexes. This is the interface
 * implemented by this syscall:
 *
 *	futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes,
 *		    unsigned int flags, struct timespec *timeout,
 *		    clockid_t clockid)
 *
 *	struct futex_waitv {
 *		__u64 val;
 *		__u64 uaddr;
 *		__u32 flags;
 *		__u32 __reserved;
 *	};
 *
 * Given an array of struct futex_waitv, wait on each uaddr. The thread
 * wakes if a futex_wake() is performed at any uaddr. The syscall returns
 * immediately if any waiter has *uaddr != val. *timeout is an optional
 * absolute timeout value for the operation. This syscall supports only
 * 64bit sized timeout structs. The flags argument of the syscall should be
 * empty, but it can be used for future extensions. Flags for shared
 * futexes, sizes, etc. should be used on the individual flags of each
 * waiter.
 *
 * __reserved is used for explicit padding and should be 0, but it might be
 * used for future extensions. If userspace uses 32-bit pointers, it
 * should make sure to explicitly cast them when assigning to waitv::uaddr.
 *
 * Returns the array index of one of the woken futexes. No information is
 * given about how many were woken, or about any particular attribute of the
 * returned one (whether it was the first woken, whether it has the smallest
 * index...).
 *
 * Signed-off-by: André Almeida <andrealmeid@collabora.com>
 * Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
 * Link: https://lore.kernel.org/r/20210923171111.300673-17-andrealmeid@collabora.com
 * (2021-09-23 14:11:05 -03:00)
 */
/**
 * struct futex_vector - Auxiliary struct for futex_waitv()
 * @w:	Userspace provided data
 * @q:	Kernel side data
 *
 * Struct used to build an array with all the data needed for futex_waitv().
 */
struct futex_vector {
	struct futex_waitv	w;
	struct futex_q		q;
};
2023-06-13 08:31:58 -06:00
/* Helpers operating on arrays of struct futex_vector; implemented outside this header. */
extern int futex_parse_waitv(struct futex_vector *futexv,
			     struct futex_waitv __user *uwaitv,
			     unsigned int nr_futexes, futex_wake_fn *wake,
			     void *wake_data);

extern int futex_wait_multiple_setup(struct futex_vector *vs, int count,
				     int *woken);

extern int futex_unqueue_multiple(struct futex_vector *v, int count);
/*
 * futex: Implement sys_futex_waitv()
 *
 * Add support to wait on multiple futexes. This is the interface
 * implemented by this syscall:
 *
 *	futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes,
 *		    unsigned int flags, struct timespec *timeout,
 *		    clockid_t clockid)
 *
 *	struct futex_waitv {
 *		__u64 val;
 *		__u64 uaddr;
 *		__u32 flags;
 *		__u32 __reserved;
 *	};
 *
 * Given an array of struct futex_waitv, wait on each uaddr. The thread
 * wakes if a futex_wake() is performed at any uaddr. The syscall returns
 * immediately if any waiter has *uaddr != val. *timeout is an optional
 * absolute timeout value for the operation. This syscall supports only
 * 64bit sized timeout structs. The flags argument of the syscall should be
 * empty, but it can be used for future extensions. Flags for shared
 * futexes, sizes, etc. should be used on the individual flags of each
 * waiter.
 *
 * __reserved is used for explicit padding and should be 0, but it might be
 * used for future extensions. If userspace uses 32-bit pointers, it
 * should make sure to explicitly cast them when assigning to waitv::uaddr.
 *
 * Returns the array index of one of the woken futexes. No information is
 * given about how many were woken, or about any particular attribute of the
 * returned one (whether it was the first woken, whether it has the smallest
 * index...).
 *
 * Signed-off-by: André Almeida <andrealmeid@collabora.com>
 * Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
 * Link: https://lore.kernel.org/r/20210923171111.300673-17-andrealmeid@collabora.com
 * (2021-09-23 14:11:05 -03:00)
 */
extern int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
			       struct hrtimer_sleeper *to);

extern int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake,
		      u32 bitset);

extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_wake2,
			 int op);

extern int futex_unlock_pi(u32 __user *uaddr, unsigned int flags);

extern int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
			 ktime_t *time, int trylock);
# endif /* _FUTEX_H */