Merge branch 'for-next/sve' into for-next/core
Optimise SVE switching for CPUs with 128-bit implementations.

* for-next/sve:
  arm64/sve: Skip flushing Z registers with 128 bit vectors
  arm64/sve: Use the sve_flush macros in sve_load_from_fpsimd_state()
  arm64/sve: Split _sve_flush macro into separate Z and predicate flushes
commit 3d1bf78c7b
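The optimisation in this branch hinges on one observation: when VQ - 1 is zero the vectors are only 128 bits wide, so the Z registers hold no state beyond the V registers they alias and only the predicate registers and FFR still need clearing. A rough C-level sketch of the control flow that the reworked sve_flush_live() assembly implements (the helper names here are hypothetical stand-ins for the sve_flush_z and sve_flush_p_ffr macros introduced below):

/* Hypothetical stand-ins for the sve_flush_z / sve_flush_p_ffr macros. */
static void flush_z_high_bits(void);	/* zero Z0-Z31 above bit 127 */
static void flush_p_and_ffr(void);	/* zero P0-P15 and the FFR */

/* Sketch only: mirrors the branch added to sve_flush_live() below. */
static void sve_flush_live_sketch(unsigned long vq_minus_1)
{
	/* VQ - 1 == 0 means 128-bit vectors, so the Z flush is redundant. */
	if (vq_minus_1 != 0)
		flush_z_high_bits();

	flush_p_and_ffr();
}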
@@ -69,7 +69,7 @@ static inline void *sve_pffr(struct thread_struct *thread)
 extern void sve_save_state(void *state, u32 *pfpsr);
 extern void sve_load_state(void const *state, u32 const *pfpsr,
			    unsigned long vq_minus_1);
-extern void sve_flush_live(void);
+extern void sve_flush_live(unsigned long vq_minus_1);
 extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state,
				       unsigned long vq_minus_1);
 extern unsigned int sve_get_vl(void);
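The prototype change above is the interface half of the optimisation: sve_flush_live() now needs to know the vector length, passed as VQ - 1 where one VQ is a 128-bit granule. The kernel derives it with sve_vq_from_vl(), as the do_sve_acc() hunk at the end of this diff shows; the sketch below only illustrates the arithmetic, with made-up names, and assumes the 16-byte granule size.

#define SKETCH_VQ_BYTES	16	/* assumption: one VQ is a 128-bit (16-byte) granule */

/* Illustrative only: sve_vl is the task's vector length in bytes. */
static unsigned long sketch_vq_minus_1(unsigned int sve_vl)
{
	return sve_vl / SKETCH_VQ_BYTES - 1;	/* e.g. a 256-bit VL (32 bytes) gives 1 */
}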
@@ -213,8 +213,10 @@
 		mov	v\nz\().16b, v\nz\().16b
 .endm

-.macro sve_flush
+.macro sve_flush_z
 _for n, 0, 31, _sve_flush_z	\n
+.endm
+.macro sve_flush_p_ffr
 _for n, 0, 15, _sve_pfalse	\n
 _sve_wrffr	0
 .endm
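Splitting the old sve_flush macro separates two independent pieces of architectural state: sve_flush_z zeroes each Z register above its low 128 bits (the self-move of the aliased V register zero-extends the rest of the Z register), while sve_flush_p_ffr clears the predicate registers and the FFR. The C model below is purely illustrative, sized for the architectural maximum of 2048-bit vectors, and is not the kernel's state layout.

#include <string.h>

struct sve_regs_model {
	unsigned char z[32][256];	/* Z0-Z31, up to 2048 bits; low 16 bytes alias V0-V31 */
	unsigned char p[16][32];	/* P0-P15, one predicate bit per vector byte */
	unsigned char ffr[32];		/* first-fault register, shaped like a predicate */
};

static void model_sve_flush_z(struct sve_regs_model *s)
{
	int n;

	for (n = 0; n < 32; n++)
		memset(&s->z[n][16], 0, sizeof(s->z[n]) - 16);	/* keep the low 128 bits */
}

static void model_sve_flush_p_ffr(struct sve_regs_model *s)
{
	memset(s->p, 0, sizeof(s->p));
	memset(s->ffr, 0, sizeof(s->ffr));
}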
@@ -63,16 +63,24 @@ SYM_FUNC_END(sve_set_vq)
  * and the rest zeroed. All the other SVE registers will be zeroed.
  */
 SYM_FUNC_START(sve_load_from_fpsimd_state)
 	sve_load_vq	x1, x2, x3
 	fpsimd_restore	x0, 8
-	_for n, 0, 15, _sve_pfalse	\n
-	_sve_wrffr	0
+	sve_flush_p_ffr
 	ret
 SYM_FUNC_END(sve_load_from_fpsimd_state)

-/* Zero all SVE registers but the first 128-bits of each vector */
+/*
+ * Zero all SVE registers but the first 128-bits of each vector
+ *
+ * VQ must already be configured by caller, any further updates of VQ
+ * will need to ensure that the register state remains valid.
+ *
+ * x0 = VQ - 1
+ */
 SYM_FUNC_START(sve_flush_live)
-	sve_flush
+	cbz		x0, 1f	// A VQ-1 of 0 is 128 bits so no extra Z state
+	sve_flush_z
+1:	sve_flush_p_ffr
 	ret
 SYM_FUNC_END(sve_flush_live)
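In sve_load_from_fpsimd_state() the predicate/FFR macro is all the flushing that remains: fpsimd_restore writes V0-V31, and per the comment above that already leaves the upper Z bits zeroed, so only P0-P15 and the FFR need clearing. A hedged outline in C of what the routine does (the helper names are hypothetical; the authoritative code is the assembly above):

struct user_fpsimd_state;	/* real definition lives in the kernel's uapi headers */

/* Hypothetical helpers standing in for the assembly macros above. */
static void load_vq(unsigned long vq_minus_1);			/* sve_load_vq */
static void restore_v_regs(const struct user_fpsimd_state *st);	/* fpsimd_restore */
static void flush_p_and_ffr(void);				/* sve_flush_p_ffr */

/* Outline only: restoring V0-V31 already zeroes the upper Z bits. */
static void load_from_fpsimd_sketch(const struct user_fpsimd_state *st,
				    unsigned long vq_minus_1)
{
	load_vq(vq_minus_1);
	restore_v_regs(st);
	flush_p_and_ffr();
}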
@@ -957,8 +957,10 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
 	 * disabling the trap, otherwise update our in-memory copy.
 	 */
 	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
-		sve_set_vq(sve_vq_from_vl(current->thread.sve_vl) - 1);
-		sve_flush_live();
+		unsigned long vq_minus_one =
+			sve_vq_from_vl(current->thread.sve_vl) - 1;
+		sve_set_vq(vq_minus_one);
+		sve_flush_live(vq_minus_one);
 		fpsimd_bind_task_to_cpu();
 	} else {
 		fpsimd_to_sve(current);
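One ordering detail in this caller is worth noting: sve_set_vq() runs before sve_flush_live(), matching the new comment in the assembly that VQ must already be configured when the flush executes. A trimmed, purely illustrative sketch of that ordering (hypothetical names; the real code is the hunk above):

/* Hypothetical stand-ins for sve_set_vq() / sve_flush_live(). */
static void set_vq(unsigned long vq_minus_1);
static void flush_live(unsigned long vq_minus_1);

static void sve_trap_sketch(unsigned int sve_vl_bytes)
{
	unsigned long vq_minus_one = sve_vl_bytes / 16 - 1;	/* as sve_vq_from_vl() would */

	set_vq(vq_minus_one);		/* configure VQ first */
	flush_live(vq_minus_one);	/* the flush relies on VQ being set */
}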