arm64: fpsimd: run kernel mode NEON with softirqs disabled
Kernel mode NEON can be used in task or softirq context, but only in a non-nesting manner, i.e., softirq context is only permitted if the interrupt was not taken at a point where the kernel was using the NEON in task context.

This means all users of kernel mode NEON have to be aware of this limitation, and either need to provide scalar fallbacks that may be much slower (up to 20x for AES instructions) and potentially less safe, or use an asynchronous interface that defers processing to a later time when the NEON is guaranteed to be available.

Given that grabbing and releasing the NEON is cheap, we can relax this restriction, by increasing the granularity of kernel mode NEON code, and always disabling softirq processing while the NEON is being used in task context.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210302090118.30666-4-ardb@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
This commit is contained in:
parent
4c4dcd3541
commit
13150149aa
@ -700,7 +700,7 @@ AES_FUNC_START(aes_mac_update)
|
|||||||
cbz w5, .Lmacout
|
cbz w5, .Lmacout
|
||||||
encrypt_block v0, w2, x1, x7, w8
|
encrypt_block v0, w2, x1, x7, w8
|
||||||
st1 {v0.16b}, [x4] /* return dg */
|
st1 {v0.16b}, [x4] /* return dg */
|
||||||
cond_yield .Lmacout, x7
|
cond_yield .Lmacout, x7, x8
|
||||||
b .Lmacloop4x
|
b .Lmacloop4x
|
||||||
.Lmac1x:
|
.Lmac1x:
|
||||||
add w3, w3, #4
|
add w3, w3, #4
|
||||||
|
@ -121,7 +121,7 @@ CPU_LE( rev32 v11.16b, v11.16b )
|
|||||||
add dgav.4s, dgav.4s, dg0v.4s
|
add dgav.4s, dgav.4s, dg0v.4s
|
||||||
|
|
||||||
cbz w2, 2f
|
cbz w2, 2f
|
||||||
cond_yield 3f, x5
|
cond_yield 3f, x5, x6
|
||||||
b 0b
|
b 0b
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -129,7 +129,7 @@ CPU_LE( rev32 v19.16b, v19.16b )
|
|||||||
|
|
||||||
/* handled all input blocks? */
|
/* handled all input blocks? */
|
||||||
cbz w2, 2f
|
cbz w2, 2f
|
||||||
cond_yield 3f, x5
|
cond_yield 3f, x5, x6
|
||||||
b 0b
|
b 0b
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -184,11 +184,11 @@ SYM_FUNC_START(sha3_ce_transform)
|
|||||||
eor v0.16b, v0.16b, v31.16b
|
eor v0.16b, v0.16b, v31.16b
|
||||||
|
|
||||||
cbnz w8, 3b
|
cbnz w8, 3b
|
||||||
cond_yield 3f, x8
|
cond_yield 4f, x8, x9
|
||||||
cbnz w2, 0b
|
cbnz w2, 0b
|
||||||
|
|
||||||
/* save state */
|
/* save state */
|
||||||
3: st1 { v0.1d- v3.1d}, [x0], #32
|
4: st1 { v0.1d- v3.1d}, [x0], #32
|
||||||
st1 { v4.1d- v7.1d}, [x0], #32
|
st1 { v4.1d- v7.1d}, [x0], #32
|
||||||
st1 { v8.1d-v11.1d}, [x0], #32
|
st1 { v8.1d-v11.1d}, [x0], #32
|
||||||
st1 {v12.1d-v15.1d}, [x0], #32
|
st1 {v12.1d-v15.1d}, [x0], #32
|
||||||
|
@ -195,7 +195,7 @@ CPU_LE( rev64 v19.16b, v19.16b )
|
|||||||
add v10.2d, v10.2d, v2.2d
|
add v10.2d, v10.2d, v2.2d
|
||||||
add v11.2d, v11.2d, v3.2d
|
add v11.2d, v11.2d, v3.2d
|
||||||
|
|
||||||
cond_yield 3f, x4
|
cond_yield 3f, x4, x5
|
||||||
/* handled all input blocks? */
|
/* handled all input blocks? */
|
||||||
cbnz w2, 0b
|
cbnz w2, 0b
|
||||||
|
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include <asm-generic/export.h>
|
#include <asm-generic/export.h>
|
||||||
|
|
||||||
#include <asm/asm-offsets.h>
|
#include <asm/asm-offsets.h>
|
||||||
|
#include <asm/alternative.h>
|
||||||
#include <asm/cpufeature.h>
|
#include <asm/cpufeature.h>
|
||||||
#include <asm/cputype.h>
|
#include <asm/cputype.h>
|
||||||
#include <asm/debug-monitors.h>
|
#include <asm/debug-monitors.h>
|
||||||
@ -701,19 +702,32 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
|
|||||||
.endm
|
.endm
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check whether preempt-disabled code should yield as soon as it
|
* Check whether preempt/bh-disabled asm code should yield as soon as
|
||||||
* is able. This is the case if re-enabling preemption a single
|
* it is able. This is the case if we are currently running in task
|
||||||
* time results in a preempt count of zero, and the TIF_NEED_RESCHED
|
* context, and either a softirq is pending, or the TIF_NEED_RESCHED
|
||||||
* flag is set. (Note that the latter is stored negated in the
|
* flag is set and re-enabling preemption a single time would result in
|
||||||
* top word of the thread_info::preempt_count field)
|
* a preempt count of zero. (Note that the TIF_NEED_RESCHED flag is
|
||||||
|
* stored negated in the top word of the thread_info::preempt_count
|
||||||
|
* field)
|
||||||
*/
|
*/
|
||||||
.macro cond_yield, lbl:req, tmp:req
|
.macro cond_yield, lbl:req, tmp:req, tmp2:req
|
||||||
#ifdef CONFIG_PREEMPTION
|
|
||||||
get_current_task \tmp
|
get_current_task \tmp
|
||||||
ldr \tmp, [\tmp, #TSK_TI_PREEMPT]
|
ldr \tmp, [\tmp, #TSK_TI_PREEMPT]
|
||||||
|
/*
|
||||||
|
* If we are serving a softirq, there is no point in yielding: the
|
||||||
|
* softirq will not be preempted no matter what we do, so we should
|
||||||
|
* run to completion as quickly as we can.
|
||||||
|
*/
|
||||||
|
tbnz \tmp, #SOFTIRQ_SHIFT, .Lnoyield_\@
|
||||||
|
#ifdef CONFIG_PREEMPTION
|
||||||
sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET
|
sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET
|
||||||
cbz \tmp, \lbl
|
cbz \tmp, \lbl
|
||||||
#endif
|
#endif
|
||||||
|
adr_l \tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING
|
||||||
|
this_cpu_offset \tmp2
|
||||||
|
ldr w\tmp, [\tmp, \tmp2]
|
||||||
|
cbnz w\tmp, \lbl // yield on pending softirq in task context
|
||||||
|
.Lnoyield_\@:
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -95,6 +95,8 @@ int main(void)
|
|||||||
DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
|
DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
|
||||||
BLANK();
|
BLANK();
|
||||||
DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
|
DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
|
||||||
|
DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT);
|
||||||
|
DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
|
||||||
BLANK();
|
BLANK();
|
||||||
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
|
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
|
||||||
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
|
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
|
||||||
|
@ -180,7 +180,7 @@ static void __get_cpu_fpsimd_context(void)
|
|||||||
*/
|
*/
|
||||||
static void get_cpu_fpsimd_context(void)
|
static void get_cpu_fpsimd_context(void)
|
||||||
{
|
{
|
||||||
preempt_disable();
|
local_bh_disable();
|
||||||
__get_cpu_fpsimd_context();
|
__get_cpu_fpsimd_context();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -201,7 +201,7 @@ static void __put_cpu_fpsimd_context(void)
|
|||||||
static void put_cpu_fpsimd_context(void)
|
static void put_cpu_fpsimd_context(void)
|
||||||
{
|
{
|
||||||
__put_cpu_fpsimd_context();
|
__put_cpu_fpsimd_context();
|
||||||
preempt_enable();
|
local_bh_enable();
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool have_cpu_fpsimd_context(void)
|
static bool have_cpu_fpsimd_context(void)
|
||||||
|
Loading…
Reference in New Issue
Block a user