-
Notifications
You must be signed in to change notification settings - Fork 6.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
arm64: FPU context switching support
This adds FPU sharing support with a lazy context switching algorithm. Every thread is allowed to use FPU/SIMD registers. In fact, the compiler may insert FPU reg accesses in anycontext to optimize even non-FP code unless the -mgeneral-regs-only compiler flag is used, but Zephyr currently doesn't support such a build. It is therefore possible to do FP access in IRS as well with this patch although IRQs are then disabled to prevent nested IRQs in such cases. Because the thread object grows in size, some tests have to be adjusted. Signed-off-by: Nicolas Pitre <[email protected]>
- Loading branch information
1 parent
a82fff0
commit f1f63dd
Showing
20 changed files
with
588 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
/*
 * Copyright (c) 2021 BayLibre SAS
 * Written by: Nicolas Pitre
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <toolchain.h>
#include <linker/sections.h>

_ASM_FILE_PROLOGUE

/*
 * void z_arm64_fpu_save(struct z_arm64_fp_context *saved_fp_context);
 *
 * Store the complete FPU/SIMD state into the buffer pointed to by x0.
 * Layout (matching z_arm64_fpu_restore below):
 *   - 32 vector registers q0..q31, 16 bytes each  -> offsets 0 .. 16*31
 *   - fpsr (32-bit)                               -> offset 16*32 + 0
 *   - fpcr (32-bit)                               -> offset 16*32 + 4
 *
 * FPU access must already be enabled in CPACR_EL1 by the caller,
 * otherwise these instructions would trap.
 */
GTEXT(z_arm64_fpu_save)
SECTION_FUNC(TEXT, z_arm64_fpu_save)

	/* q registers are 16 bytes wide; each stp stores a pair (32 bytes),
	 * hence the offsets advance by (16 * 2) between instructions.
	 */
	stp	q0,  q1,  [x0, #(16 *  0)]
	stp	q2,  q3,  [x0, #(16 *  2)]
	stp	q4,  q5,  [x0, #(16 *  4)]
	stp	q6,  q7,  [x0, #(16 *  6)]
	stp	q8,  q9,  [x0, #(16 *  8)]
	stp	q10, q11, [x0, #(16 * 10)]
	stp	q12, q13, [x0, #(16 * 12)]
	stp	q14, q15, [x0, #(16 * 14)]
	stp	q16, q17, [x0, #(16 * 16)]
	stp	q18, q19, [x0, #(16 * 18)]
	stp	q20, q21, [x0, #(16 * 20)]
	stp	q22, q23, [x0, #(16 * 22)]
	stp	q24, q25, [x0, #(16 * 24)]
	stp	q26, q27, [x0, #(16 * 26)]
	stp	q28, q29, [x0, #(16 * 28)]
	stp	q30, q31, [x0, #(16 * 30)]

	/* FP status and control registers, saved as 32-bit words
	 * right after the vector register area.
	 */
	mrs	x1, fpsr
	mrs	x2, fpcr
	str	w1, [x0, #(16 * 32 + 0)]
	str	w2, [x0, #(16 * 32 + 4)]

	ret

/*
 * void z_arm64_fpu_restore(struct z_arm64_fp_context *saved_fp_context);
 *
 * Mirror image of z_arm64_fpu_save: reload q0..q31 then fpsr/fpcr from
 * the buffer pointed to by x0. Same layout and same access-enabled
 * precondition as above.
 */
GTEXT(z_arm64_fpu_restore)
SECTION_FUNC(TEXT, z_arm64_fpu_restore)

	ldp	q0,  q1,  [x0, #(16 *  0)]
	ldp	q2,  q3,  [x0, #(16 *  2)]
	ldp	q4,  q5,  [x0, #(16 *  4)]
	ldp	q6,  q7,  [x0, #(16 *  6)]
	ldp	q8,  q9,  [x0, #(16 *  8)]
	ldp	q10, q11, [x0, #(16 * 10)]
	ldp	q12, q13, [x0, #(16 * 12)]
	ldp	q14, q15, [x0, #(16 * 14)]
	ldp	q16, q17, [x0, #(16 * 16)]
	ldp	q18, q19, [x0, #(16 * 18)]
	ldp	q20, q21, [x0, #(16 * 20)]
	ldp	q22, q23, [x0, #(16 * 22)]
	ldp	q24, q25, [x0, #(16 * 24)]
	ldp	q26, q27, [x0, #(16 * 26)]
	ldp	q28, q29, [x0, #(16 * 28)]
	ldp	q30, q31, [x0, #(16 * 30)]

	/* restore FP status/control; writes go through 64-bit x regs but
	 * only the low 32 bits (loaded via w1/w2) are significant.
	 */
	ldr	w1, [x0, #(16 * 32 + 0)]
	ldr	w2, [x0, #(16 * 32 + 4)]
	msr	fpsr, x1
	msr	fpcr, x2

	ret
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,260 @@ | ||
/* | ||
* Copyright (c) 2021 BayLibre SAS | ||
* Written by: Nicolas Pitre | ||
* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
#include <kernel.h> | ||
#include <kernel_structs.h> | ||
#include <kernel_arch_interface.h> | ||
#include <arch/cpu.h> | ||
|
||
/* to be found in fpu.S */ | ||
extern void z_arm64_fpu_save(struct z_arm64_fp_context *saved_fp_context); | ||
extern void z_arm64_fpu_restore(struct z_arm64_fp_context *saved_fp_context); | ||
|
||
#define FPU_DEBUG 0 | ||
|
||
#if FPU_DEBUG | ||
|
||
/* | ||
* Debug traces have to be produced without printk() or any other functions | ||
* using a va_list as va_start() always copy the FPU registers that could be | ||
* used to pass float arguments, and that triggers an FPU access trap. | ||
*/ | ||
|
||
#include <string.h> | ||
|
||
static void DBG(char *msg, struct k_thread *th) | ||
{ | ||
char buf[80], *p; | ||
unsigned int v; | ||
|
||
strcpy(buf, "CPU# exc# "); | ||
buf[3] = '0' + _current_cpu->id; | ||
buf[8] = '0' + arch_exception_depth(); | ||
strcat(buf, _current->name); | ||
strcat(buf, ": "); | ||
strcat(buf, msg); | ||
strcat(buf, " "); | ||
strcat(buf, th->name); | ||
|
||
|
||
v = *(unsigned char *)&th->arch.saved_fp_context; | ||
p = buf + strlen(buf); | ||
*p++ = ' '; | ||
*p++ = ((v >> 4) < 10) ? ((v >> 4) + '0') : ((v >> 4) - 10 + 'a'); | ||
*p++ = ((v & 15) < 10) ? ((v & 15) + '0') : ((v & 15) - 10 + 'a'); | ||
*p++ = '\n'; | ||
*p = 0; | ||
|
||
k_str_out(buf, p - buf); | ||
} | ||
|
||
#else | ||
|
||
/* No-op stand-in when FPU_DEBUG is disabled: keeps DBG() call sites valid. */
static inline void DBG(char *msg, struct k_thread *t) { }
|
||
#endif /* FPU_DEBUG */ | ||
|
||
/*
 * Flush FPU content and disable access.
 * This is called locally and also from flush_fpu_ipi_handler().
 *
 * If this CPU currently owns a thread's FPU context, save that context to
 * the owner's save area and clear ownership, leaving FPU access disabled
 * so the next user takes a trap (see z_arm64_fpu_trap()).
 * Must be called with IRQs disabled (asserted below).
 */
void z_arm64_flush_local_fpu(void)
{
	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");

	struct k_thread *owner = _current_cpu->arch.fpu_owner;

	if (owner != NULL) {
		uint64_t cpacr = read_cpacr_el1();

		/* turn on FPU access (saving would otherwise trap) */
		write_cpacr_el1(cpacr | CPACR_EL1_FPEN_NOTRAP);
		/* make the CPACR change effective before the FP accesses */
		isb();

		/* save current owner's content */
		z_arm64_fpu_save(&owner->arch.saved_fp_context);
		/* make sure content made it to memory before releasing */
		dsb();
		/* release ownership — ordered after dsb() so any observer of
		 * fpu_owner == NULL also sees the completed save
		 */
		_current_cpu->arch.fpu_owner = NULL;
		DBG("disable", owner);

		/* disable FPU access */
		write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP);
	}
}
|
||
#ifdef CONFIG_SMP | ||
static void flush_owned_fpu(struct k_thread *thread) | ||
{ | ||
__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled"); | ||
|
||
int i; | ||
|
||
/* search all CPUs for the owner we want */ | ||
for (i = 0; i < CONFIG_MP_NUM_CPUS; i++) { | ||
if (_kernel.cpus[i].arch.fpu_owner != thread) { | ||
continue; | ||
} | ||
/* we found it live on CPU i */ | ||
if (i == _current_cpu->id) { | ||
z_arm64_flush_local_fpu(); | ||
} else { | ||
/* the FPU context is live on another CPU */ | ||
z_arm64_flush_fpu_ipi(i); | ||
|
||
/* | ||
* Wait for it only if this is about the thread | ||
* currently running on this CPU. Otherwise the | ||
* other CPU running some other thread could regain | ||
* ownership the moment it is removed from it and | ||
* we would be stuck here. | ||
* | ||
* Also, if this is for the thread running on this | ||
* CPU, then we preemptively flush any live context | ||
* on this CPU as well since we're likely to | ||
* replace it, and this avoids a deadlock where | ||
* two CPUs want to pull each other's FPU context. | ||
*/ | ||
if (thread == _current) { | ||
z_arm64_flush_local_fpu(); | ||
while (_kernel.cpus[i].arch.fpu_owner == thread) { | ||
dsb(); | ||
} | ||
} | ||
} | ||
break; | ||
} | ||
} | ||
#endif | ||
|
||
void z_arm64_fpu_enter_exc(void) | ||
{ | ||
__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled"); | ||
|
||
/* always deny FPU access whenever an exception is entered */ | ||
write_cpacr_el1(read_cpacr_el1() & ~CPACR_EL1_FPEN_NOTRAP); | ||
isb(); | ||
} | ||
|
||
/*
 * Process the FPU trap.
 *
 * This usually means that FP regs belong to another thread. Save them
 * to that thread's save area and restore the current thread's content.
 *
 * We also get here when FP regs are used while in exception as FP access
 * is always disabled by default in that case. If so we save the FPU content
 * to the owning thread and simply enable FPU access. Exceptions should be
 * short and don't have persistent register contexts when they're done so
 * there is nothing to save/restore for that context... as long as we
 * don't get interrupted that is. To ensure that we mask interrupts to
 * the triggering exception context.
 *
 * @param esf exception stack frame of the trapping context; its spsr may
 *            be modified here to mask IRQs on return (nested-exception case).
 */
void z_arm64_fpu_trap(z_arch_esf_t *esf)
{
	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");

	/* turn on FPU access so the save/restore below won't re-trap */
	write_cpacr_el1(read_cpacr_el1() | CPACR_EL1_FPEN_NOTRAP);
	isb();

	/* save current owner's content if any */
	struct k_thread *owner = _current_cpu->arch.fpu_owner;

	if (owner) {
		z_arm64_fpu_save(&owner->arch.saved_fp_context);
		/* ensure the save is in memory before releasing ownership */
		dsb();
		_current_cpu->arch.fpu_owner = NULL;
		DBG("save", owner);
	}

	if (arch_exception_depth() > 1) {
		/*
		 * We were already in exception when the FPU access trap.
		 * We give it access and prevent any further IRQ recursion
		 * by disabling IRQs as we wouldn't be able to preserve the
		 * interrupted exception's FPU context.
		 */
		esf->spsr |= DAIF_IRQ_BIT;
		/* no ownership change: the exception context is transient */
		return;
	}

#ifdef CONFIG_SMP
	/*
	 * Make sure the FPU context we need isn't live on another CPU.
	 * The current CPU's FPU context is NULL at this point.
	 */
	flush_owned_fpu(_current);
#endif

	/* become new owner */
	_current_cpu->arch.fpu_owner = _current;

	/* restore our content */
	z_arm64_fpu_restore(&_current->arch.saved_fp_context);
	DBG("restore", _current);
}
|
||
/* | ||
* Perform lazy FPU context switching by simply granting or denying | ||
* access to FP regs based on FPU ownership before leaving the last | ||
* exception level. If current thread doesn't own the FP regs then | ||
* it will trap on its first access and then the actual FPU context | ||
* switching will occur. | ||
* | ||
* This is called on every exception exit except for z_arm64_fpu_trap(). | ||
*/ | ||
void z_arm64_fpu_exit_exc(void) | ||
{ | ||
__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled"); | ||
|
||
uint64_t cpacr = read_cpacr_el1(); | ||
|
||
if (arch_exception_depth() == 1) { | ||
/* We're about to leave exception mode */ | ||
if (_current_cpu->arch.fpu_owner == _current) { | ||
/* turn on FPU access */ | ||
write_cpacr_el1(cpacr | CPACR_EL1_FPEN_NOTRAP); | ||
} else { | ||
/* deny FPU access */ | ||
write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP); | ||
} | ||
} else { | ||
/* | ||
* Shallower exception levels should always trap on FPU | ||
* access as we want to make sure IRQs are disabled before | ||
* granting them access. | ||
*/ | ||
write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP); | ||
} | ||
} | ||
|
||
/*
 * Force @thread's FPU context out of the hardware registers.
 *
 * If the thread currently owns a live FPU context (on this CPU, or — in
 * SMP builds — on any CPU), that context is saved to the thread's save
 * area and ownership released. IRQs are locked around the flush so
 * ownership can't change underneath us.
 *
 * @param thread thread to flush; NULL is a no-op
 * @return always 0 (this operation cannot fail here)
 */
int arch_float_disable(struct k_thread *thread)
{
	if (thread != NULL) {
		unsigned int key = arch_irq_lock();

#ifdef CONFIG_SMP
		flush_owned_fpu(thread);
#else
		if (thread == _current_cpu->arch.fpu_owner) {
			z_arm64_flush_local_fpu();
		}
#endif

		arch_irq_unlock(key);
	}

	return 0;
}
|
||
/*
 * Enable FP usage for @thread.
 *
 * Intentionally a no-op: with the lazy context switching scheme above,
 * any thread is granted FPU access automatically on its first FP access
 * (via z_arm64_fpu_trap()), so there is nothing to do in advance.
 *
 * @param thread  thread to enable (unused)
 * @param options FP option flags (unused)
 * @return always 0
 */
int arch_float_enable(struct k_thread *thread, unsigned int options)
{
	/* floats always gets enabled automatically at the moment */
	return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.