diff --git a/arch/arm64/core/CMakeLists.txt b/arch/arm64/core/CMakeLists.txt index cc573d880c04a5..e36c9a6ae7eda4 100644 --- a/arch/arm64/core/CMakeLists.txt +++ b/arch/arm64/core/CMakeLists.txt @@ -20,6 +20,7 @@ zephyr_library_sources( vector_table.S ) +zephyr_library_sources_ifdef(CONFIG_FPU_SHARING fpu.c fpu.S) zephyr_library_sources_ifdef(CONFIG_ARM_MMU mmu.c mmu.S) zephyr_library_sources_ifdef(CONFIG_USERSPACE userspace.S) zephyr_library_sources_ifdef(CONFIG_GEN_SW_ISR_TABLE isr_wrapper.S) diff --git a/arch/arm64/core/Kconfig b/arch/arm64/core/Kconfig index 71ee80b881c714..32f5555b5df04e 100644 --- a/arch/arm64/core/Kconfig +++ b/arch/arm64/core/Kconfig @@ -8,6 +8,9 @@ config CPU_CORTEX_A select CPU_CORTEX select HAS_FLASH_LOAD_OFFSET select SCHED_IPI_SUPPORTED if SMP + select CPU_HAS_FPU + imply FPU + imply FPU_SHARING help This option signifies the use of a CPU of the Cortex-A family. diff --git a/arch/arm64/core/fpu.S b/arch/arm64/core/fpu.S new file mode 100644 index 00000000000000..d5c3bce9318220 --- /dev/null +++ b/arch/arm64/core/fpu.S @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2021 BayLibre SAS + * Written by: Nicolas Pitre + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include + +_ASM_FILE_PROLOGUE + +GTEXT(z_arm64_fpu_save) +SECTION_FUNC(TEXT, z_arm64_fpu_save) + + stp q0, q1, [x0, #(16 * 0)] + stp q2, q3, [x0, #(16 * 2)] + stp q4, q5, [x0, #(16 * 4)] + stp q6, q7, [x0, #(16 * 6)] + stp q8, q9, [x0, #(16 * 8)] + stp q10, q11, [x0, #(16 * 10)] + stp q12, q13, [x0, #(16 * 12)] + stp q14, q15, [x0, #(16 * 14)] + stp q16, q17, [x0, #(16 * 16)] + stp q18, q19, [x0, #(16 * 18)] + stp q20, q21, [x0, #(16 * 20)] + stp q22, q23, [x0, #(16 * 22)] + stp q24, q25, [x0, #(16 * 24)] + stp q26, q27, [x0, #(16 * 26)] + stp q28, q29, [x0, #(16 * 28)] + stp q30, q31, [x0, #(16 * 30)] + + mrs x1, fpsr + mrs x2, fpcr + str w1, [x0, #(16 * 32 + 0)] + str w2, [x0, #(16 * 32 + 4)] + + ret + +GTEXT(z_arm64_fpu_restore) +SECTION_FUNC(TEXT, z_arm64_fpu_restore) + + ldp q0, q1, [x0, #(16 * 0)] + ldp q2, q3, [x0, #(16 * 2)] + ldp q4, q5, [x0, #(16 * 4)] + ldp q6, q7, [x0, #(16 * 6)] + ldp q8, q9, [x0, #(16 * 8)] + ldp q10, q11, [x0, #(16 * 10)] + ldp q12, q13, [x0, #(16 * 12)] + ldp q14, q15, [x0, #(16 * 14)] + ldp q16, q17, [x0, #(16 * 16)] + ldp q18, q19, [x0, #(16 * 18)] + ldp q20, q21, [x0, #(16 * 20)] + ldp q22, q23, [x0, #(16 * 22)] + ldp q24, q25, [x0, #(16 * 24)] + ldp q26, q27, [x0, #(16 * 26)] + ldp q28, q29, [x0, #(16 * 28)] + ldp q30, q31, [x0, #(16 * 30)] + + ldr w1, [x0, #(16 * 32 + 0)] + ldr w2, [x0, #(16 * 32 + 4)] + msr fpsr, x1 + msr fpcr, x2 + + ret diff --git a/arch/arm64/core/fpu.c b/arch/arm64/core/fpu.c new file mode 100644 index 00000000000000..5e50226f7a206a --- /dev/null +++ b/arch/arm64/core/fpu.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2021 BayLibre SAS + * Written by: Nicolas Pitre + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include + +/* to be found in fpu.S */ +extern void z_arm64_fpu_save(struct z_arm64_fp_context *saved_fp_context); +extern void z_arm64_fpu_restore(struct z_arm64_fp_context *saved_fp_context); + +#define FPU_DEBUG 0 + +#if FPU_DEBUG + +/* + * Debug traces have to be produced without printk() or any other functions + * using a va_list as va_start() always copy the FPU registers that could be + * used to pass float arguments, and that triggers an FPU access trap. 
+ */ + +#include + +static void DBG(char *msg, struct k_thread *th) +{ + char buf[80], *p; + unsigned int v; + + strcpy(buf, "CPU# exc# "); + buf[3] = '0' + _current_cpu->id; + buf[8] = '0' + arch_exception_depth(); + strcat(buf, _current->name); + strcat(buf, ": "); + strcat(buf, msg); + strcat(buf, " "); + strcat(buf, th->name); + + + v = *(unsigned char *)&th->arch.saved_fp_context; + p = buf + strlen(buf); + *p++ = ' '; + *p++ = ((v >> 4) < 10) ? ((v >> 4) + '0') : ((v >> 4) - 10 + 'a'); + *p++ = ((v & 15) < 10) ? ((v & 15) + '0') : ((v & 15) - 10 + 'a'); + *p++ = '\n'; + *p = 0; + + k_str_out(buf, p - buf); +} + +#else + +static inline void DBG(char *msg, struct k_thread *t) { } + +#endif /* FPU_DEBUG */ + +/* + * Flush FPU content and disable access. + * This is called locally and also from flush_fpu_ipi_handler(). + */ +void z_arm64_flush_local_fpu(void) +{ + __ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled"); + + struct k_thread *owner = _current_cpu->arch.fpu_owner; + + if (owner != NULL) { + uint64_t cpacr = read_cpacr_el1(); + + /* turn on FPU access */ + write_cpacr_el1(cpacr | CPACR_EL1_FPEN_NOTRAP); + isb(); + + /* save current owner's content */ + z_arm64_fpu_save(&owner->arch.saved_fp_context); + /* make sure content made it to memory before releasing */ + dsb(); + /* release ownership */ + _current_cpu->arch.fpu_owner = NULL; + DBG("disable", owner); + + /* disable FPU access */ + write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP); + } +} + +#ifdef CONFIG_SMP +static void flush_owned_fpu(struct k_thread *thread) +{ + __ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled"); + + int i; + + /* search all CPUs for the owner we want */ + for (i = 0; i < CONFIG_MP_NUM_CPUS; i++) { + if (_kernel.cpus[i].arch.fpu_owner != thread) { + continue; + } + /* we found it live on CPU i */ + if (i == _current_cpu->id) { + z_arm64_flush_local_fpu(); + } else { + /* the FPU context is live on another CPU */ + z_arm64_flush_fpu_ipi(i); + + /* + * Wait for it only if this is about the thread + * currently running on this CPU. Otherwise the + * other CPU running some other thread could regain + * ownership the moment it is removed from it and + * we would be stuck here. + * + * Also, if this is for the thread running on this + * CPU, then we preemptively flush any live context + * on this CPU as well since we're likely to + * replace it, and this avoids a deadlock where + * two CPUs want to pull each other's FPU context. + */ + if (thread == _current) { + z_arm64_flush_local_fpu(); + while (_kernel.cpus[i].arch.fpu_owner == thread) { + dsb(); + } + } + } + break; + } +} +#endif + +void z_arm64_fpu_enter_exc(void) +{ + __ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled"); + + /* always deny FPU access whenever an exception is entered */ + write_cpacr_el1(read_cpacr_el1() & ~CPACR_EL1_FPEN_NOTRAP); + isb(); +} + +/* + * Process the FPU trap. + * + * This usually means that FP regs belong to another thread. Save them + * to that thread's save area and restore the current thread's content. + * + * We also get here when FP regs are used while in exception as FP access + * is always disabled by default in that case. If so we save the FPU content + * to the owning thread and simply enable FPU access. Exceptions should be + * short and don't have persistent register contexts when they're done so + * there is nothing to save/restore for that context... as long as we + * don't get interrupted that is. 
To ensure that, we mask interrupts in
+ * the triggering exception context.
+ */
+void z_arm64_fpu_trap(z_arch_esf_t *esf)
+{
+	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");
+
+	/* turn on FPU access */
+	write_cpacr_el1(read_cpacr_el1() | CPACR_EL1_FPEN_NOTRAP);
+	isb();
+
+	/* save current owner's content if any */
+	struct k_thread *owner = _current_cpu->arch.fpu_owner;
+
+	if (owner) {
+		z_arm64_fpu_save(&owner->arch.saved_fp_context);
+		dsb();
+		_current_cpu->arch.fpu_owner = NULL;
+		DBG("save", owner);
+	}
+
+	if (arch_exception_depth() > 1) {
+		/*
+		 * We were already in an exception when the FPU access trapped.
+		 * We give it access and prevent any further IRQ recursion
+		 * by disabling IRQs as we wouldn't be able to preserve the
+		 * interrupted exception's FPU context.
+		 */
+		esf->spsr |= DAIF_IRQ_BIT;
+		return;
+	}
+
+#ifdef CONFIG_SMP
+	/*
+	 * Make sure the FPU context we need isn't live on another CPU.
+	 * The current CPU's FPU context is NULL at this point.
+	 */
+	flush_owned_fpu(_current);
+#endif
+
+	/* become new owner */
+	_current_cpu->arch.fpu_owner = _current;
+
+	/* restore our content */
+	z_arm64_fpu_restore(&_current->arch.saved_fp_context);
+	DBG("restore", _current);
+}
+
+/*
+ * Perform lazy FPU context switching by simply granting or denying
+ * access to FP regs based on FPU ownership before leaving the last
+ * exception level. If the current thread doesn't own the FP regs then
+ * it will trap on its first access and then the actual FPU context
+ * switching will occur.
+ *
+ * This is called on every exception exit except for z_arm64_fpu_trap().
+ */
+void z_arm64_fpu_exit_exc(void)
+{
+	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");
+
+	uint64_t cpacr = read_cpacr_el1();
+
+	if (arch_exception_depth() == 1) {
+		/* We're about to leave exception mode */
+		if (_current_cpu->arch.fpu_owner == _current) {
+			/* turn on FPU access */
+			write_cpacr_el1(cpacr | CPACR_EL1_FPEN_NOTRAP);
+		} else {
+			/* deny FPU access */
+			write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP);
+		}
+	} else {
+		/*
+		 * Shallower exception levels should always trap on FPU
+		 * access as we want to make sure IRQs are disabled before
+		 * granting them access.
+ */ + write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP); + } +} + +int arch_float_disable(struct k_thread *thread) +{ + if (thread != NULL) { + unsigned int key = arch_irq_lock(); + +#ifdef CONFIG_SMP + flush_owned_fpu(thread); +#else + if (thread == _current_cpu->arch.fpu_owner) { + z_arm64_flush_local_fpu(); + } +#endif + + arch_irq_unlock(key); + } + + return 0; +} + +int arch_float_enable(struct k_thread *thread, unsigned int options) +{ + /* floats always gets enabled automatically at the moment */ + return 0; +} diff --git a/arch/arm64/core/reset.c b/arch/arm64/core/reset.c index 6e362747260470..2c5372f6b84f46 100644 --- a/arch/arm64/core/reset.c +++ b/arch/arm64/core/reset.c @@ -149,7 +149,7 @@ void z_arm64_el1_init(void) isb(); reg = 0U; /* RES0 */ - reg |= CPACR_EL1_FPEN_NOTRAP; /* Do not trap NEON/SIMD/FP */ + reg |= CPACR_EL1_FPEN_NOTRAP; /* Do not trap NEON/SIMD/FP initially */ /* TODO: CONFIG_FLOAT_*_FORBIDDEN */ write_cpacr_el1(reg); diff --git a/arch/arm64/core/smp.c b/arch/arm64/core/smp.c index 9f2ca61bad48cb..9ae6d178e44619 100644 --- a/arch/arm64/core/smp.c +++ b/arch/arm64/core/smp.c @@ -27,6 +27,7 @@ #define SGI_SCHED_IPI 0 #define SGI_PTABLE_IPI 1 +#define SGI_FPU_IPI 2 struct boot_params { uint64_t mpid; @@ -129,6 +130,9 @@ void z_arm64_secondary_start(void) #ifdef CONFIG_USERSPACE irq_enable(SGI_PTABLE_IPI); #endif +#ifdef CONFIG_FPU_SHARING + irq_enable(SGI_FPU_IPI); +#endif #endif fn = arm64_cpu_boot_params.fn; @@ -191,6 +195,24 @@ void z_arm64_ptable_ipi(void) } #endif +#ifdef CONFIG_FPU_SHARING +void flush_fpu_ipi_handler(const void *unused) +{ + ARG_UNUSED(unused); + + disable_irq(); + z_arm64_flush_local_fpu(); + /* no need to re-enable IRQs here */ +} + +void z_arm64_flush_fpu_ipi(unsigned int cpu) +{ + const uint64_t mpidr = GET_MPIDR(); + + gic_raise_sgi(SGI_FPU_IPI, mpidr, (1 << cpu)); +} +#endif + static int arm64_smp_init(const struct device *dev) { ARG_UNUSED(dev); @@ -206,6 +228,10 @@ static int arm64_smp_init(const struct device *dev) IRQ_CONNECT(SGI_PTABLE_IPI, IRQ_DEFAULT_PRIORITY, ptable_ipi_handler, NULL, 0); irq_enable(SGI_PTABLE_IPI); #endif +#ifdef CONFIG_FPU_SHARING + IRQ_CONNECT(SGI_FPU_IPI, IRQ_DEFAULT_PRIORITY, flush_fpu_ipi_handler, NULL, 0); + irq_enable(SGI_FPU_IPI); +#endif return 0; } diff --git a/arch/arm64/core/switch.S b/arch/arm64/core/switch.S index 772e1b4f137ce2..5235f75f6fc116 100644 --- a/arch/arm64/core/switch.S +++ b/arch/arm64/core/switch.S @@ -125,6 +125,15 @@ SECTION_FUNC(TEXT, z_arm64_sync_exc) mrs x0, esr_el1 lsr x1, x0, #26 +#ifdef CONFIG_FPU_SHARING + cmp x1, #0x07 /*Access to SIMD or floating-point */ + bne 1f + mov x0, sp + bl z_arm64_fpu_trap + b z_arm64_exit_exc_fpu_done +1: +#endif + cmp x1, #0x15 /* 0x15 = SVC */ bne inv diff --git a/arch/arm64/core/vector_table.S b/arch/arm64/core/vector_table.S index e7a8bc3875492a..efdcaf8424a041 100644 --- a/arch/arm64/core/vector_table.S +++ b/arch/arm64/core/vector_table.S @@ -58,6 +58,10 @@ _ASM_FILE_PROLOGUE add \xreg0, \xreg0, \xreg1 msr tpidrro_el0, \xreg0 +#ifdef CONFIG_FPU_SHARING + bl z_arm64_fpu_enter_exc +#endif + .endm /* @@ -209,6 +213,13 @@ SECTION_FUNC(TEXT, z_arm64_serror) GTEXT(z_arm64_exit_exc) SECTION_FUNC(TEXT, z_arm64_exit_exc) +#ifdef CONFIG_FPU_SHARING + bl z_arm64_fpu_exit_exc + + GTEXT(z_arm64_exit_exc_fpu_done) + SECTION_FUNC(TEXT, z_arm64_exit_exc_fpu_done) +#endif + ldp x0, x1, [sp, ___esf_t_spsr_elr_OFFSET] msr spsr_el1, x0 msr elr_el1, x1 diff --git a/arch/arm64/include/kernel_arch_func.h b/arch/arm64/include/kernel_arch_func.h index 
09bd937242375e..44cd6943b332ab 100644
--- a/arch/arm64/include/kernel_arch_func.h
+++ b/arch/arm64/include/kernel_arch_func.h
@@ -44,6 +44,11 @@ extern void z_arm64_userspace_enter(z_arch_esf_t *esf, uintptr_t sp_el0);
 extern void z_arm64_set_ttbr0(uintptr_t ttbr0);
 extern void z_arm64_ptable_ipi(void);
+#ifdef CONFIG_FPU_SHARING
+void z_arm64_flush_local_fpu(void);
+void z_arm64_flush_fpu_ipi(unsigned int cpu);
+#endif
+
 #endif /* _ASMLANGUAGE */
 #ifdef __cplusplus
diff --git a/doc/reference/kernel/other/float.rst b/doc/reference/kernel/other/float.rst
index 8b967f6aa51b6f..d9a1e85c2a225b 100644
--- a/doc/reference/kernel/other/float.rst
+++ b/doc/reference/kernel/other/float.rst
@@ -133,6 +133,32 @@ If an ARM thread does not require use of the floating point registers any
 more, it can call :c:func:`k_float_disable`. This instructs the kernel not to
 save or restore its FP context during thread context switching.
+ARM64 architecture
+------------------
+
+.. note::
+   The Shared FP registers mode is the default Floating Point
+   Services mode on ARM64. The compiler is free to optimize code
+   using FP/SIMD registers, and library functions such as memcpy
+   are known to make use of them.
+
+On the ARM64 (AArch64) architecture the kernel treats each thread as an FPU
+user on a case-by-case basis. A "lazy save" algorithm is used during context
+switching which updates the floating point registers only when it is absolutely
+necessary. For example, the registers are *not* saved when switching from an
+FPU user to a non-user thread, and then back to the original FPU user.
+
+FPU register usage by ISRs is supported although not recommended. When an
+ISR uses floating point or SIMD registers, the access is trapped, the
+current FPU user context is saved in the thread object, and the ISR is
+resumed with interrupts disabled so as to prevent another IRQ from
+interrupting the ISR and potentially requesting FPU usage. Because ISRs
+don't have a persistent register context, there is no provision for saving
+an ISR's FPU context either, hence the IRQ disabling.
+
+Each thread object becomes 512 bytes larger when Shared FP registers mode
+is enabled.
+ ARCv2 architecture ------------------ diff --git a/include/arch/arm64/structs.h b/include/arch/arm64/structs.h index 25d0659377c3f5..25052ccabe49d5 100644 --- a/include/arch/arm64/structs.h +++ b/include/arch/arm64/structs.h @@ -9,7 +9,9 @@ /* Per CPU architecture specifics */ struct _cpu_arch { - /* content coming soon */ +#ifdef CONFIG_FPU_SHARING + struct k_thread *fpu_owner; +#endif }; #endif /* ZEPHYR_INCLUDE_ARM64_STRUCTS_H_ */ diff --git a/include/arch/arm64/thread.h b/include/arch/arm64/thread.h index f8156246f72048..9b542abcc68fb8 100644 --- a/include/arch/arm64/thread.h +++ b/include/arch/arm64/thread.h @@ -40,9 +40,20 @@ struct _callee_saved { typedef struct _callee_saved _callee_saved_t; +struct z_arm64_fp_context { + __int128 q0, q1, q2, q3, q4, q5, q6, q7; + __int128 q8, q9, q10, q11, q12, q13, q14, q15; + __int128 q16, q17, q18, q19, q20, q21, q22, q23; + __int128 q24, q25, q26, q27, q28, q29, q30, q31; + uint32_t fpsr, fpcr; +}; + struct _thread_arch { #ifdef CONFIG_USERSPACE struct arm_mmu_ptables *ptables; +#endif +#ifdef CONFIG_FPU_SHARING + struct z_arm64_fp_context saved_fp_context; #endif uint8_t exception_depth; }; diff --git a/subsys/testsuite/ztest/src/ztest.c b/subsys/testsuite/ztest/src/ztest.c index 5766cd05f45b87..d25eaf5620f110 100644 --- a/subsys/testsuite/ztest/src/ztest.c +++ b/subsys/testsuite/ztest/src/ztest.c @@ -116,6 +116,17 @@ static void cpu_hold(void *arg1, void *arg2, void *arg3) k_sem_give(&cpuhold_sem); +#if defined(CONFIG_ARM64) && defined(CONFIG_FPU_SHARING) + /* + * We'll be spinning with IRQs disabled. The flush-your-FPU request + * IPI will never be serviced during that time. Therefore we flush + * the FPU preemptively here to prevent any other CPU waiting after + * this CPU forever and deadlock the system. + */ + extern void z_arm64_flush_local_fpu(void); + z_arm64_flush_local_fpu(); +#endif + while (cpuhold_active) { k_busy_wait(1000); } diff --git a/tests/kernel/fpu_sharing/generic/src/float_context.h b/tests/kernel/fpu_sharing/generic/src/float_context.h index bbee7c19742efd..23e68b95d85a18 100644 --- a/tests/kernel/fpu_sharing/generic/src/float_context.h +++ b/tests/kernel/fpu_sharing/generic/src/float_context.h @@ -83,6 +83,21 @@ struct fp_non_volatile_register_set { float s[16]; }; +#define SIZEOF_FP_VOLATILE_REGISTER_SET \ + sizeof(struct fp_volatile_register_set) +#define SIZEOF_FP_NON_VOLATILE_REGISTER_SET \ + sizeof(struct fp_non_volatile_register_set) + +#elif defined(CONFIG_ARM64) + +struct fp_volatile_register_set { + __int128 regs[16]; /* q0..q15 */ +}; + +struct fp_non_volatile_register_set { + __int128 regs[16]; /* q16..q31 */ +}; + #define SIZEOF_FP_VOLATILE_REGISTER_SET \ sizeof(struct fp_volatile_register_set) #define SIZEOF_FP_NON_VOLATILE_REGISTER_SET \ diff --git a/tests/kernel/fpu_sharing/generic/src/float_regs_arm64_gcc.h b/tests/kernel/fpu_sharing/generic/src/float_regs_arm64_gcc.h new file mode 100644 index 00000000000000..0d7d3d4412c873 --- /dev/null +++ b/tests/kernel/fpu_sharing/generic/src/float_regs_arm64_gcc.h @@ -0,0 +1,116 @@ +/** + * @file + * @brief ARM64 GCC specific floating point register macros + */ + +/* + * Copyright (c) 2021 BayLibre SAS + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _FLOAT_REGS_ARM64_GCC_H +#define _FLOAT_REGS_ARM64_GCC_H + +#include +#include "float_context.h" + +/** + * + * @brief Load all floating point registers + * + * This function loads ALL floating point registers pointed to by @a regs. 
+ * It is expected that a subsequent call to _store_all_float_registers()
+ * will be issued to dump the floating point registers to memory.
+ *
+ * The format/organization of 'struct fp_register_set' is not important;
+ * the generic C test code (main.c) merely treats it as an array of bytes.
+ *
+ * The only requirement is that the arch specific implementations of
+ * _load_all_float_registers() and _store_all_float_registers() agree
+ * on the format.
+ *
+ * @return N/A
+ */
+static inline void _load_all_float_registers(struct fp_register_set *regs)
+{
+	__asm__ volatile (
+		"ldp q0, q1, [x0, #(16 * 0)]\n\t"
+		"ldp q2, q3, [x0, #(16 * 2)]\n\t"
+		"ldp q4, q5, [x0, #(16 * 4)]\n\t"
+		"ldp q6, q7, [x0, #(16 * 6)]\n\t"
+		"ldp q8, q9, [x0, #(16 * 8)]\n\t"
+		"ldp q10, q11, [x0, #(16 * 10)]\n\t"
+		"ldp q12, q13, [x0, #(16 * 12)]\n\t"
+		"ldp q14, q15, [x0, #(16 * 14)]\n\t"
+		"ldp q16, q17, [x0, #(16 * 16)]\n\t"
+		"ldp q18, q19, [x0, #(16 * 18)]\n\t"
+		"ldp q20, q21, [x0, #(16 * 20)]\n\t"
+		"ldp q22, q23, [x0, #(16 * 22)]\n\t"
+		"ldp q24, q25, [x0, #(16 * 24)]\n\t"
+		"ldp q26, q27, [x0, #(16 * 26)]\n\t"
+		"ldp q28, q29, [x0, #(16 * 28)]\n\t"
+		"ldp q30, q31, [x0, #(16 * 30)]"
+		:
+		: "r" (regs)
+		);
+}
+
+/**
+ *
+ * @brief Dump all floating point registers to memory
+ *
+ * This function stores ALL floating point registers to the memory buffer
+ * specified by @a regs. It is expected that a previous invocation of
+ * _load_all_float_registers() occurred to load all the floating point
+ * registers from a memory buffer.
+ *
+ * @return N/A
+ */
+
+static inline void _store_all_float_registers(struct fp_register_set *regs)
+{
+	__asm__ volatile (
+		"stp q0, q1, [x0, #(16 * 0)]\n\t"
+		"stp q2, q3, [x0, #(16 * 2)]\n\t"
+		"stp q4, q5, [x0, #(16 * 4)]\n\t"
+		"stp q6, q7, [x0, #(16 * 6)]\n\t"
+		"stp q8, q9, [x0, #(16 * 8)]\n\t"
+		"stp q10, q11, [x0, #(16 * 10)]\n\t"
+		"stp q12, q13, [x0, #(16 * 12)]\n\t"
+		"stp q14, q15, [x0, #(16 * 14)]\n\t"
+		"stp q16, q17, [x0, #(16 * 16)]\n\t"
+		"stp q18, q19, [x0, #(16 * 18)]\n\t"
+		"stp q20, q21, [x0, #(16 * 20)]\n\t"
+		"stp q22, q23, [x0, #(16 * 22)]\n\t"
+		"stp q24, q25, [x0, #(16 * 24)]\n\t"
+		"stp q26, q27, [x0, #(16 * 26)]\n\t"
+		"stp q28, q29, [x0, #(16 * 28)]\n\t"
+		"stp q30, q31, [x0, #(16 * 30)]"
+		:
+		: "r" (regs)
+		: "memory"
+		);
+}
+
+/**
+ *
+ * @brief Load then dump all float registers to memory
+ *
+ * This function loads ALL floating point registers from the memory buffer
+ * specified by @a regs, and then stores them back to that buffer.
+ *
+ * This routine is called by a high priority thread prior to calling a primitive
+ * that pends and triggers a co-operative context switch to a low priority
+ * thread.
+ * + * @return N/A + */ + +static inline void _load_then_store_all_float_registers( + struct fp_register_set *regs) +{ + _load_all_float_registers(regs); + _store_all_float_registers(regs); +} +#endif /* _FLOAT_REGS_ARM64_GCC_H */ diff --git a/tests/kernel/fpu_sharing/generic/src/load_store.c b/tests/kernel/fpu_sharing/generic/src/load_store.c index ae4f5770e8cb4f..7ddb7a3f65ff76 100644 --- a/tests/kernel/fpu_sharing/generic/src/load_store.c +++ b/tests/kernel/fpu_sharing/generic/src/load_store.c @@ -50,6 +50,12 @@ #else #include "float_regs_arm_other.h" #endif /* __GNUC__ */ +#elif defined(CONFIG_ARM64) +#if defined(__GNUC__) +#include "float_regs_arm64_gcc.h" +#else +#include "float_regs_arm64_other.h" +#endif /* __GNUC__ */ #elif defined(CONFIG_ISA_ARCV2) #if defined(__GNUC__) #include "float_regs_arc_gcc.h" @@ -84,7 +90,7 @@ static volatile unsigned int load_store_low_count; static volatile unsigned int load_store_high_count; /* Indicates that the load/store test exited */ -static bool test_exited; +static volatile bool test_exited; /* Semaphore for signaling end of test */ static K_SEM_DEFINE(test_exit_sem, 0, 1); diff --git a/tests/kernel/fpu_sharing/generic/src/pi.c b/tests/kernel/fpu_sharing/generic/src/pi.c index 0c754d77f5e70d..454146274ccc73 100644 --- a/tests/kernel/fpu_sharing/generic/src/pi.c +++ b/tests/kernel/fpu_sharing/generic/src/pi.c @@ -52,7 +52,7 @@ static volatile unsigned int calc_pi_low_count; static volatile unsigned int calc_pi_high_count; /* Indicates that the load/store test exited */ -static bool test_exited; +static volatile bool test_exited; /* Semaphore for signaling end of test */ static K_SEM_DEFINE(test_exit_sem, 0, 1); diff --git a/tests/kernel/fpu_sharing/generic/testcase.yaml b/tests/kernel/fpu_sharing/generic/testcase.yaml index 1ae6422177f16f..bd15b019e9afb8 100644 --- a/tests/kernel/fpu_sharing/generic/testcase.yaml +++ b/tests/kernel/fpu_sharing/generic/testcase.yaml @@ -13,6 +13,13 @@ tests: tags: kernel timeout: 600 min_ram: 16 + kernel.fpu_sharing.generic.arm64: + extra_args: PI_NUM_ITERATIONS=70000 + arch_allow: arm64 + filter: CONFIG_CPU_CORTEX_A + slow: true + tags: kernel + timeout: 600 kernel.fpu_sharing.generic.riscv32: extra_args: PI_NUM_ITERATIONS=500 filter: CONFIG_CPU_HAS_FPU diff --git a/tests/kernel/interrupt/src/nested_irq.c b/tests/kernel/interrupt/src/nested_irq.c index 923cd5ed2bd771..6c3b5441dd78c5 100644 --- a/tests/kernel/interrupt/src/nested_irq.c +++ b/tests/kernel/interrupt/src/nested_irq.c @@ -16,6 +16,15 @@ #define TEST_NESTED_ISR #endif +#if defined(CONFIG_ARM64) && defined(CONFIG_FPU_SHARING) +/* + * The various log outputs trigger FP access due to the va_list used by + * printk() and friends. IRQs are masked to prevent further IRQ nesting + * when that happens. + */ +#undef TEST_NESTED_ISR +#endif + #define DURATION 5 #define ISR0_TOKEN 0xDEADBEEF diff --git a/tests/kernel/mem_protect/mem_protect/src/mem_protect.h b/tests/kernel/mem_protect/mem_protect/src/mem_protect.h index 88bbc0c5a4e767..ff908ea2ac5f22 100644 --- a/tests/kernel/mem_protect/mem_protect/src/mem_protect.h +++ b/tests/kernel/mem_protect/mem_protect/src/mem_protect.h @@ -131,7 +131,7 @@ static inline void set_fault_valid(bool valid) -#if defined(CONFIG_X86_64) +#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64) #define TEST_HEAP_SIZE (2 << CONFIG_MAX_THREAD_BYTES) * 1024 #define MAX_OBJ 512 #else
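Note (illustration, not part of the patch): the lazy-sharing model documented in float.rst above means that ARM64 threads need no special thread option to use the FPU once CONFIG_FPU and CONFIG_FPU_SHARING are enabled; the first FP/SIMD access simply traps and the kernel claims the FPU for that thread. A minimal sketch of that usage follows. The thread names, stack size and priority are arbitrary assumptions for the example; k_float_disable() is the existing kernel API already referenced in the documentation.

/* Sketch only: two concurrent FPU users under CONFIG_FPU_SHARING on ARM64. */
#include <kernel.h>

#define FP_STACK_SIZE 1024
#define FP_PRIORITY   5

static void fp_worker(void *p1, void *p2, void *p3)
{
	ARG_UNUSED(p1);
	ARG_UNUSED(p2);
	ARG_UNUSED(p3);

	volatile double acc = 0.0;

	/*
	 * The first FP/SIMD instruction here traps, z_arm64_fpu_trap() claims
	 * the FPU for this thread, and execution resumes with direct register
	 * access until another thread or ISR takes the FPU over.
	 */
	for (int i = 1; i <= 1000; i++) {
		acc += 1.0 / i;
	}

	/*
	 * Optional: release FPU ownership explicitly once this thread is done
	 * with floating point, so its saved FP context is flushed now rather
	 * than at the next contended FPU access.
	 */
	k_float_disable(k_current_get());
}

/* No K_FP_REGS-style thread option is needed on ARM64. */
K_THREAD_DEFINE(fp_a, FP_STACK_SIZE, fp_worker, NULL, NULL, NULL, FP_PRIORITY, 0, 0);
K_THREAD_DEFINE(fp_b, FP_STACK_SIZE, fp_worker, NULL, NULL, NULL, FP_PRIORITY, 0, 0);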