From 10286e9b2d32f8566b3693e920095b36030d9816 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 7 Jul 2022 10:14:43 -0700 Subject: [PATCH] Unix arm64 atomics (#71512) * Define_InterlockMethod macro * compiler failure * fix build errors * Set g_arm64_atomics_present at common place * Fix the missing declaration * Change TARGET_ARM64 => HOST_ARM64 * Use LSE for InterlockedCompareExchange * Attempt to fix osx-arm64 build issue * Introduce LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT * Make sure that compiler knows that M1 has lse --- eng/native/configurecompiler.cmake | 5 + src/coreclr/debug/createdump/datatarget.cpp | 6 + src/coreclr/dlls/mscordbi/mscordbi.cpp | 7 + src/coreclr/pal/inc/pal.h | 345 +++++++++----------- src/coreclr/pal/src/init/pal.cpp | 6 + src/coreclr/vm/codeman.cpp | 6 +- 6 files changed, 180 insertions(+), 195 deletions(-) diff --git a/eng/native/configurecompiler.cmake b/eng/native/configurecompiler.cmake index 047999bded88f..551a2dc7f2a2b 100644 --- a/eng/native/configurecompiler.cmake +++ b/eng/native/configurecompiler.cmake @@ -375,6 +375,11 @@ if (CLR_CMAKE_HOST_UNIX) if(CLR_CMAKE_HOST_OSX OR CLR_CMAKE_HOST_MACCATALYST) # We cannot enable "stack-protector-strong" on OS X due to a bug in clang compiler (current version 7.0.2) add_compile_options(-fstack-protector) + if(CLR_CMAKE_HOST_UNIX_ARM64) + # For OSX-Arm64, LSE instructions are enabled by default + add_definitions(-DLSE_INSTRUCTIONS_ENABLED_BY_DEFAULT) + add_compile_options(-mcpu=apple-m1) + endif(CLR_CMAKE_HOST_UNIX_ARM64) elseif(NOT CLR_CMAKE_HOST_BROWSER) check_c_compiler_flag(-fstack-protector-strong COMPILER_SUPPORTS_F_STACK_PROTECTOR_STRONG) if (COMPILER_SUPPORTS_F_STACK_PROTECTOR_STRONG) diff --git a/src/coreclr/debug/createdump/datatarget.cpp b/src/coreclr/debug/createdump/datatarget.cpp index d2efc6e2615f3..5a4438fca5f08 100644 --- a/src/coreclr/debug/createdump/datatarget.cpp +++ b/src/coreclr/debug/createdump/datatarget.cpp @@ -3,6 +3,12 @@ #include "createdump.h" +#if 
defined(HOST_ARM64) +// Flag to check if atomics feature is available on +// the machine +bool g_arm64_atomics_present = false; +#endif + DumpDataTarget::DumpDataTarget(CrashInfo& crashInfo) : m_ref(1), m_crashInfo(crashInfo) diff --git a/src/coreclr/dlls/mscordbi/mscordbi.cpp b/src/coreclr/dlls/mscordbi/mscordbi.cpp index afd2cfe800225..42ad8a090a6f6 100644 --- a/src/coreclr/dlls/mscordbi/mscordbi.cpp +++ b/src/coreclr/dlls/mscordbi/mscordbi.cpp @@ -19,6 +19,13 @@ extern BOOL WINAPI DbgDllMain(HINSTANCE hInstance, DWORD dwReason, //***************************************************************************** extern "C" #ifdef TARGET_UNIX + +#if defined(HOST_ARM64) +// Flag to check if atomics feature is available on +// the machine +bool g_arm64_atomics_present = false; +#endif + DLLEXPORT // For Win32 PAL LoadLibrary emulation #endif BOOL WINAPI DllMain(HINSTANCE hInstance, DWORD dwReason, LPVOID lpReserved) diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index dd7e99c665b10..124e355560cee 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -74,6 +74,12 @@ extern "C" { // On Unix systems, NATIVE_LIBRARY_HANDLE type represents a library handle not registered with the PAL. 
typedef PVOID NATIVE_LIBRARY_HANDLE; +#if defined(HOST_ARM64) +// Flag to check if atomics feature is available on +// the machine +extern bool g_arm64_atomics_present; +#endif + /******************* Processor-specific glue *****************************/ #ifndef _MSC_VER @@ -3466,6 +3472,55 @@ FORCEINLINE void PAL_ArmInterlockedOperationBarrier() #endif } +#if defined(HOST_ARM64) + +#if defined(LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT) + +#define Define_InterlockMethod(RETURN_TYPE, METHOD_DECL, METHOD_INVOC, INTRINSIC_NAME) \ +EXTERN_C PALIMPORT inline RETURN_TYPE PALAPI METHOD_DECL \ +{ \ + return INTRINSIC_NAME; \ +} \ + +#else // !LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT + +#define Define_InterlockMethod(RETURN_TYPE, METHOD_DECL, METHOD_INVOC, INTRINSIC_NAME) \ +/* Function multiversioning will never inline a method that is \ + marked such. However, just to make sure that we don't see \ + surprises, explicitly mark them as noinline. */ \ +__attribute__((target("lse"))) __attribute__((noinline)) \ +EXTERN_C PALIMPORT inline RETURN_TYPE PALAPI Lse_##METHOD_DECL \ +{ \ + return INTRINSIC_NAME; \ +} \ + \ +EXTERN_C PALIMPORT inline RETURN_TYPE PALAPI METHOD_DECL \ +{ \ + if (g_arm64_atomics_present) \ + { \ + return Lse_##METHOD_INVOC; \ + } \ + else \ + { \ + RETURN_TYPE result = INTRINSIC_NAME; \ + PAL_ArmInterlockedOperationBarrier(); \ + return result; \ + } \ +} \ + +#endif // LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT +#else // !HOST_ARM64 + +#define Define_InterlockMethod(RETURN_TYPE, METHOD_DECL, METHOD_INVOC, INTRINSIC_NAME) \ +EXTERN_C PALIMPORT inline RETURN_TYPE PALAPI METHOD_DECL \ +{ \ + RETURN_TYPE result = INTRINSIC_NAME; \ + PAL_ArmInterlockedOperationBarrier(); \ + return result; \ +} \ + +#endif // HOST_ARM64 + /*++ Function: InterlockedAdd @@ -3486,33 +3541,19 @@ Return Values The return value is the resulting added value.
--*/ -EXTERN_C -PALIMPORT -inline -LONG -PALAPI -InterlockedAdd( - IN OUT LONG volatile *lpAddend, - IN LONG value) -{ - LONG result = __sync_add_and_fetch(lpAddend, value); - PAL_ArmInterlockedOperationBarrier(); - return result; -} - -EXTERN_C -PALIMPORT -inline -LONGLONG -PALAPI -InterlockedAdd64( - IN OUT LONGLONG volatile *lpAddend, - IN LONGLONG value) -{ - LONGLONG result = __sync_add_and_fetch(lpAddend, value); - PAL_ArmInterlockedOperationBarrier(); - return result; -} +Define_InterlockMethod( + LONG, + InterlockedAdd( IN OUT LONG volatile *lpAddend, IN LONG value), + InterlockedAdd(lpAddend, value), + __sync_add_and_fetch(lpAddend, value) +) + +Define_InterlockMethod( + LONGLONG, + InterlockedAdd64(IN OUT LONGLONG volatile *lpAddend, IN LONGLONG value), + InterlockedAdd64(lpAddend, value), + __sync_add_and_fetch(lpAddend, value) +) /*++ Function: @@ -3533,31 +3574,19 @@ Return Values The return value is the resulting incremented value. --*/ -EXTERN_C -PALIMPORT -inline -LONG -PALAPI -InterlockedIncrement( - IN OUT LONG volatile *lpAddend) -{ - LONG result = __sync_add_and_fetch(lpAddend, (LONG)1); - PAL_ArmInterlockedOperationBarrier(); - return result; -} - -EXTERN_C -PALIMPORT -inline -LONGLONG -PALAPI -InterlockedIncrement64( - IN OUT LONGLONG volatile *lpAddend) -{ - LONGLONG result = __sync_add_and_fetch(lpAddend, (LONGLONG)1); - PAL_ArmInterlockedOperationBarrier(); - return result; -} +Define_InterlockMethod( + LONG, + InterlockedIncrement(IN OUT LONG volatile *lpAddend), + InterlockedIncrement(lpAddend), + __sync_add_and_fetch(lpAddend, (LONG)1) +) + +Define_InterlockMethod( + LONGLONG, + InterlockedIncrement64(IN OUT LONGLONG volatile *lpAddend), + InterlockedIncrement64(lpAddend), + __sync_add_and_fetch(lpAddend, (LONGLONG)1) +) /*++ Function: @@ -3578,33 +3607,21 @@ Return Values The return value is the resulting decremented value. 
--*/ -EXTERN_C -PALIMPORT -inline -LONG -PALAPI -InterlockedDecrement( - IN OUT LONG volatile *lpAddend) -{ - LONG result = __sync_sub_and_fetch(lpAddend, (LONG)1); - PAL_ArmInterlockedOperationBarrier(); - return result; -} +Define_InterlockMethod( + LONG, + InterlockedDecrement(IN OUT LONG volatile *lpAddend), + InterlockedDecrement(lpAddend), + __sync_sub_and_fetch(lpAddend, (LONG)1) +) #define InterlockedDecrementRelease InterlockedDecrement -EXTERN_C -PALIMPORT -inline -LONGLONG -PALAPI -InterlockedDecrement64( - IN OUT LONGLONG volatile *lpAddend) -{ - LONGLONG result = __sync_sub_and_fetch(lpAddend, (LONGLONG)1); - PAL_ArmInterlockedOperationBarrier(); - return result; -} +Define_InterlockMethod( + LONGLONG, + InterlockedDecrement64(IN OUT LONGLONG volatile *lpAddend), + InterlockedDecrement64(lpAddend), + __sync_sub_and_fetch(lpAddend, (LONGLONG)1) +) /*++ Function: @@ -3627,33 +3644,19 @@ Return Values The function returns the initial value pointed to by Target. --*/ -EXTERN_C -PALIMPORT -inline -LONG -PALAPI -InterlockedExchange( - IN OUT LONG volatile *Target, - IN LONG Value) -{ - LONG result = __atomic_exchange_n(Target, Value, __ATOMIC_ACQ_REL); - PAL_ArmInterlockedOperationBarrier(); - return result; -} - -EXTERN_C -PALIMPORT -inline -LONGLONG -PALAPI -InterlockedExchange64( - IN OUT LONGLONG volatile *Target, - IN LONGLONG Value) -{ - LONGLONG result = __atomic_exchange_n(Target, Value, __ATOMIC_ACQ_REL); - PAL_ArmInterlockedOperationBarrier(); - return result; -} +Define_InterlockMethod( + LONG, + InterlockedExchange(IN OUT LONG volatile *Target, LONG Value), + InterlockedExchange(Target, Value), + __atomic_exchange_n(Target, Value, __ATOMIC_ACQ_REL) +) + +Define_InterlockMethod( + LONGLONG, + InterlockedExchange64(IN OUT LONGLONG volatile *Target, IN LONGLONG Value), + InterlockedExchange64(Target, Value), + __atomic_exchange_n(Target, Value, __ATOMIC_ACQ_REL) +) /*++ Function: @@ -3678,47 +3681,29 @@ Return Values The return value is the initial 
value of the destination. --*/ -EXTERN_C -PALIMPORT -inline -LONG -PALAPI -InterlockedCompareExchange( - IN OUT LONG volatile *Destination, - IN LONG Exchange, - IN LONG Comperand) -{ - LONG result = - __sync_val_compare_and_swap( - Destination, /* The pointer to a variable whose value is to be compared with. */ - Comperand, /* The value to be compared */ - Exchange /* The value to be stored */); - PAL_ArmInterlockedOperationBarrier(); - return result; -} +Define_InterlockMethod( + LONG, + InterlockedCompareExchange(IN OUT LONG volatile *Destination, IN LONG Exchange, IN LONG Comperand), + InterlockedCompareExchange(Destination, Exchange, Comperand), + __sync_val_compare_and_swap( + Destination, /* The pointer to a variable whose value is to be compared with. */ + Comperand, /* The value to be compared */ + Exchange /* The value to be stored */) +) #define InterlockedCompareExchangeAcquire InterlockedCompareExchange #define InterlockedCompareExchangeRelease InterlockedCompareExchange // See the 32-bit variant in interlock2.s -EXTERN_C -PALIMPORT -inline -LONGLONG -PALAPI -InterlockedCompareExchange64( - IN OUT LONGLONG volatile *Destination, - IN LONGLONG Exchange, - IN LONGLONG Comperand) -{ - LONGLONG result = - __sync_val_compare_and_swap( - Destination, /* The pointer to a variable whose value is to be compared with. */ - Comperand, /* The value to be compared */ - Exchange /* The value to be stored */); - PAL_ArmInterlockedOperationBarrier(); - return result; -} +Define_InterlockMethod( + LONGLONG, + InterlockedCompareExchange64(IN OUT LONGLONG volatile *Destination, IN LONGLONG Exchange, IN LONGLONG Comperand), + InterlockedCompareExchange64(Destination, Exchange, Comperand), + __sync_val_compare_and_swap( + Destination, /* The pointer to a variable whose value is to be compared with. 
*/ + Comperand, /* The value to be compared */ + Exchange /* The value to be stored */) +) /*++ Function: @@ -3737,61 +3722,33 @@ Return Values The return value is the original value that 'Addend' pointed to. --*/ -EXTERN_C -PALIMPORT -inline -LONG -PALAPI -InterlockedExchangeAdd( - IN OUT LONG volatile *Addend, - IN LONG Value) -{ - LONG result = __sync_fetch_and_add(Addend, Value); - PAL_ArmInterlockedOperationBarrier(); - return result; -} - -EXTERN_C -PALIMPORT -inline -LONGLONG -PALAPI -InterlockedExchangeAdd64( - IN OUT LONGLONG volatile *Addend, - IN LONGLONG Value) -{ - LONGLONG result = __sync_fetch_and_add(Addend, Value); - PAL_ArmInterlockedOperationBarrier(); - return result; -} - -EXTERN_C -PALIMPORT -inline -LONG -PALAPI -InterlockedAnd( - IN OUT LONG volatile *Destination, - IN LONG Value) -{ - LONG result = __sync_fetch_and_and(Destination, Value); - PAL_ArmInterlockedOperationBarrier(); - return result; -} - -EXTERN_C -PALIMPORT -inline -LONG -PALAPI -InterlockedOr( - IN OUT LONG volatile *Destination, - IN LONG Value) -{ - LONG result = __sync_fetch_and_or(Destination, Value); - PAL_ArmInterlockedOperationBarrier(); - return result; -} +Define_InterlockMethod( + LONG, + InterlockedExchangeAdd(IN OUT LONG volatile *Addend, IN LONG Value), + InterlockedExchangeAdd(Addend, Value), + __sync_fetch_and_add(Addend, Value) +) + +Define_InterlockMethod( + LONGLONG, + InterlockedExchangeAdd64(IN OUT LONGLONG volatile *Addend, IN LONGLONG Value), + InterlockedExchangeAdd64(Addend, Value), + __sync_fetch_and_add(Addend, Value) +) + +Define_InterlockMethod( + LONG, + InterlockedAnd(IN OUT LONG volatile *Destination, IN LONG Value), + InterlockedAnd(Destination, Value), + __sync_fetch_and_and(Destination, Value) +) + +Define_InterlockMethod( + LONG, + InterlockedOr(IN OUT LONG volatile *Destination, IN LONG Value), + InterlockedOr(Destination, Value), + __sync_fetch_and_or(Destination, Value) +) #if defined(HOST_64BIT) #define InterlockedExchangePointer(Target, 
Value) \ diff --git a/src/coreclr/pal/src/init/pal.cpp b/src/coreclr/pal/src/init/pal.cpp index a5a6ac15743ba..37576bdf077f8 100644 --- a/src/coreclr/pal/src/init/pal.cpp +++ b/src/coreclr/pal/src/init/pal.cpp @@ -106,6 +106,12 @@ extern "C" BOOL CRTInitStdStreams( void ); extern bool g_running_in_exe; +#if defined(HOST_ARM64) +// Flag to check if atomics feature is available on +// the machine +bool g_arm64_atomics_present = false; +#endif + Volatile init_count = 0; Volatile shutdown_intent = 0; Volatile g_coreclrInitialized = 0; diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 7ee4f99c54c65..7ef3a86fa9f60 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1566,7 +1566,6 @@ void EEJitManager::SetCpuInfo() if (IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) { CPUCompileFlags.Set(InstructionSet_Atomics); - g_arm64_atomics_present = true; } // PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE (43) @@ -1584,6 +1583,11 @@ void EEJitManager::SetCpuInfo() // We set the flag when the instruction is permitted and the block size is 64 bytes. CPUCompileFlags.Set(InstructionSet_Dczva); } + + if (CPUCompileFlags.IsSet(InstructionSet_Atomics)) + { + g_arm64_atomics_present = true; + } #endif // TARGET_ARM64 // Now that we've queried the actual hardware support, we need to adjust what is actually supported based