Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AVX10.1 API introduction in JIT #101938

Merged
merged 22 commits into from
Jun 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
8e7745f
Add AVX10v1 API surface
khushal1996 May 8, 2024
41ab759
Define HWINTRINSIC for AVX10v1, AVX10v1_V256 and AVX10v1_V512
khushal1996 May 8, 2024
445be70
Setup template testing for AVX10v1 APIs
khushal1996 May 8, 2024
cad5824
Handle AVX10v1 APIs in JIT where equivalent AVX512* APIs are handled
khushal1996 May 8, 2024
4079f41
Merge Avx10v1 and Avx10v1.V256. Rename Avx10.cs to Avx10v1.cs
khushal1996 May 9, 2024
8054bdf
Add Avx10v1 to relevant places
khushal1996 May 9, 2024
3386ed4
Fix CI errors. Add missing API in Avx10v1.PlatofrmNotSupported ad end…
khushal1996 May 9, 2024
2b56317
Changes to be made with latest changes on main. Make appropriate comm…
khushal1996 May 10, 2024
43d9d80
Lower AVX10v1 hwintrinsic in lowering and gentree.cpp for simdSize 32/16
khushal1996 May 11, 2024
a7bbd75
Fix failures on GNR for AVX10v1
khushal1996 May 12, 2024
6fc9d38
Disable template tests disabled for Avx512
khushal1996 May 14, 2024
ebd3ee3
Distinguish between Avx10v1 and Avx10v1/512, Add appropriate comments…
khushal1996 May 14, 2024
f426baa
Remove duplicate code and rather use a single if condition
khushal1996 May 15, 2024
455d754
Use bool instead of compIsa checks where possible
khushal1996 May 16, 2024
52a5aa0
remove duplication of code in shuffle
khushal1996 May 16, 2024
20e022b
resolve review comments. Make evex encoding checks clear to read and …
khushal1996 May 16, 2024
2d8fc4c
Add FMA and Avx512F.X64 instructions to AVX10v1. Restructure code and…
khushal1996 May 22, 2024
79b2a52
Combine compOpportunistic checks with Avx10 check using IsAvx10OrIsaS…
khushal1996 May 22, 2024
c38b62f
Introduce a new internal ISA InstructionSet_EVEX and remove Instructi…
khushal1996 Jun 3, 2024
7a51e1e
Addressing review comments. resolving errors introduced when merged w…
khushal1996 Jun 3, 2024
b1509c4
fix formatting
khushal1996 Jun 4, 2024
e21bcff
Reorder declaration of InstructionSet_EVEX to proper position. Run fo…
khushal1996 Jun 6, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 84 additions & 80 deletions src/coreclr/inc/corinfoinstructionset.h

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* 227e46fa-1be3-4770-b613-4a239e7c28aa */
0x227e46fa,
0x1be3,
0x4770,
{0xb6, 0x13, 0x4a, 0x23, 0x9e, 0x7c, 0x28, 0xaa}
constexpr GUID JITEEVersionIdentifier = { /* 6e0b439f-0d18-4836-a486-4962af0cc948 */
0x6e0b439f,
0x0d18,
0x4836,
{0xa4, 0x86, 0x49, 0x62, 0xaf, 0x0c, 0xc9, 0x48}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_Rcpc2=42,
READYTORUN_INSTRUCTION_Sve=43,
READYTORUN_INSTRUCTION_Avx10v1=44,
READYTORUN_INSTRUCTION_Avx10v1_V256=45,
READYTORUN_INSTRUCTION_Avx10v1_V512=46,
READYTORUN_INSTRUCTION_EVEX=47,

};

Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/assertionprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3184,8 +3184,8 @@ bool Compiler::optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock,
return (simdBaseType == TYP_FLOAT) && vecCon->IsZero();
}

case NI_AVX512F_CompareEqualMask:
case NI_AVX512F_CompareNotEqualMask:
case NI_EVEX_CompareEqualMask:
case NI_EVEX_CompareNotEqualMask:
{
// We can optimize when the constant is zero, but only
// for non floating-point since +0.0 == -0.0
Expand Down
34 changes: 32 additions & 2 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1827,7 +1827,22 @@ void CodeGen::genGenerateMachineCode()
#if defined(TARGET_X86)
if (compiler->canUseEvexEncoding())
{
printf("X86 with AVX512");
if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
{
if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1_V512))
{
printf("X86 with AVX10/512");
}
else
{
printf("X86 with AVX10/256");
}
}
else
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
printf("X86 with AVX512");
}
}
else if (compiler->canUseVexEncoding())
{
Expand All @@ -1840,7 +1855,22 @@ void CodeGen::genGenerateMachineCode()
#elif defined(TARGET_AMD64)
if (compiler->canUseEvexEncoding())
{
printf("X64 with AVX512");
// This arm is compiled for TARGET_AMD64, so the banner must name the X64 target
// (the x86 arm of the same function prints the "X86 with ..." variants).
if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
{
    // AVX10v1 is available; report whether the 512-bit vector length is usable too.
    if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1_V512))
    {
        printf("X64 with AVX10/512");
    }
    else
    {
        printf("X64 with AVX10/256");
    }
}
else
{
    // EVEX encoding without AVX10v1 is expected to come from AVX512F.
    assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
    printf("X64 with AVX512");
}
}
else if (compiler->canUseVexEncoding())
{
Expand Down
37 changes: 24 additions & 13 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
{
if (emitter::isHighSimdReg(targetReg))
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
assert(compiler->canUseEvexEncodingDebugOnly());
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF), INS_OPTS_NONE);
}
Expand All @@ -492,7 +492,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
{
if (emitter::isHighSimdReg(targetReg))
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
assert(compiler->canUseEvexEncodingDebugOnly());
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF), INS_OPTS_NONE);
}
Expand Down Expand Up @@ -521,7 +521,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
{
if (emitter::isHighSimdReg(targetReg))
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
assert(compiler->canUseEvexEncodingDebugOnly());
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF), INS_OPTS_NONE);
}
Expand All @@ -548,7 +548,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
{
if (emitter::isHighSimdReg(targetReg))
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
assert(compiler->canUseEvexEncodingDebugOnly());
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF), INS_OPTS_NONE);
}
Expand Down Expand Up @@ -667,7 +667,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
{
if (emitter::isHighSimdReg(targetReg))
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
assert(compiler->canUseEvexEncodingDebugOnly());
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, EA_16BYTE, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF), INS_OPTS_NONE);
}
Expand Down Expand Up @@ -5654,6 +5654,8 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
case NI_AVX512F_ExtractVector256:
case NI_AVX512DQ_ExtractVector128:
case NI_AVX512DQ_ExtractVector256:
case NI_AVX10v1_V512_ExtractVector128:
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
case NI_AVX10v1_V512_ExtractVector256:
{
// These intrinsics are "ins reg/mem, xmm, imm8"
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
Expand Down Expand Up @@ -5682,6 +5684,8 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
case NI_AVX512F_ConvertToVector256UInt32:
case NI_AVX512F_VL_ConvertToVector128UInt32:
case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation:
case NI_AVX10v1_ConvertToVector128UInt32:
case NI_AVX10v1_ConvertToVector128UInt32WithSaturation:
{
assert(!varTypeIsFloating(baseType));
FALLTHROUGH;
Expand Down Expand Up @@ -5719,6 +5723,16 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation:
case NI_AVX512BW_VL_ConvertToVector128SByte:
case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation:
case NI_AVX10v1_ConvertToVector128Byte:
case NI_AVX10v1_ConvertToVector128ByteWithSaturation:
case NI_AVX10v1_ConvertToVector128Int16:
case NI_AVX10v1_ConvertToVector128Int16WithSaturation:
case NI_AVX10v1_ConvertToVector128Int32:
case NI_AVX10v1_ConvertToVector128Int32WithSaturation:
case NI_AVX10v1_ConvertToVector128SByte:
case NI_AVX10v1_ConvertToVector128SByteWithSaturation:
case NI_AVX10v1_ConvertToVector128UInt16:
case NI_AVX10v1_ConvertToVector128UInt16WithSaturation:
{
// These intrinsics are "ins reg/mem, xmm"
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
Expand Down Expand Up @@ -7324,13 +7338,11 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
// Also we don't expect to see uint32 -> float/double and uint64 -> float conversions
// here since they should have been lowered appropriately.
noway_assert(srcType != TYP_UINT);
assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) ||
compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) || compiler->canUseEvexEncodingDebugOnly());

if ((srcType == TYP_ULONG) && varTypeIsFloating(dstType) &&
compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
if ((srcType == TYP_ULONG) && varTypeIsFloating(dstType) && compiler->canUseEvexEncoding())
{
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
assert(compiler->canUseEvexEncodingDebugOnly());
genConsumeOperands(treeNode->AsOp());
instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType));
GetEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1);
Expand Down Expand Up @@ -7458,13 +7470,12 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
// into a helper call by either front-end or lowering phase, unless we have AVX512F
// accelerated conversions.
assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
compiler->canUseEvexEncodingDebugOnly());

// If the dstType is TYP_UINT, we have 32-bits to encode the
// float number. Any of 33rd or above bits can be the sign bit.
// To achieve it we pretend as if we are converting it to a long.
if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))) &&
!compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))) && !compiler->canUseEvexEncoding())
{
dstType = TYP_LONG;
}
Expand Down
1 change: 0 additions & 1 deletion src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2307,7 +2307,6 @@ void Compiler::compSetProcessor()
{
instructionSetFlags.AddInstructionSet(InstructionSet_Vector256);
}

// x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL
// These have been shipped together historically and at the time of this writing
// there exists no hardware which doesn't support the entire feature set. To simplify
Expand Down
94 changes: 93 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -9549,6 +9549,14 @@ class Compiler
return opts.compSupportsISA.HasInstructionSet(isa);
}

// Keep the following in mind when using the APIs below:
//  * InstructionSet_EVEX implies Avx10v1 -or- Avx512F+CD+DQ+BW+VL and can be used for 128-bit or 256-bit
//    EVEX encoded instructions in these instruction sets.
//  * InstructionSet_Avx10v1_V512 should never be queried directly; it is covered by querying Avx512*.
//  * InstructionSet_Avx512F (and the same for BW, CD, DQ) is only queried for 512-bit EVEX encoded
//    instructions.
//  * InstructionSet_Avx10v1 is only queried for cases like 128-bit/256-bit instructions that wouldn't be in
//    F+CD+DQ+BW+VL (such as VBMI) and should appear with a corresponding query around AVX512*_VL
//    (e.g. AVX512_VBMI_VL).

#ifdef DEBUG
//------------------------------------------------------------------------
// IsBaselineVector512IsaSupportedDebugOnly - Does isa support exist for Vector512.
Expand All @@ -9562,6 +9570,42 @@ class Compiler
return compIsaSupportedDebugOnly(InstructionSet_AVX512F);
#else
return false;
#endif
}

//------------------------------------------------------------------------
// canUseEvexEncodingDebugOnly - Debug-only query for EVEX encoding support.
//
// Returns:
//    On xarch targets, the result of compIsaSupportedDebugOnly(InstructionSet_EVEX);
//    `false` on every other target.
//
bool canUseEvexEncodingDebugOnly() const
{
#ifndef TARGET_XARCH
    return false;
#else
    return compIsaSupportedDebugOnly(InstructionSet_EVEX);
#endif
}

//------------------------------------------------------------------------
// IsAvx10OrIsaSupportedDebugOnly - Answer the question: Is AVX10v1 or the given ISA supported.
//
// Arguments:
//    isa - The instruction set to check alongside AVX10v1. It must not be one of
//          AVX512F/BW/CD/DQ or their _VL variants; for those the EVEX encoding
//          check should be used instead.
//
// Returns:
//    `true` if AVX10v1 or the given ISA is supported, `false` if not.
//    Always `false` on non-xarch targets.
//
bool IsAvx10OrIsaSupportedDebugOnly(CORINFO_InstructionSet isa) const
{
#ifdef TARGET_XARCH
    // For the below cases, check for evex encoding should be used.
    // The exclusions have to be and-ed together: an or-ed chain of `isa != X`
    // terms is true for every value of `isa`, so it could never fire.
    assert(isa != InstructionSet_AVX512F && isa != InstructionSet_AVX512F_VL && isa != InstructionSet_AVX512BW &&
           isa != InstructionSet_AVX512BW_VL && isa != InstructionSet_AVX512CD &&
           isa != InstructionSet_AVX512CD_VL && isa != InstructionSet_AVX512DQ &&
           isa != InstructionSet_AVX512DQ_VL);

    return (compIsaSupportedDebugOnly(InstructionSet_AVX10v1) || compIsaSupportedDebugOnly(isa));
#else
    return false;
#endif
}
#endif // DEBUG
Expand All @@ -9581,6 +9625,21 @@ class Compiler
#endif
}

//------------------------------------------------------------------------
// IsAvx10OrIsaSupportedOpportunistically - Opportunistic query for AVX10v1 or the given ISA.
//
// Arguments:
//    isa - The instruction set to fall back to when AVX10v1 is not available.
//
// Returns:
//    `true` if AVX10v1, or otherwise the given ISA, is opportunistically supported;
//    `false` if neither is, and always `false` on non-xarch targets.
//
bool IsAvx10OrIsaSupportedOpportunistically(CORINFO_InstructionSet isa) const
{
#ifndef TARGET_XARCH
    return false;
#else
    // AVX10v1 is queried first; `isa` is only queried when that check fails.
    if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
    {
        return true;
    }

    return compOpportunisticallyDependsOn(isa);
#endif
}

bool canUseEmbeddedBroadcast() const
{
return JitConfig.EnableEmbeddedBroadcast();
Expand All @@ -9593,6 +9652,35 @@ class Compiler

#ifdef TARGET_XARCH
public:

//------------------------------------------------------------------------
// compIsEvexOpportunisticallySupported - Checks for whether AVX10v1 or avx512InstructionSet is supported
// opportunistically.
//
// Returns:
// returns true if AVX10v1 or avx512InstructionSet is supported opportunistically and
// sets isV512Supported to true if AVX512F is supported, false otherwise.
//
bool compIsEvexOpportunisticallySupported(bool& isV512Supported,
CORINFO_InstructionSet avx512InstructionSet = InstructionSet_AVX512F)
{
assert(avx512InstructionSet == InstructionSet_AVX512F || avx512InstructionSet == InstructionSet_AVX512F_VL ||
khushal1996 marked this conversation as resolved.
Show resolved Hide resolved
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
avx512InstructionSet == InstructionSet_AVX512BW || avx512InstructionSet == InstructionSet_AVX512BW_VL ||
avx512InstructionSet == InstructionSet_AVX512CD || avx512InstructionSet == InstructionSet_AVX512CD_VL ||
avx512InstructionSet == InstructionSet_AVX512DQ || avx512InstructionSet == InstructionSet_AVX512DQ_VL ||
avx512InstructionSet == InstructionSet_AVX512VBMI ||
avx512InstructionSet == InstructionSet_AVX512VBMI_VL);

if (compOpportunisticallyDependsOn(avx512InstructionSet))
{
isV512Supported = true;
return true;
}

isV512Supported = false;
return compOpportunisticallyDependsOn(InstructionSet_AVX10v1);
}

bool canUseVexEncoding() const
{
return compOpportunisticallyDependsOn(InstructionSet_AVX);
Expand All @@ -9606,7 +9694,7 @@ class Compiler
//
bool canUseEvexEncoding() const
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
{
return compOpportunisticallyDependsOn(InstructionSet_AVX512F);
return (compOpportunisticallyDependsOn(InstructionSet_EVEX));
}

private:
Expand Down Expand Up @@ -9636,6 +9724,10 @@ class Compiler

return true;
}
else if (JitConfig.JitStressEvexEncoding() && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
{
return true;
}
#endif // DEBUG

return false;
Expand Down
Loading
Loading