Skip to content

Commit

Permalink
JIT ARM64-SVE: Implement IF_SVE_DL_2A, IF_SVE_DZ_1A, IF_SVE_EA_1A (do…
Browse files Browse the repository at this point in the history
  • Loading branch information
amanasifkhalid authored and tmds committed Jan 23, 2024
1 parent 0ab6cfc commit e9963f0
Show file tree
Hide file tree
Showing 3 changed files with 180 additions and 13 deletions.
33 changes: 33 additions & 0 deletions src/coreclr/jit/codegenarm64test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5181,6 +5181,24 @@ void CodeGen::genArm64EmitterUnitTestsSve()
theEmitter->emitIns_R_R_I(INS_sve_uqrshrn, EA_SCALABLE, REG_V15, REG_V12, 1,
INS_OPTS_SCALABLE_H); // UQRSHRN <Zd>.H, {<Zn1>.S-<Zn2>.S }, #<const>

// IF_SVE_DL_2A
theEmitter->emitIns_R_R(INS_sve_cntp, EA_8BYTE, REG_R0, REG_P0, INS_OPTS_SCALABLE_B,
INS_SCALABLE_OPTS_VL_2X); // CNTP <Xd>, <PNn>.<T>, <vl>
theEmitter->emitIns_R_R(INS_sve_cntp, EA_8BYTE, REG_R1, REG_P1, INS_OPTS_SCALABLE_B,
INS_SCALABLE_OPTS_VL_4X); // CNTP <Xd>, <PNn>.<T>, <vl>
theEmitter->emitIns_R_R(INS_sve_cntp, EA_8BYTE, REG_R2, REG_P2, INS_OPTS_SCALABLE_H,
INS_SCALABLE_OPTS_VL_2X); // CNTP <Xd>, <PNn>.<T>, <vl>
theEmitter->emitIns_R_R(INS_sve_cntp, EA_8BYTE, REG_R3, REG_P3, INS_OPTS_SCALABLE_H,
INS_SCALABLE_OPTS_VL_4X); // CNTP <Xd>, <PNn>.<T>, <vl>
theEmitter->emitIns_R_R(INS_sve_cntp, EA_8BYTE, REG_R4, REG_P4, INS_OPTS_SCALABLE_S,
INS_SCALABLE_OPTS_VL_2X); // CNTP <Xd>, <PNn>.<T>, <vl>
theEmitter->emitIns_R_R(INS_sve_cntp, EA_8BYTE, REG_R5, REG_P5, INS_OPTS_SCALABLE_S,
INS_SCALABLE_OPTS_VL_4X); // CNTP <Xd>, <PNn>.<T>, <vl>
theEmitter->emitIns_R_R(INS_sve_cntp, EA_8BYTE, REG_R6, REG_P6, INS_OPTS_SCALABLE_D,
INS_SCALABLE_OPTS_VL_2X); // CNTP <Xd>, <PNn>.<T>, <vl>
theEmitter->emitIns_R_R(INS_sve_cntp, EA_8BYTE, REG_R7, REG_P7, INS_OPTS_SCALABLE_D,
INS_SCALABLE_OPTS_VL_4X); // CNTP <Xd>, <PNn>.<T>, <vl>

// IF_SVE_DM_2A
theEmitter->emitIns_R_R(INS_sve_decp, EA_8BYTE, REG_R0, REG_P0, INS_OPTS_SCALABLE_B); // DECP <Xdn>, <Pm>.<T>
theEmitter->emitIns_R_R(INS_sve_decp, EA_8BYTE, REG_R1, REG_P1, INS_OPTS_SCALABLE_H); // DECP <Xdn>, <Pm>.<T>
Expand Down Expand Up @@ -5494,6 +5512,21 @@ void CodeGen::genArm64EmitterUnitTestsSve()
theEmitter->emitIns_R_R_R(INS_sve_whilelt, EA_8BYTE, REG_P15, REG_R14, REG_R15, INS_OPTS_SCALABLE_B,
INS_SCALABLE_OPTS_VL_4X); // WHILELT <PNd>.<T>, <Xn>, <Xm>, <vl>

// IF_SVE_DZ_1A
theEmitter->emitIns_R(INS_sve_ptrue, EA_SCALABLE, REG_P8, INS_OPTS_SCALABLE_B); // PTRUE <PNd>.<T>
theEmitter->emitIns_R(INS_sve_ptrue, EA_SCALABLE, REG_P9, INS_OPTS_SCALABLE_H); // PTRUE <PNd>.<T>
theEmitter->emitIns_R(INS_sve_ptrue, EA_SCALABLE, REG_P10, INS_OPTS_SCALABLE_S); // PTRUE <PNd>.<T>
theEmitter->emitIns_R(INS_sve_ptrue, EA_SCALABLE, REG_P11, INS_OPTS_SCALABLE_D); // PTRUE <PNd>.<T>

// IF_SVE_EA_1A
// Note: B is reserved
theEmitter->emitIns_R_F(INS_sve_fdup, EA_SCALABLE, REG_V0, 2.0, INS_OPTS_SCALABLE_H); // FDUP <Zd>.<T>, #<const>
theEmitter->emitIns_R_F(INS_sve_fdup, EA_SCALABLE, REG_V1, 1.0, INS_OPTS_SCALABLE_S); // FDUP <Zd>.<T>, #<const>
theEmitter->emitIns_R_F(INS_sve_fdup, EA_SCALABLE, REG_V2, 2.0, INS_OPTS_SCALABLE_D); // FDUP <Zd>.<T>, #<const>
theEmitter->emitIns_R_F(INS_sve_fmov, EA_SCALABLE, REG_V3, -10.0, INS_OPTS_SCALABLE_H); // FMOV <Zd>.<T>, #<const>
theEmitter->emitIns_R_F(INS_sve_fmov, EA_SCALABLE, REG_V4, -0.125, INS_OPTS_SCALABLE_S); // FMOV <Zd>.<T>, #<const>
theEmitter->emitIns_R_F(INS_sve_fmov, EA_SCALABLE, REG_V5, 31.0, INS_OPTS_SCALABLE_D); // FMOV <Zd>.<T>, #<const>

// IF_SVE_IH_3A
theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V5, REG_P3, REG_R4, 0,
INS_OPTS_SCALABLE_D); // LD1D {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
Expand Down
149 changes: 139 additions & 10 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1242,14 +1242,16 @@ void emitter::emitInsSanityCheck(instrDesc* id)
assert(isScalableVectorSize(id->idOpSize()));
break;

case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count
assert(isValidGeneralDatasize(id->idOpSize())); // X
case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter)
assert(id->idOpSize() == EA_8BYTE);

FALLTHROUGH;
case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count
case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
assert(insOptsScalableStandard(id->idInsOpt())); // xx
assert(isGeneralRegister(id->idReg1())); // ddddd
assert(isPredicateRegister(id->idReg2())); // MMMM
assert(insOptsScalableStandard(id->idInsOpt()));
assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
assert(isGeneralRegister(id->idReg1())); // ddddd
assert(isPredicateRegister(id->idReg2())); // MMMM
assert(isValidGeneralDatasize(id->idOpSize()));
break;

Expand Down Expand Up @@ -1339,6 +1341,19 @@ void emitter::emitInsSanityCheck(instrDesc* id)
assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
break;

case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue
assert(insOptsScalableStandard(id->idInsOpt()));
assert(isHighPredicateRegister(id->idReg1())); // DDD
assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
break;

case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated)
assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
assert(isVectorRegister(id->idReg1())); // ddddd
assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
assert(isValidUimm8(emitGetInsSC(id))); // iiiiiiii
break;

case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
// immediate)
case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
Expand Down Expand Up @@ -5857,7 +5872,7 @@ void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm)
* Add an instruction referencing a single register.
*/

void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg, insOpts opt /* = INS_OPTS_NONE */)
{
insFormat fmt = IF_NONE;
instrDesc* id = emitNewInstrSmall(attr);
Expand Down Expand Up @@ -5900,6 +5915,15 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
fmt = IF_SVE_DR_1A;
break;

case INS_sve_ptrue:
assert(insOptsScalableStandard(opt));
assert(isHighPredicateRegister(reg)); // DDD
assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
id->idReg1(reg);
id->idInsOpt(opt);
fmt = IF_SVE_DZ_1A;
break;

default:
unreached();
}
Expand Down Expand Up @@ -6251,6 +6275,21 @@ void emitter::emitIns_R_F(
}
break;

case INS_sve_fmov:
case INS_sve_fdup:
assert(insOptsScalableAtLeastHalf(opt));
assert(isVectorRegister(reg)); // ddddd
assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx

fpi.immFPIVal = 0;
canEncode = canEncodeFloatImm8(immDbl, &fpi);
imm = fpi.immFPIVal;
fmt = IF_SVE_EA_1A;

// FMOV is an alias for FDUP, and is always the preferred disassembly.
ins = INS_sve_fmov;
break;

default:
unreached();
break;
Expand Down Expand Up @@ -6444,8 +6483,12 @@ void emitter::emitIns_Mov(
* Add an instruction referencing two registers
*/

void emitter::emitIns_R_R(
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt /* = INS_OPTS_NONE */)
void emitter::emitIns_R_R(instruction ins,
emitAttr attr,
regNumber reg1,
regNumber reg2,
insOpts opt /* = INS_OPTS_NONE */,
insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
{
if (IsMovInstruction(ins))
{
Expand Down Expand Up @@ -7090,6 +7133,15 @@ void emitter::emitIns_R_R(
}
break;

case INS_sve_cntp:
assert(insOptsScalableStandard(opt));
assert(insScalableOptsWithVectorLength(sopt)); // l
assert(isGeneralRegister(reg1)); // ddddd
assert(isPredicateRegister(reg2)); // NNNN
assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
fmt = IF_SVE_DL_2A;
break;

case INS_sve_incp:
case INS_sve_decp:
assert(isPredicateRegister(reg2)); // MMMM
Expand Down Expand Up @@ -7181,7 +7233,17 @@ void emitter::emitIns_R_R(

assert(fmt != IF_NONE);

instrDesc* id = emitNewInstrSmall(attr);
instrDesc* id;

if (insScalableOptsWithVectorLength(sopt))
{
id = emitNewInstr(attr);
id->idVectorLength4x(sopt == INS_SCALABLE_OPTS_VL_4X);
}
else
{
id = emitNewInstrSmall(attr);
}

id->idIns(ins);
id->idInsFmt(fmt);
Expand Down Expand Up @@ -13090,7 +13152,16 @@ void emitter::emitIns_Call(EmitCallType callType,

if (id->idVectorLength4x())
{
return 0x2000; // set the bit at location 13
switch (id->idInsFmt())
{
case IF_SVE_DL_2A:
return 0x400; // set the bit at location 10
case IF_SVE_DY_3A:
return 0x2000; // set the bit at location 13
default:
assert(!"Unexpected format");
break;
}
}

return 0;
Expand Down Expand Up @@ -16380,6 +16451,15 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter)
code = emitInsCodeSve(ins, fmt);
code |= insEncodeVectorLengthSpecifier(id); // l
code |= insEncodeReg_R_4_to_0(id->idReg1()); // ddddd
code |= insEncodeReg_P_8_to_5(id->idReg2()); // NNNN
code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
code = emitInsCodeSve(ins, fmt);
code |= insEncodeReg_R_4_to_0(id->idReg1()); // ddddd
Expand Down Expand Up @@ -16478,6 +16558,21 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue
code = emitInsCodeSve(ins, fmt);
code |= insEncodeReg_P_2_to_0(id->idReg1()); // DDD
code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated)
code = emitInsCodeSve(ins, fmt);
code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd
code |= ((code_t)emitGetInsSC(id) << 5); // iiiiiiii
code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare
code = emitInsCodeSve(ins, fmt);
code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD
Expand Down Expand Up @@ -19032,6 +19127,13 @@ void emitter::emitDispInsHelp(
emitDispImm(emitGetInsSC(id), false); // iiii
break;

// <Xd>, <PNn>.<T>, <vl>
case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter)
emitDispReg(id->idReg1(), id->idOpSize(), true); // ddddd
emitDispPredicateReg(id->idReg2(), PREDICATE_SIZED, id->idInsOpt(), true); // NNNN
emitDispVectorLengthSpecifier(id);
break;

// <Xdn>, <Pm>.<T>
case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
emitDispReg(id->idReg1(), id->idOpSize(), true); // ddddd
Expand Down Expand Up @@ -19130,6 +19232,18 @@ void emitter::emitDispInsHelp(
emitDispVectorLengthSpecifier(id);
break;

// PTRUE <PNd>.<T>
case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue
emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), false); // DDD
break;

// FDUP <Zd>.<T>, #<const>
// FMOV <Zd>.<T>, #<const>
case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated)
emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
emitDispFloatImm(emitGetInsSC(id)); // iiiiiiii
break;

// { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
// Some of these formats may allow changing the element size instead of using 'D' for all instructions.
case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
Expand Down Expand Up @@ -21747,6 +21861,11 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
}
break;

case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter)
result.insThroughput = PERFSCORE_THROUGHPUT_2C;
result.insLatency = PERFSCORE_LATENCY_2C;
break;

case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count
case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count
Expand Down Expand Up @@ -21802,6 +21921,16 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insLatency = PERFSCORE_LATENCY_3C;
break;

case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue
result.insThroughput = PERFSCORE_THROUGHPUT_2C;
result.insLatency = PERFSCORE_LATENCY_2C;
break;

case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated)
result.insThroughput = PERFSCORE_THROUGHPUT_2C;
result.insLatency = PERFSCORE_LATENCY_2C;
break;

case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
// immediate)
case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
Expand Down
11 changes: 8 additions & 3 deletions src/coreclr/jit/emitarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ static bool isValidUimm5(ssize_t value)
return (0 <= value) && (value <= 0x1FLL);
};

// Returns true if 'value' is a legal unsigned immediate 8 bit encoding (such as for fMOV).
// Returns true if 'value' is a legal unsigned immediate 8 bit encoding (such as for FMOV).
static bool isValidUimm8(ssize_t value)
{
return (0 <= value) && (value <= 0xFFLL);
Expand Down Expand Up @@ -1066,7 +1066,7 @@ void emitIns(instruction ins);

void emitIns_I(instruction ins, emitAttr attr, ssize_t imm);

void emitIns_R(instruction ins, emitAttr attr, regNumber reg);
// Add an instruction referencing a single register.
//
// 'opt' defaults to INS_OPTS_NONE, so existing three-argument callers are unaffected.
// For INS_sve_ptrue the implementation asserts that 'opt' is a standard scalable
// arrangement and that 'reg' is a high predicate register, and stores 'opt' on the
// instruction descriptor.
// NOTE(review): whether the other instructions handled by this overload consult 'opt'
// is not visible in this hunk - confirm against the definition in emitarm64.cpp.
void emitIns_R(instruction ins, emitAttr attr, regNumber reg, insOpts opt = INS_OPTS_NONE);

void emitIns_R_I(instruction ins,
emitAttr attr,
Expand All @@ -1080,7 +1080,12 @@ void emitIns_R_F(instruction ins, emitAttr attr, regNumber reg, double immDbl, i
void emitIns_Mov(
instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt = INS_OPTS_NONE);

void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt = INS_OPTS_NONE);
// Add an instruction referencing two registers.
//
// Both 'opt' and 'sopt' are defaulted, so existing four- and five-argument callers
// are unaffected.
// When 'sopt' carries a vector-length specifier (as asserted for INS_sve_cntp), the
// implementation allocates a full instruction descriptor instead of a small one and
// records on it whether the 4x vector length (INS_SCALABLE_OPTS_VL_4X) was requested.
void emitIns_R_R(instruction ins,
emitAttr attr,
regNumber reg1,
regNumber reg2,
insOpts opt = INS_OPTS_NONE,
insScalableOpts sopt = INS_SCALABLE_OPTS_NONE);

void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insFlags flags)
{
Expand Down

0 comments on commit e9963f0

Please sign in to comment.