Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26652,7 +26652,14 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore(GenTree** pAddr) const
case NI_SSE2_MaskMove:
addr = Op(3);
break;
#endif // TARGET_XARCH
#elif defined(TARGET_ARM64)
case NI_Sve_Store:
case NI_Sve_Storex2:
case NI_Sve_Storex3:
case NI_Sve_Storex4:
addr = Op(2);
break;
#endif // TARGET_ARM64

default:
addr = Op(1);
Expand Down
53 changes: 53 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2370,6 +2370,59 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_Sve_Store:
{
assert(sig->numArgs == 3);
assert(retType == TYP_VOID);

CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
var_types argType = TYP_UNKNOWN;
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
op3 = impPopStack().val;
unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass);

if (op3->TypeGet() == TYP_STRUCT)
{
info.compNeedsConsecutiveRegisters = true;
switch (fieldCount)
{
case 2:
intrinsic = NI_Sve_Storex2;
break;

case 3:
intrinsic = NI_Sve_Storex3;
break;

case 4:
intrinsic = NI_Sve_Storex4;
break;

default:
assert("unsupported");
}

if (!op3->OperIs(GT_LCL_VAR))
{
unsigned tmp = lvaGrabTemp(true DEBUGARG("SveStoreN"));

impStoreToTemp(tmp, op3, CHECK_SPILL_NONE);
op3 = gtNewLclvNode(tmp, argType);
}
op3 = gtConvertTableOpToFieldList(op3, fieldCount);
}

argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
op2 = getArgForHWIntrinsic(argType, argClass);
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
op1 = getArgForHWIntrinsic(argType, argClass);
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
break;
}

default:
{
return nullptr;
Expand Down
61 changes: 61 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1348,6 +1348,67 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
break;

case NI_Sve_Storex2:
case NI_Sve_Storex3:
case NI_Sve_Storex4:
{
assert(intrin.op3->OperIsFieldList());
GenTreeFieldList* fieldList = intrin.op3->AsFieldList();
GenTree* firstField = fieldList->Uses().GetHead()->GetNode();
op3Reg = firstField->GetRegNum();

#ifdef DEBUG
unsigned regCount = 0;
regNumber argReg = op3Reg;
for (GenTreeFieldList::Use& use : fieldList->Uses())
{
regCount++;

GenTree* argNode = use.GetNode();
assert(argReg == argNode->GetRegNum());
argReg = getNextSIMDRegWithWraparound(argReg);
}

switch (ins)
{
case INS_sve_st2b:
case INS_sve_st2d:
case INS_sve_st2h:
case INS_sve_st2w:
case INS_sve_st2q:
assert(regCount == 2);
break;

case INS_sve_st3b:
case INS_sve_st3d:
case INS_sve_st3h:
case INS_sve_st3w:
case INS_sve_st3q:
assert(regCount == 3);
break;

case INS_sve_st4b:
case INS_sve_st4d:
case INS_sve_st4h:
case INS_sve_st4w:
case INS_sve_st4q:
assert(regCount == 4);
break;

default:
unreached();
}
#endif
GetEmitter()->emitIns_R_R_R_I(ins, emitSize, op3Reg, op1Reg, op2Reg, 0, opt);
break;
}

case NI_Sve_Store:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at #102180 reminded me that we can do the following....

For NI_Sve_Store remove the Special Code Gen flag and this case statement. This will cause the generic code to call emitIns_R_R_R().

You can now add a special case in emitIns_R_R_R() so that for NI_Sve_Store it just calls down to emitIns_R_R_R_I() with 0 for the immediate. Eg:

case INS_sve_ld1b:

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious...

NI_AdvSimd_Store uses gtNewSimdStoreNode() which eventually gets generated in genCodeForStoreInd(). This has extra code, for example, isvolatile checks.

Meanwhile NI_AdvSimd_StoreSelectedScalar imports like a normal hwintrinsic and gets generated in hwintrinsiccodegenarm64.cpp. This is the same for the various other stores.

Is there a special reason for using genCodeForStoreInd?

Why do the other stores not use genCodeForStoreInd?

Should NI_Sve_Store use genCodeForStoreInd?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will have to double check on this one. Opened #102347

{
GetEmitter()->emitIns_R_R_R_I(ins, emitSize, op3Reg, op1Reg, op2Reg, 0, opt);
break;
}

case NI_Vector64_ToVector128:
GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ false);
break;
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ HARDWARE_INTRINSIC(Sve, SignExtend32,
HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SignExtendWideningLower, -1, 1, true, {INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, SignExtendWideningUpper, -1, 1, true, {INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, Store, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SubtractSaturate, -1, 2, true, {INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
Expand All @@ -136,6 +137,9 @@ HARDWARE_INTRINSIC(Sve, ZipLow,
HARDWARE_INTRINSIC(Sve, ConvertMaskToVector, -1, 1, true, {INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation)
HARDWARE_INTRINSIC(Sve, ConvertVectorToMask, -1, 2, true, {INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, CreateTrueMaskAll, -1, -1, false, {INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, Storex2, -1, 3, true, {INS_sve_st2b, INS_sve_st2b, INS_sve_st2h, INS_sve_st2h, INS_sve_st2w, INS_sve_st2w, INS_sve_st2d, INS_sve_st2d, INS_sve_st2w, INS_sve_st2d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(Sve, Storex3, -1, 3, true, {INS_sve_st3b, INS_sve_st3b, INS_sve_st3h, INS_sve_st3h, INS_sve_st3w, INS_sve_st3w, INS_sve_st3d, INS_sve_st3d, INS_sve_st3w, INS_sve_st3d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(Sve, Storex4, -1, 3, true, {INS_sve_st4b, INS_sve_st4b, INS_sve_st4h, INS_sve_st4h, INS_sve_st4w, INS_sve_st4w, INS_sve_st4d, INS_sve_st4d, INS_sve_st4w, INS_sve_st4d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)


#endif // FEATURE_HW_INTRINSIC
Expand Down
23 changes: 20 additions & 3 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1758,6 +1758,20 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
break;
}

case NI_Sve_Storex2:
case NI_Sve_Storex3:
case NI_Sve_Storex4:
{
assert(intrin.op2 != nullptr);
assert(intrin.op3 != nullptr);
srcCount += BuildAddrUses(intrin.op2);
srcCount += BuildConsecutiveRegistersForUse(intrin.op3);
assert(dstCount == 0);
buildInternalRegisterUses();
*pDstCount = 0;
break;
}

default:
noway_assert(!"Not a supported as multiple consecutive register intrinsic");
}
Expand Down Expand Up @@ -1865,9 +1879,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
compiler->getSIMDInitTempVarNum(requiredSimdTempType);
}
}

if ((intrin.id == NI_Sve_FusedMultiplyAddBySelectedScalar) ||
(intrin.id == NI_Sve_FusedMultiplySubtractBySelectedScalar))
else if ((intrin.id == NI_Sve_FusedMultiplyAddBySelectedScalar) ||
(intrin.id == NI_Sve_FusedMultiplySubtractBySelectedScalar))
{
// If this is common pattern, then we will add a flag in the table, but for now, just check for specific
// intrinsics
Expand Down Expand Up @@ -1898,6 +1911,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
srcCount += BuildDelayFreeUses(intrinEmbOp2->Op(argNum), intrinEmbOp2->Op(1));
}
}
else if (intrin.id == NI_Sve_Store)
{
srcCount += BuildAddrUses(intrin.op2);
}
else
{
if (forceOp2DelayFree)
Expand Down
Loading