4 changes: 4 additions & 0 deletions src/coreclr/inc/clrconfigvalues.h
@@ -349,6 +349,10 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_MultiCoreJitNoProfileGather, W("MultiCoreJitNo

#endif

#ifdef TARGET_ARM64
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitUseScalableVectorT, W("JitUseScalableVectorT"), 0, "Accelerate Vector<T> with SVE if available.")
#endif

///
/// Loader heap
///
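JitUseScalableVectorT is a retail DWORD config with the EXTERNAL_ prefix, so it would normally be surfaced through the standard CLR configuration mechanism (for example a DOTNET_JitUseScalableVectorT environment variable). A minimal sketch of how such a knob is typically read inside the runtime follows; the helper name and call site are assumptions, not part of this change.

#include "clrconfig.h"

// Hedged sketch: RETAIL_CONFIG_DWORD_INFO entries are queried through CLRConfig.
static bool UseScalableVectorT()
{
    // Defaults to 0 (off) per the definition above; a non-zero value opts
    // Vector<T> into SVE acceleration when the hardware supports it.
    return CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_JitUseScalableVectorT) != 0;
}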
1 change: 1 addition & 0 deletions src/coreclr/inc/corhdr.h
@@ -1761,6 +1761,7 @@ typedef enum CorInfoHFAElemType : unsigned {
CORINFO_HFA_ELEM_DOUBLE,
CORINFO_HFA_ELEM_VECTOR64,
CORINFO_HFA_ELEM_VECTOR128,
CORINFO_HFA_ELEM_VECTORT,
Contributor Author
I've noted down to revisit this on the issue, as technically a Vector<T> would be a 'Pure Scalable Type', not an HFA/HVA. It looks very similar in principle, but there may be some subtle differences.

} CorInfoHFAElemType;

//
24 changes: 20 additions & 4 deletions src/coreclr/jit/codegenarm64.cpp
@@ -362,6 +362,15 @@ bool CodeGen::genInstrWithConstant(instruction ins,
immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size);
break;

case INS_sve_ldr:
case INS_sve_str:
{
assert(size == EA_SCALABLE);
ssize_t count = imm / genTypeSize(TYP_SIMDSV);
immFitsInIns = (-256 <= count && count < 256);
}
break;

default:
assert(!"Unexpected instruction in genInstrWithConstant");
break;
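The new case scales the byte offset down to a count of vector-length (VL) units, because SVE LDR/STR of a Z register encode their immediate as a signed multiple of VL in a 9-bit field. A standalone sketch of that range check, with the divisibility requirement made explicit (an assumption here; the surrounding helper falls back to a scratch register when the offset does not fit):

#include <cstdint>

// Returns true when byteOffset can be encoded directly as "[Xn, #imm, mul vl]".
bool SveLdrStrOffsetFits(int64_t byteOffset, int64_t vectorByteLength)
{
    if ((byteOffset % vectorByteLength) != 0)
    {
        return false; // not an exact multiple of the vector length
    }
    int64_t count = byteOffset / vectorByteLength;
    return (count >= -256) && (count < 256); // same bounds as the check above
}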
@@ -2075,10 +2084,14 @@ void CodeGen::instGen_Set_Reg_To_Base_Plus_Imm(emitAttr size,
// If the imm value fits in 12 bits, we can use a single "add rsvd, reg2, #imm" (or "sub" for a negative imm).
// Otherwise, use "mov rsvd, #imm", followed by "add rsvd, reg2, rsvd".

if (imm < 4096)
if (0 <= imm && imm < 4096)
{
GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, dstReg, baseReg, imm);
}
else if (-4095 <= imm && imm < 0)
{
GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, dstReg, baseReg, -imm);
}
else
{
instGen_Set_Reg_To_Imm(size, dstReg, imm);
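ARM64 ADD/SUB (immediate) take an unsigned 12-bit value, so the change above handles negative offsets by switching to SUB with the magnitude, and anything outside [-4095, 4095] still goes through a scratch register. A small standalone sketch of the same classification (names are illustrative only):

#include <cstdint>

enum class BasePlusImmPlan
{
    SingleAdd,  // add dst, base, #imm
    SingleSub,  // sub dst, base, #-imm
    MovThenAdd  // mov rsvd, #imm ; add dst, base, rsvd
};

BasePlusImmPlan ClassifyBasePlusImm(int64_t imm)
{
    if ((imm >= 0) && (imm < 4096))
    {
        return BasePlusImmPlan::SingleAdd;
    }
    if ((imm < 0) && (imm >= -4095))
    {
        return BasePlusImmPlan::SingleSub;
    }
    return BasePlusImmPlan::MovThenAdd;
}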
@@ -2274,6 +2287,9 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre

switch (tree->TypeGet())
{
case TYP_SIMDSV:
attr = EA_16BYTE; // TODO-SVE: Implement scalable vector constant
FALLTHROUGH;
case TYP_SIMD8:
case TYP_SIMD12:
case TYP_SIMD16:
@@ -2999,7 +3015,7 @@ void CodeGen::genSimpleReturn(GenTree* treeNode)
}
}
emitAttr attr = emitActualTypeSize(targetType);
GetEmitter()->emitIns_Mov(INS_mov, attr, retReg, op1->GetRegNum(), /* canSkip */ !movRequired);
inst_Mov(targetType, retReg, op1->GetRegNum(), !movRequired, attr);
}

/***********************************************************************************************
@@ -5306,7 +5322,7 @@ void CodeGen::genSimdUpperSave(GenTreeIntrinsic* node)

GenTreeLclVar* lclNode = op1->AsLclVar();
LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode);
assert(emitTypeSize(varDsc->GetRegisterType(lclNode)) == 16);
assert(varDsc->TypeIs(TYP_STRUCT, TYP_SIMD12, TYP_SIMD16, TYP_SIMDSV)); // TODO-SVE: Handle AAPCS for Z registers

regNumber tgtReg = node->GetRegNum();
assert(tgtReg != REG_NA);
@@ -5362,7 +5378,7 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node)

GenTreeLclVar* lclNode = op1->AsLclVar();
LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode);
assert(emitTypeSize(varDsc->GetRegisterType(lclNode)) == 16);
assert(varDsc->TypeIs(TYP_STRUCT, TYP_SIMD12, TYP_SIMD16, TYP_SIMDSV)); // TODO-SVE: Handle AAPCS for Z registers

regNumber srcReg = node->GetRegNum();
assert(srcReg != REG_NA);
7 changes: 6 additions & 1 deletion src/coreclr/jit/codegenarmarch.cpp
@@ -809,8 +809,13 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
#endif // TARGET_ARM64
{
emitAttr storeAttr = emitTypeSize(source->TypeGet());
emit->emitIns_S_R(INS_str, storeAttr, srcReg, varNumOut, argOffsetOut);
emit->emitIns_S_R(ins_Store(source->TypeGet()), storeAttr, srcReg, varNumOut, argOffsetOut);
#ifdef TARGET_ARM64
argOffsetOut +=
storeAttr == EA_SCALABLE ? compiler->getVectorTByteLength() : EA_SIZE_IN_BYTES(storeAttr);
#else
argOffsetOut += EA_SIZE_IN_BYTES(storeAttr);
#endif
}
assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
return;
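Because EA_SCALABLE does not correspond to a fixed byte size, the outgoing-argument offset can no longer be advanced from the emitter attribute alone; it has to use the runtime Vector<T> length instead. A self-contained sketch of the bump performed above (the parameter names are illustrative stand-ins for EA_SIZE_IN_BYTES(storeAttr) and compiler->getVectorTByteLength()):

#include <cstdint>

uint32_t NextOutgoingArgOffset(uint32_t currentOffset,
                               bool     isScalableStore,
                               uint32_t fixedStoreSizeBytes,
                               uint32_t vectorTByteLength)
{
    // Scalable stores advance by the hardware vector length; everything else
    // advances by the fixed size implied by the store attribute.
    return currentOffset + (isScalableStore ? vectorTByteLength : fixedStoreSizeBytes);
}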
3 changes: 2 additions & 1 deletion src/coreclr/jit/codegencommon.cpp
@@ -3266,6 +3266,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed)
#if defined(TARGET_ARM64)
// On arm64 SIMD parameters are HFAs and passed in multiple float
// registers while we can enregister them as single registers.
// TODO-SVE: Ensure this works for Z registers as well.
GetEmitter()->emitIns_R_R_I_I(INS_mov, emitTypeSize(edge->type), node->reg, sourceReg,
edge->destOffset / genTypeSize(edge->type), 0);
#elif defined(UNIX_AMD64_ABI)
@@ -5906,7 +5907,7 @@ unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass)
var_types hfaType = GetHfaType(hClass);
unsigned classSize = info.compCompHnd->getClassSize(hClass);
// Note that the retail build issues a warning about a potential division by zero without the Max function
unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
unsigned elemSize = Max((unsigned)1, genTypeSize(genActualType(hfaType)));
return classSize / elemSize;
#endif // TARGET_ARM64
}
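With scalable vectors in the mix, the element size has to come from genTypeSize (which now reflects the runtime Vector<T> length) rather than from a fixed emitter attribute; the count itself is still a plain division. A worked example with illustrative numbers only:

#include <cassert>

int main()
{
    // Assumption: an HVA of four Vector<T> fields on 256-bit SVE hardware.
    unsigned classSize = 128; // getClassSize(hClass)
    unsigned elemSize  = 32;  // genTypeSize(genActualType(hfaType))
    assert(classSize / elemSize == 4);
    return 0;
}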
11 changes: 11 additions & 0 deletions src/coreclr/jit/codegenlinear.cpp
@@ -2269,8 +2269,19 @@ void CodeGen::genCodeForCast(GenTreeOp* tree)
genLongToIntCast(tree);
}
#endif // !TARGET_64BIT
#ifdef TARGET_ARM64
else if (targetType == TYP_SIMDSV || tree->gtOp1->TypeGet() == TYP_SIMDSV)
{
// TODO-SVE: Can we avoid generating these casts altogether?
assert(genTypeSize(tree->CastToType()) == genTypeSize(tree->CastFromType()));
genConsumeOperands(tree);
inst_Mov(tree->CastToType(), tree->GetRegNum(), tree->gtOp1->GetRegNum(), true);
genProduceReg(tree);
}
#endif
else
{
assert(varTypeIsIntegral(targetType) && varTypeIsIntegral(tree->gtOp1));
// Casts int <--> int
genIntToIntCast(tree->AsCast());
}
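The new branch treats a cast to or from TYP_SIMDSV as a pure bit-preserving register move, which is only legal because the assert guarantees the source and target types have the same size. The same idea expressed outside the JIT, as a hedged illustration rather than JIT code:

#include <cstring>

// A same-size "cast" carries no conversion semantics: it is just a copy of the
// underlying bits, which is why codegen can lower it to a single mov (or skip
// the mov entirely when the source and destination registers already match).
template <typename Dst, typename Src>
Dst BitCastSameSize(const Src& src)
{
    static_assert(sizeof(Dst) == sizeof(Src), "only same-size casts are supported");
    Dst dst;
    std::memcpy(&dst, &src, sizeof(Dst));
    return dst;
}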
38 changes: 30 additions & 8 deletions src/coreclr/jit/compiler.cpp
@@ -107,11 +107,12 @@ inline bool _our_GetThreadCycles(uint64_t* cycleOut)

#endif // which host OS

const BYTE genTypeSizes[] = {
BYTE _initGenTypeSizes[] = {
#define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) sz,
#include "typelist.h"
#undef DEF_TP
};
const BYTE (&genTypeSizes)[TYP_COUNT] = _initGenTypeSizes;
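The genTypeSizes table becomes writable so its TYP_SIMDSV and TYP_MASK entries can be patched once the hardware vector length is known (see the compCompileHelper hunk below), while every other consumer keeps a read-only view through a reference to a const array. A minimal standalone sketch of that C++ pattern:

#include <cstdio>

typedef unsigned char BYTE;

static BYTE _initSizes[3] = {1, 2, 0};  // last slot patched at startup
const BYTE (&sizes)[3] = _initSizes;    // read-only alias for everyone else

int main()
{
    _initSizes[2] = 32;                      // e.g. a 256-bit Vector<T> on SVE hardware
    std::printf("%u\n", (unsigned)sizes[2]); // prints 32
    // sizes[2] = 64;                        // error: assignment of read-only location
    return 0;
}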

const BYTE genTypeAlignments[] = {
#define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) al,
@@ -609,13 +610,18 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLAS
// Start by determining if we have an HFA/HVA with a single element.
if (GlobalJitOptions::compFeatureHfa)
{
switch (structSize)
{
case 4:
case 8:
if (structSize == 4 ||
structSize == 8
#ifdef TARGET_ARM64
case 16:
#endif // TARGET_ARM64
// Can pass in a V register if structSize == 16, and in Z registers for structs whose sizes are
// multiples of 16 bytes, depending on hardware availability.
|| structSize == 16 || ((structSize % 16 == 0) && (structSize == genTypeSize(TYP_SIMDSV)))
#endif
)
{
var_types hfaType = GetHfaType(clsHnd);
// We're only interested in the case where the struct size is equal to the size of the hfaType.
if (varTypeIsValidHfaType(hfaType))
{
var_types hfaType = GetHfaType(clsHnd);
// We're only interested in the case where the struct size is equal to the size of the hfaType.
@@ -861,7 +867,15 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
// The largest "primitive type" is MAX_PASS_SINGLEREG_BYTES
// so we can skip calling getPrimitiveTypeForStruct when we
// have a struct that is larger than that.
if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= MAX_PASS_SINGLEREG_BYTES))
//
// On ARM64 we can pass structures in scalable vector registers
// which may allow larger structures on some hardware.
#ifdef TARGET_ARM64
unsigned maxStructSize = max((unsigned)MAX_PASS_SINGLEREG_BYTES, getVectorTByteLength());
#else
unsigned maxStructSize = MAX_PASS_SINGLEREG_BYTES;
#endif
if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= maxStructSize))
{
// We set the "primitive" useType based upon the structSize
// and also examine the clsHnd to see if it is an HFA of count one
@@ -6795,6 +6809,14 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
break;
}

#if defined(FEATURE_SIMD) && defined(TARGET_ARM64)
// Initialize the sizes of Vector<T> and the predicate mask type from the EE.
_initGenTypeSizes[TYP_SIMDSV] = (BYTE)getVectorTByteLength();
_initGenTypeSizes[TYP_MASK] = (BYTE)getMaskByteLength();
assert(genTypeSize(TYP_SIMDSV) >= 16);
assert(genTypeSize(TYP_MASK) >= 2);
#endif

info.compRetType = JITtype2varType(methodInfo->args.retType);
if (info.compRetType == TYP_STRUCT)
{
24 changes: 22 additions & 2 deletions src/coreclr/jit/compiler.h
@@ -157,6 +157,10 @@ inline var_types HfaTypeFromElemKind(CorInfoHFAElemType kind)
return TYP_SIMD8;
case CORINFO_HFA_ELEM_VECTOR128:
return TYP_SIMD16;
#ifdef TARGET_ARM64
case CORINFO_HFA_ELEM_VECTORT:
return TYP_SIMDSV;
#endif
#endif
case CORINFO_HFA_ELEM_NONE:
return TYP_UNDEF;
@@ -178,6 +182,10 @@ inline CorInfoHFAElemType HfaElemKindFromType(var_types type)
return CORINFO_HFA_ELEM_VECTOR64;
case TYP_SIMD16:
return CORINFO_HFA_ELEM_VECTOR128;
#ifdef TARGET_ARM64
case TYP_SIMDSV:
return CORINFO_HFA_ELEM_VECTORT;
#endif
#endif
case TYP_UNDEF:
return CORINFO_HFA_ELEM_NONE;
@@ -8212,7 +8220,7 @@ class Compiler
assert(type != TYP_STRUCT);
// ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes
// For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes.
return ((type == TYP_SIMD16) || (type == TYP_SIMD12));
return ((type == TYP_SIMDSV) || (type == TYP_SIMD16) || (type == TYP_SIMD12));
}
#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
#error("Unknown target architecture for FEATURE_PARTIAL_SIMD_CALLEE_SAVE")
@@ -9079,6 +9087,8 @@ class Compiler
return isSIMDClass(clsHnd) || isHWSIMDClass(clsHnd);
}

var_types getSIMDType(CORINFO_CLASS_HANDLE typeHnd, CorInfoType* baseType = nullptr);

// Get the base (element) type and size in bytes for a SIMD type. Returns CORINFO_TYPE_UNDEF
// if it is not a SIMD type or is an unsupported base JIT type.
CorInfoType getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, unsigned* sizeBytes = nullptr);
@@ -9164,6 +9174,16 @@
#endif
}

#ifdef TARGET_ARM64
uint32_t getMaskByteLength()
{
// Predicate registers have 1 bit for each byte in the vector register.
// We round up to an int as the CLR prefers to work in integers.
assert((getVectorTByteLength() % 8) == 0);
return (uint32_t)roundUp((size_t)getVectorTByteLength() / 8, sizeof(int));
}
#endif
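A worked version of getMaskByteLength above: one predicate bit per vector byte gives VL/8 raw bytes, which is then rounded up to a whole int. On 128-bit hardware that is 2 rounded up to 4 bytes, on 256-bit hardware 4 stays 4, and on 512-bit hardware 8 stays 8. A standalone sketch, not the JIT helper itself:

#include <cassert>
#include <cstdint>

uint32_t MaskByteLength(uint32_t vectorTByteLength)
{
    assert((vectorTByteLength % 8) == 0);
    uint32_t rawBytes = vectorTByteLength / 8;       // one predicate bit per vector byte
    uint32_t align    = (uint32_t)sizeof(int);
    return ((rawBytes + align - 1) / align) * align; // round up to a whole int
}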

// The minimum and maximum possible number of bytes in a SIMD vector.

// getMaxVectorByteLength
@@ -12407,7 +12427,7 @@ const instruction INS_BREAKPOINT = INS_ebreak;

/*****************************************************************************/

extern const BYTE genTypeSizes[];
extern const BYTE (&genTypeSizes)[TYP_COUNT];
extern const BYTE genTypeAlignments[];
extern const BYTE genTypeStSzs[];
extern const BYTE genActualTypes[];
8 changes: 6 additions & 2 deletions src/coreclr/jit/compiler.hpp
@@ -1137,13 +1137,12 @@ inline regNumber genFirstRegNumFromMaskAndToggle(SingleTypeRegSet& mask)
* Return the size in bytes of the given type.
*/

extern const BYTE genTypeSizes[TYP_COUNT];
extern const BYTE (&genTypeSizes)[TYP_COUNT];

template <class T>
inline unsigned genTypeSize(T value)
{
assert((unsigned)TypeGet(value) < ArrLen(genTypeSizes));

return genTypeSizes[TypeGet(value)];
}

@@ -1158,6 +1157,11 @@ extern const BYTE genTypeStSzs[TYP_COUNT];
template <class T>
inline unsigned genTypeStSz(T value)
{
#ifdef TARGET_ARM64
// The size of these types cannot be evaluated in static contexts.
noway_assert(TypeGet(value) != TYP_SIMDSV);
noway_assert(TypeGet(value) != TYP_MASK);
#endif
assert((unsigned)TypeGet(value) < ArrLen(genTypeStSzs));

return genTypeStSzs[TypeGet(value)];
34 changes: 17 additions & 17 deletions src/coreclr/jit/emitarm64.cpp
@@ -4266,6 +4266,13 @@ void emitter::emitIns_Mov(
{
assert(insOptsNone(opt));

if (attr == EA_SCALABLE)
{
// A NEON mov is acceptable for scalable vectors while the vector length is 128 bits (16 bytes).
// TODO-SVE: This should not be permitted once Vector<T> has been migrated to SVE.
assert(codeGen->compiler->getVectorTByteLength() == 16);
}

if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
{
// These instructions have no side effect and can be skipped
@@ -4340,6 +4347,7 @@
case INS_fmov:
{
assert(isValidVectorElemsizeFloat(size));
assert(attr != EA_SCALABLE);

if (canSkip && (dstReg == srcReg))
{
@@ -4387,35 +4395,22 @@

case INS_sve_mov:
{
assert(attr == EA_SCALABLE);
if (isPredicateRegister(dstReg) && isPredicateRegister(srcReg))
{
assert((opt == INS_OPTS_SCALABLE_B) || insOptsNone(opt));
opt = INS_OPTS_SCALABLE_B;
attr = EA_SCALABLE;

if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
{
return;
}
opt = INS_OPTS_SCALABLE_B;
fmt = IF_SVE_CZ_4A_L;
}
else if (isVectorRegister(dstReg) && isVectorRegister(srcReg))
{
assert(insOptsScalable(opt));

if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
{
return;
}
assert(insOptsScalable(opt) || insOptsNone(opt));
opt = INS_OPTS_SCALABLE_D;
fmt = IF_SVE_AU_3A;
}
else if (isVectorRegister(dstReg) && isGeneralRegisterOrSP(srcReg))
{
assert(insOptsScalable(opt));
if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
{
return;
}
srcReg = encodingSPtoZR(srcReg);
fmt = IF_SVE_CB_2A;
}
@@ -4424,6 +4419,11 @@
unreached();
}

if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
{
return;
}

break;
}
default: