diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index bf402bc0538881..498e5ca5600cd1 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -1624,6 +1624,14 @@ void CodeGen::genConsumeRegs(GenTree* tree)
         genConsumeRegs(tree->gtGetOp1());
         genConsumeRegs(tree->gtGetOp2());
     }
+    else if (tree->OperIsFieldList())
+    {
+        for (GenTreeFieldList::Use& use : tree->AsFieldList()->Uses())
+        {
+            GenTree* fieldNode = use.GetNode();
+            genConsumeRegs(fieldNode);
+        }
+    }
 #endif
     else if (tree->OperIsLocalRead())
     {
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 8134c167bd1674..f27c6bd8ae02e3 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -6718,6 +6718,10 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
     compBasicBlockID = 0;
 #endif

+#ifdef TARGET_ARM64
+    info.compNeedsConsecutiveRegisters = false;
+#endif
+
     /* Initialize emitter */

     if (!compIsForInlining())
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 517a5fae972c9d..2d84e5f4218cbc 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -2809,6 +2809,10 @@ class Compiler
                                     CORINFO_CLASS_HANDLE clsHnd,
                                     CORINFO_SIG_INFO*    sig,
                                     CorInfoType          simdBaseJitType);
+
+#ifdef TARGET_ARM64
+    GenTreeFieldList* gtConvertTableOpToFieldList(GenTree* op, unsigned fieldCount);
+#endif
 #endif // FEATURE_HW_INTRINSICS

     GenTree* gtNewMustThrowException(unsigned helper, var_types type, CORINFO_CLASS_HANDLE clsHnd);
@@ -10061,6 +10065,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         // Number of class profile probes in this method
         unsigned compHandleHistogramProbeCount;

+#ifdef TARGET_ARM64
+        bool compNeedsConsecutiveRegisters;
+#endif
+
     } info;

     ReturnTypeDesc compRetTypeDesc; // ABI return type descriptor for the method
diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp
index 2d28b9fdfb7736..cda3b1725b6725 100644
--- a/src/coreclr/jit/fginline.cpp
+++ b/src/coreclr/jit/fginline.cpp
@@ -1447,6 +1447,10 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)

     lvaGenericsContextInUse |= InlineeCompiler->lvaGenericsContextInUse;

+#ifdef TARGET_ARM64
+    info.compNeedsConsecutiveRegisters |= InlineeCompiler->info.compNeedsConsecutiveRegisters;
+#endif
+
     // If the inlinee compiler encounters switch tables, disable hot/cold splitting in the root compiler.
     // TODO-CQ: Implement hot/cold splitting of methods with switch tables.
     if (InlineeCompiler->fgHasSwitch && opts.compProcedureSplitting)
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index e2a336fcfef6c9..43d9384296557e 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -23016,6 +23016,7 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type,
     op2->AsVecCon()->gtSimdVal = vecCns;

     return gtNewSimdHWIntrinsicNode(type, op1, op2, lookupIntrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+
 #else
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64
@@ -23885,6 +23886,38 @@ GenTree* Compiler::gtNewSimdWithElementNode(var_types type,
     return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
 }

+#ifdef TARGET_ARM64
+//------------------------------------------------------------------------
+// gtConvertTableOpToFieldList: Convert an operand that represents a table of rows into
+// a field list, where each field represents a row in the table.
+//
+// Arguments:
+//    op         -- Operand to convert.
+//    fieldCount -- Number of fields (rows) present.
+//
+// Return Value:
+//    The GenTreeFieldList node.
+//
+GenTreeFieldList* Compiler::gtConvertTableOpToFieldList(GenTree* op, unsigned fieldCount)
+{
+    LclVarDsc* opVarDsc  = lvaGetDesc(op->AsLclVar());
+    unsigned   lclNum    = lvaGetLclNum(opVarDsc);
+    unsigned   fieldSize = opVarDsc->lvSize() / fieldCount;
+    var_types  fieldType = TYP_SIMD16;
+
+    GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList();
+    int               offset    = 0;
+    for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++)
+    {
+        GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, fieldType, offset);
+        fieldList->AddField(this, fldNode, offset, fieldType);
+
+        offset += fieldSize;
+    }
+    return fieldList;
+}
+#endif // TARGET_ARM64
+
 GenTree* Compiler::gtNewSimdWithLowerNode(var_types type,
                                           GenTree*  op1,
                                           GenTree*  op2,
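To make the conversion concrete, here is a standalone sketch (plain C++, not JIT code; the `Field` and `convertTableToFieldList` names are illustrative only) of how a struct local holding N `Vector128` rows maps to SIMD16 fields at 16-byte offsets:

```cpp
#include <cstdio>
#include <vector>

struct Field { unsigned offset; }; // stands in for a GT_LCL_FLD of type TYP_SIMD16

std::vector<Field> convertTableToFieldList(unsigned structSize, unsigned fieldCount)
{
    std::vector<Field> fieldList;
    unsigned fieldSize = structSize / fieldCount; // 16 bytes per Vector128 row
    unsigned offset    = 0;
    for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++)
    {
        fieldList.push_back({offset});
        offset += fieldSize;
    }
    return fieldList;
}

int main()
{
    // A (Vector128<byte>, Vector128<byte>, Vector128<byte>) table: 48-byte struct.
    for (const Field& f : convertTableToFieldList(48, 3))
        printf("TYP_SIMD16 field at offset %u\n", f.offset); // prints 0, 16, 32
}
```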
diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h
index e698a277f2fa5e..c2848581954b6f 100644
--- a/src/coreclr/jit/hwintrinsic.h
+++ b/src/coreclr/jit/hwintrinsic.h
@@ -176,6 +176,9 @@ enum HWIntrinsicFlag : unsigned int

     // The intrinsic supports some sort of containment analysis
     HW_Flag_SupportsContainment = 0x2000,
+
+    // The intrinsic needs consecutive registers
+    HW_Flag_NeedsConsecutiveRegisters = 0x4000,
 #else
 #error Unsupported platform
 #endif
@@ -751,6 +754,14 @@ struct HWIntrinsicInfo
         return (flags & HW_Flag_SpecialCodeGen) != 0;
     }

+#ifdef TARGET_ARM64
+    static bool NeedsConsecutiveRegisters(NamedIntrinsic id)
+    {
+        HWIntrinsicFlag flags = lookupFlags(id);
+        return (flags & HW_Flag_NeedsConsecutiveRegisters) != 0;
+    }
+#endif
+
     static bool HasRMWSemantics(NamedIntrinsic id)
     {
         HWIntrinsicFlag flags = lookupFlags(id);
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index a09ca2d1e6d9bd..95b61635f87421 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -1900,7 +1900,84 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             retNode = impAssignMultiRegTypeToVar(op1, sig->retTypeSigClass DEBUGARG(CorInfoCallConvExtension::Managed));
             break;
         }
+        case NI_AdvSimd_VectorTableLookup:
+        case NI_AdvSimd_Arm64_VectorTableLookup:
+        {
+            assert(sig->numArgs == 2);
+
+            CORINFO_ARG_LIST_HANDLE arg1     = sig->args;
+            CORINFO_ARG_LIST_HANDLE arg2     = info.compCompHnd->getArgNext(arg1);
+            var_types               argType  = TYP_UNKNOWN;
+            CORINFO_CLASS_HANDLE    argClass = NO_CLASS_HANDLE;
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
+            op2     = getArgForHWIntrinsic(argType, argClass);
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
+            op1     = impPopStack().val;
+
+            if (op1->TypeGet() == TYP_STRUCT)
+            {
+                info.compNeedsConsecutiveRegisters = true;
+                unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass);
+
+                if (!op1->OperIs(GT_LCL_VAR))
+                {
+                    unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookup temp tree"));
+
+                    impAssignTempGen(tmp, op1, CHECK_SPILL_NONE);
+                    op1 = gtNewLclvNode(tmp, argType);
+                }
+
+                op1 = gtConvertTableOpToFieldList(op1, fieldCount);
+            }
+            else
+            {
+                assert(varTypeIsSIMD(op1->TypeGet()));
+            }
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
+            break;
+        }
+        case NI_AdvSimd_VectorTableLookupExtension:
+        case NI_AdvSimd_Arm64_VectorTableLookupExtension:
+        {
+            assert(sig->numArgs == 3);
+
+            CORINFO_ARG_LIST_HANDLE arg1     = sig->args;
+            CORINFO_ARG_LIST_HANDLE arg2     = info.compCompHnd->getArgNext(arg1);
+            CORINFO_ARG_LIST_HANDLE arg3     = info.compCompHnd->getArgNext(arg2);
+            var_types               argType  = TYP_UNKNOWN;
+            CORINFO_CLASS_HANDLE    argClass = NO_CLASS_HANDLE;
+
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
+            op3     = getArgForHWIntrinsic(argType, argClass);
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
+            op2     = impPopStack().val;
+            op1     = impPopStack().val;
+
+            if (op2->TypeGet() == TYP_STRUCT)
+            {
+                info.compNeedsConsecutiveRegisters = true;
+                unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass);
+
+                if (!op2->OperIs(GT_LCL_VAR))
+                {
+                    unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookupExtension temp tree"));
+
+                    impAssignTempGen(tmp, op2, CHECK_SPILL_NONE);
+                    op2 = gtNewLclvNode(tmp, argType);
+                }
+
+                op2 = gtConvertTableOpToFieldList(op2, fieldCount);
+            }
+            else
+            {
+                assert(varTypeIsSIMD(op2->TypeGet()));
+            }
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
+            break;
+        }
         default:
         {
             return nullptr;
diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
index 7c0354959f3a7b..05ccf663db67ce 100644
--- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -1002,6 +1002,110 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                                           (emitSize == EA_8BYTE) ? INS_OPTS_8B : INS_OPTS_16B);
                 break;

+            case NI_AdvSimd_VectorTableLookup:
+            case NI_AdvSimd_Arm64_VectorTableLookup:
+            {
+                unsigned regCount = 0;
+                if (intrin.op1->OperIsFieldList())
+                {
+                    GenTreeFieldList* fieldList  = intrin.op1->AsFieldList();
+                    GenTree*          firstField = fieldList->Uses().GetHead()->GetNode();
+                    op1Reg                       = firstField->GetRegNum();
+                    INDEBUG(regNumber argReg = op1Reg);
+                    for (GenTreeFieldList::Use& use : fieldList->Uses())
+                    {
+                        regCount++;
+#ifdef DEBUG
+
+                        GenTree* argNode = use.GetNode();
+                        assert(argReg == argNode->GetRegNum());
+                        argReg = REG_NEXT(argReg);
+#endif
+                    }
+                }
+                else
+                {
+                    regCount = 1;
+                    op1Reg   = intrin.op1->GetRegNum();
+                }
+
+                switch (regCount)
+                {
+                    case 2:
+                        ins = INS_tbl_2regs;
+                        break;
+                    case 3:
+                        ins = INS_tbl_3regs;
+                        break;
+                    case 4:
+                        ins = INS_tbl_4regs;
+                        break;
+                    default:
+                        assert(regCount == 1);
+                        assert(ins == INS_tbl);
+                        break;
+                }
+
+                GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
+                break;
+            }
+
+            case NI_AdvSimd_VectorTableLookupExtension:
+            case NI_AdvSimd_Arm64_VectorTableLookupExtension:
+            {
+                assert(isRMW);
+                unsigned regCount = 0;
+                op1Reg            = intrin.op1->GetRegNum();
+                op3Reg            = intrin.op3->GetRegNum();
+                assert(targetReg != op3Reg);
+                if (intrin.op2->OperIsFieldList())
+                {
+                    GenTreeFieldList* fieldList  = intrin.op2->AsFieldList();
+                    GenTree*          firstField = fieldList->Uses().GetHead()->GetNode();
+                    op2Reg                       = firstField->GetRegNum();
+                    INDEBUG(regNumber argReg = op2Reg);
+                    for (GenTreeFieldList::Use& use : fieldList->Uses())
+                    {
+                        regCount++;
+#ifdef DEBUG
+
+                        GenTree* argNode = use.GetNode();
+
+                        // registers should be consecutive
+                        assert(argReg == argNode->GetRegNum());
+                        // and they should not interfere with targetReg
+                        assert(targetReg != argReg);
+                        argReg = REG_NEXT(argReg);
+#endif
+                    }
+                }
+                else
+                {
+                    regCount = 1;
+                    op2Reg   = intrin.op2->GetRegNum();
+                }
+
+                switch (regCount)
+                {
+                    case 2:
+                        ins = INS_tbx_2regs;
+                        break;
+                    case 3:
+                        ins = INS_tbx_3regs;
+                        break;
+                    case 4:
+                        ins = INS_tbx_4regs;
+                        break;
+                    default:
+                        assert(regCount == 1);
+                        assert(ins == INS_tbx);
+                        break;
+                }
+
+                GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
+                GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt);
+                break;
+            }

             default:
                 unreached();
         }
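For reference, a standalone sketch (not emitter code; enum values are placeholders) of the `regCount`-to-instruction mapping these cases implement. AArch64 TBL/TBX take a list of one to four table registers that must be architecturally consecutive, e.g. `tbl v0.16b, {v1.16b, v2.16b}, v3.16b` for a two-register table:

```cpp
#include <cassert>

enum instruction { INS_tbl, INS_tbl_2regs, INS_tbl_3regs, INS_tbl_4regs };

instruction pickTblIns(unsigned regCount)
{
    switch (regCount)
    {
        case 2:  return INS_tbl_2regs; // tbl Vd, {Vn, Vn+1}, Vm
        case 3:  return INS_tbl_3regs; // tbl Vd, {Vn, Vn+1, Vn+2}, Vm
        case 4:  return INS_tbl_4regs; // tbl Vd, {Vn ... Vn+3}, Vm
        default: assert(regCount == 1);
                 return INS_tbl;       // tbl Vd, {Vn}, Vm
    }
}

int main()
{
    assert(pickTblIns(1) == INS_tbl);
    assert(pickTblIns(4) == INS_tbl_4regs);
}
```

The `emitIns_Mov(..., /* canSkip */ true)` before TBX reflects its read-modify-write semantics: the destination must already hold the fallback vector, and the move is elided when it is already in `targetReg`.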
diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h
index a85ff2db6c53f6..72cf16c52ecf18 100644
--- a/src/coreclr/jit/hwintrinsiclistarm64.h
+++ b/src/coreclr/jit/hwintrinsiclistarm64.h
@@ -477,8 +477,8 @@ HARDWARE_INTRINSIC(AdvSimd, SubtractSaturateScalar,
 HARDWARE_INTRINSIC(AdvSimd, SubtractScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sub, INS_sub, INS_fsub, INS_fsub}, HW_Category_SIMD, HW_Flag_SIMDScalar)
 HARDWARE_INTRINSIC(AdvSimd, SubtractWideningLower, 8, 2, {INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubw, INS_usubw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AdvSimd, SubtractWideningUpper, 16, 2, {INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubw2, INS_usubw2, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd, VectorTableLookupExtension, 8, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics)
+HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
+HARDWARE_INTRINSIC(AdvSimd, VectorTableLookupExtension, 8, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics|HW_Flag_NeedsConsecutiveRegisters)
 HARDWARE_INTRINSIC(AdvSimd, Xor, -1, 2, {INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor}, HW_Category_SIMD, HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningLower, 8, 1, {INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningUpper, 16, 1, {INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg)
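The flag columns above are plain bitwise-OR compositions. A minimal sketch of how the `HWIntrinsicInfo` queries reduce to bit tests (only the `0x4000` value for `HW_Flag_NeedsConsecutiveRegisters` is taken from this diff; the other values here are assumed for illustration):

```cpp
#include <cassert>

enum HWIntrinsicFlag : unsigned int
{
    HW_Flag_SpecialImport             = 0x1,    // assumed value
    HW_Flag_SpecialCodeGen            = 0x8,    // assumed value
    HW_Flag_HasRMWSemantics           = 0x100,  // assumed value
    HW_Flag_NeedsConsecutiveRegisters = 0x4000, // from this change
};

int main()
{
    unsigned tbxFlags = HW_Flag_SpecialImport | HW_Flag_SpecialCodeGen |
                        HW_Flag_HasRMWSemantics | HW_Flag_NeedsConsecutiveRegisters;
    // The lookupFlags-style predicates are single bit tests:
    assert((tbxFlags & HW_Flag_NeedsConsecutiveRegisters) != 0);
}
```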
@@ -651,8 +651,8 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeEven,
 HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeOdd, -1, 2, {INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2}, HW_Category_SIMD, HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipEven, -1, 2, {INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1}, HW_Category_SIMD, HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipOdd, -1, 2, {INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2}, HW_Category_SIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup, 16, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookupExtension, 16, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics)
+HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup, 16, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
+HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookupExtension, 16, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics|HW_Flag_NeedsConsecutiveRegisters)
 HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipHigh, -1, 2, {INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2}, HW_Category_SIMD, HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipLow, -1, 2, {INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1}, HW_Category_SIMD, HW_Flag_NoFlag)
diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp
index 78e6a1296972e2..39e8e155bca8b8 100644
--- a/src/coreclr/jit/lsra.cpp
+++ b/src/coreclr/jit/lsra.cpp
@@ -496,6 +496,13 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
         {
             mask |= refPosition->registerAssignment;
         }
+
+#ifdef TARGET_ARM64
+        if ((refPosition != nullptr) && refPosition->isFirstRefPositionOfConsecutiveRegisters())
+        {
+            mask |= LsraLimitFPSetForConsecutive;
+        }
+#endif
     }

     return mask;
@@ -1292,6 +1299,10 @@ PhaseStatus LinearScan::doLinearScan()
     compiler->codeGen->regSet.rsClearRegsModified();

     initMaxSpill();
+
+#ifdef TARGET_ARM64
+    nextConsecutiveRefPositionMap = nullptr;
+#endif
     buildIntervals();
     DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_REFPOS));
     compiler->EndPhase(PHASE_LINEAR_SCAN_BUILD);
@@ -1299,7 +1310,18 @@ PhaseStatus LinearScan::doLinearScan()
     DBEXEC(VERBOSE, lsraDumpIntervals("after buildIntervals"));

     initVarRegMaps();
-    allocateRegisters();
+
+#ifdef TARGET_ARM64
+    if (compiler->info.compNeedsConsecutiveRegisters)
+    {
+        allocateRegisters<true>();
+    }
+    else
+#endif // TARGET_ARM64
+    {
+        allocateRegisters<false>();
+    }
+
     allocationPassComplete = true;
     compiler->EndPhase(PHASE_LINEAR_SCAN_ALLOC);
     resolveRegisters();
@@ -2819,11 +2841,13 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo
 //      of all but also has a weight lower than 'refPosition'. If there is
 //      no such ref position, no register will be allocated.
 //
-
+template <bool needsConsecutiveRegisters>
 regNumber LinearScan::allocateReg(Interval*    currentInterval,
                                   RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore))
 {
-    regMaskTP foundRegBit = regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore));
+    regMaskTP foundRegBit =
+        regSelector->select<needsConsecutiveRegisters>(currentInterval, refPosition DEBUG_ARG(registerScore));
+
     if (foundRegBit == RBM_NONE)
     {
         return REG_NA;
     }
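The `<true>`/`<false>` dispatch above relies on the template-bool pattern: the consecutive-register bookkeeping compiles away entirely on the common path. A minimal standalone sketch of the idea (names mirror the diff but this is not JIT code):

```cpp
#include <cstdio>

template <bool hasConsecutiveRegister>
void allocateRegisters()
{
    if (hasConsecutiveRegister) // resolved at compile time; dead branch is eliminated
        printf("consecutive-register bookkeeping enabled\n");
    else
        printf("fast path, no extra checks\n");
}

int main()
{
    bool needsConsecutive = true; // in the JIT: compiler->info.compNeedsConsecutiveRegisters
    if (needsConsecutive)
        allocateRegisters<true>();
    else
        allocateRegisters<false>();
}
```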
@@ -3062,8 +3086,12 @@ bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, R
     // We shouldn't be calling this if we haven't already determined that the register is not
     // busy until the next kill.
     assert(!isRegBusy(physRegRecord->regNum, current->registerType));
-    // We should already have determined that the register isn't actively in use.
+// We should already have determined that the register isn't actively in use.
+#ifdef TARGET_ARM64
+    assert(!isRegInUse(physRegRecord->regNum, current->registerType) || refPosition->needsConsecutive);
+#else
     assert(!isRegInUse(physRegRecord->regNum, current->registerType));
+#endif
     // We shouldn't be calling this if 'refPosition' is a fixed reference to this register.
     assert(!refPosition->isFixedRefOfRegMask(candidateBit));
     // We shouldn't be calling this if there is a fixed reference at the same location
@@ -3081,11 +3109,8 @@ bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, R
     {
         canSpill = canSpillReg(physRegRecord, refLocation);
     }
-    if (!canSpill)
-    {
-        return false;
-    }
-    return true;
+
+    return canSpill;
 }

 // Grab a register to use to copy and then immediately use.
@@ -3096,6 +3121,7 @@ bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, R
 // Prefer a free register that's got the earliest next use.
 // Otherwise, spill something with the farthest next use
 //
+template <bool needsConsecutiveRegisters>
 regNumber LinearScan::assignCopyReg(RefPosition* refPosition)
 {
     Interval* currentInterval = refPosition->getInterval();
@@ -3118,7 +3144,9 @@ regNumber LinearScan::assignCopyReg(RefPosition* refPosition)
     refPosition->copyReg = true;

     RegisterScore registerScore = NONE;
-    regNumber allocatedReg = allocateReg(currentInterval, refPosition DEBUG_ARG(&registerScore));
+    regNumber     allocatedReg  =
+        allocateReg<needsConsecutiveRegisters>(currentInterval, refPosition DEBUG_ARG(&registerScore));
+
     assert(allocatedReg != REG_NA);

     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, allocatedReg, nullptr, registerScore));
@@ -4567,6 +4595,9 @@ void LinearScan::freeRegisters(regMaskTP regsToFree)
 // LinearScan::allocateRegisters: Perform the actual register allocation by iterating over
 //                                all of the previously constructed Intervals
 //
+#ifdef TARGET_ARM64
+template <bool hasConsecutiveRegister>
+#endif
 void LinearScan::allocateRegisters()
 {
     JITDUMP("*************** In LinearScan::allocateRegisters()\n");
@@ -5324,34 +5355,158 @@ void LinearScan::allocateRegisters()
             }
             else if ((genRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0)
             {
-                currentRefPosition.registerAssignment = assignedRegBit;
-                if (!currentInterval->isActive)
+#ifdef TARGET_ARM64
+                if (hasConsecutiveRegister && currentRefPosition.isFirstRefPositionOfConsecutiveRegisters())
                 {
-                    // If we've got an exposed use at the top of a block, the
-                    // interval might not have been active. Otherwise if it's a use,
-                    // the interval must be active.
-                    if (refType == RefTypeDummyDef)
+                    // For consecutive registers, if the first RefPosition is already assigned to a register,
+                    // check if consecutive registers are free so they can be assigned to the subsequent
+                    // RefPositions.
+                    if (canAssignNextConsecutiveRegisters(&currentRefPosition, assignedRegister))
                     {
-                        currentInterval->isActive = true;
-                        assert(getRegisterRecord(assignedRegister)->assignedInterval == currentInterval);
+                        // Current assignedRegister satisfies the consecutive registers requirements
+                        currentRefPosition.registerAssignment = assignedRegBit;
+                        INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister));
+
+                        assignConsecutiveRegisters(&currentRefPosition, assignedRegister);
                     }
                     else
                     {
-                        currentRefPosition.reload = true;
+                        // It doesn't satisfy, so do a copyReg for the first RefPosition to such a register, so
+                        // it would be possible to allocate consecutive registers to the subsequent RefPositions.
+                        regNumber copyReg = assignCopyReg<true>(&currentRefPosition);
+
+                        if (copyReg != assignedRegister)
+                        {
+                            lastAllocatedRefPosition  = &currentRefPosition;
+                            regMaskTP copyRegMask     = getRegMask(copyReg, currentInterval->registerType);
+                            regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType);
+
+                            // For consecutive registers, it doesn't matter what the assigned register was.
+                            // We have just assigned it `copyRegMask` and that's the one in-use, and not the
+                            // one that was assigned previously.
+
+                            regsInUseThisLocation |= copyRegMask;
+                            if (currentRefPosition.lastUse)
+                            {
+                                if (currentRefPosition.delayRegFree)
+                                {
+                                    INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval,
+                                                                    assignedRegister));
+                                    delayRegsToFree |= copyRegMask | assignedRegMask;
+                                    regsInUseNextLocation |= copyRegMask | assignedRegMask;
+                                }
+                                else
+                                {
+                                    INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval,
+                                                                    assignedRegister));
+                                    regsToFree |= copyRegMask | assignedRegMask;
+                                }
+                            }
+                            else
+                            {
+                                copyRegsToFree |= copyRegMask;
+                                if (currentRefPosition.delayRegFree)
+                                {
+                                    regsInUseNextLocation |= copyRegMask | assignedRegMask;
+                                }
+                            }
+
+                            // If this is a tree temp (non-localVar) interval, we will need an explicit move.
+                            // Note: In theory a moveReg should cause the Interval to now have the new reg as its
+                            // assigned register. However, that's not currently how this works.
+                            // If we ever actually move lclVar intervals instead of copying, this will need to change.
+                            if (!currentInterval->isLocalVar)
+                            {
+                                currentRefPosition.moveReg = true;
+                                currentRefPosition.copyReg = false;
+                            }
+                            clearNextIntervalRef(copyReg, currentInterval->registerType);
+                            clearSpillCost(copyReg, currentInterval->registerType);
+                            updateNextIntervalRef(assignedRegister, currentInterval);
+                            updateSpillCost(assignedRegister, currentInterval);
+                        }
+                        else
+                        {
+                            // We first noticed that with assignedRegister, we were not getting consecutive registers
+                            // assigned, so we decided to perform a copyReg. However, copyReg assigned the same register
+                            // because there were no other free registers that would satisfy the consecutive registers
+                            // requirement. In such a case, just revert the copyReg state update.
+                            currentRefPosition.copyReg = false;

+                            // Current assignedRegister satisfies the consecutive registers requirements
+                            currentRefPosition.registerAssignment = assignedRegBit;
+                        }
+
+                        assignConsecutiveRegisters(&currentRefPosition, copyReg);
+                        continue;
+                    }
+                }
+                else
+#endif
+                {
+                    currentRefPosition.registerAssignment = assignedRegBit;
+                    if (!currentInterval->isActive)
+                    {
+                        // If we've got an exposed use at the top of a block, the
+                        // interval might not have been active. Otherwise if it's a use,
+                        // the interval must be active.
+                        if (refType == RefTypeDummyDef)
+                        {
+                            currentInterval->isActive = true;
+                            assert(getRegisterRecord(assignedRegister)->assignedInterval == currentInterval);
+                        }
+                        else
+                        {
+                            currentRefPosition.reload = true;
+                        }
                     }
+                    INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister));
                 }
-                INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister));
             }
             else
             {
                 // It's already in a register, but not one we need.
                 if (!RefTypeIsDef(currentRefPosition.refType))
                 {
-                    regNumber copyReg = assignCopyReg(&currentRefPosition);
+                    regNumber copyReg;
+#ifdef TARGET_ARM64
+                    if (hasConsecutiveRegister && currentRefPosition.needsConsecutive &&
+                        currentRefPosition.refType == RefTypeUse)
+                    {
+                        copyReg = assignCopyReg<true>(&currentRefPosition);
+                    }
+                    else
+#endif
+                    {
+                        copyReg = assignCopyReg<false>(&currentRefPosition);
+                    }
+
                     lastAllocatedRefPosition  = &currentRefPosition;
                     regMaskTP copyRegMask     = getRegMask(copyReg, currentInterval->registerType);
                     regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType);
-                    regsInUseThisLocation |= copyRegMask | assignedRegMask;
+
+#ifdef TARGET_ARM64
+                    if (hasConsecutiveRegister && currentRefPosition.needsConsecutive)
+                    {
+                        if (currentRefPosition.isFirstRefPositionOfConsecutiveRegisters())
+                        {
+                            // If the first RefPosition was not assigned to the register that we wanted, we added
+                            // a copyReg for it. Allocate subsequent RefPositions with the consecutive
+                            // registers.
+                            assignConsecutiveRegisters(&currentRefPosition, copyReg);
+                        }
+
+                        // For consecutive registers, it doesn't matter what the assigned register was.
+                        // We have just assigned it `copyRegMask` and that's the one in-use, and not the
+                        // one that was assigned previously.
+
+                        regsInUseThisLocation |= copyRegMask;
+                    }
+                    else
+#endif
+                    {
+                        regsInUseThisLocation |= copyRegMask | assignedRegMask;
+                    }
                     if (currentRefPosition.lastUse)
                     {
                         if (currentRefPosition.delayRegFree)
@@ -5405,6 +5560,48 @@ void LinearScan::allocateRegisters()
                 }
             }

+#ifdef TARGET_ARM64
+            if (hasConsecutiveRegister && currentRefPosition.needsConsecutive)
+            {
+                // For consecutive registers, we would like to assign a register (if not already assigned)
+                // to the 1st RefPosition, and the subsequent RefPositions will just get the consecutive registers.
+                if (currentRefPosition.isFirstRefPositionOfConsecutiveRegisters())
+                {
+                    if (assignedRegister != REG_NA)
+                    {
+                        // For the 1st RefPosition, if it already has a register assigned, then just assign
+                        // subsequent registers to the remaining positions and skip the allocation for the
+                        // 1st RefPosition altogether.
+
+                        if (!canAssignNextConsecutiveRegisters(&currentRefPosition, assignedRegister))
+                        {
+                            // The consecutive registers are busy. Force allocation even for the 1st
+                            // RefPosition.
+                            assignedRegister                      = REG_NA;
+                            RegRecord* physRegRecord              = getRegisterRecord(currentInterval->physReg);
+                            currentRefPosition.registerAssignment = allRegs(currentInterval->registerType);
+                        }
+                    }
+                }
+                else if (currentRefPosition.refType == RefTypeUse)
+                {
+                    // Remaining RefPositions of the series...
+                    if (assignedRegBit == currentRefPosition.registerAssignment)
+                    {
+                        // If the subsequent position already has the subsequent register assigned, then
+                        // there is no need to find a register to assign.
+                        allocate = false;
+                    }
+                    else
+                    {
+                        // If the subsequent refPosition is not assigned to the consecutive register, then reassign
+                        // the right consecutive register.
+                        assignedRegister = REG_NA;
+                    }
+                }
+            }
+#endif // TARGET_ARM64
+
             if (assignedRegister == REG_NA)
             {
                 if (currentRefPosition.RegOptional())
@@ -5455,7 +5652,22 @@ void LinearScan::allocateRegisters()
                 {
                     unassignPhysReg(currentInterval->assignedReg, nullptr);
                 }
-                assignedRegister = allocateReg(currentInterval, &currentRefPosition DEBUG_ARG(&registerScore));
+
+#ifdef TARGET_ARM64
+                if (hasConsecutiveRegister && currentRefPosition.needsConsecutive)
+                {
+                    assignedRegister =
+                        allocateReg<true>(currentInterval, &currentRefPosition DEBUG_ARG(&registerScore));
+                    if (currentRefPosition.isFirstRefPositionOfConsecutiveRegisters())
+                    {
+                        assignConsecutiveRegisters(&currentRefPosition, assignedRegister);
+                    }
+                }
+                else
+#endif // TARGET_ARM64
+                {
+                    assignedRegister = allocateReg<false>(currentInterval, &currentRefPosition DEBUG_ARG(&registerScore));
+                }
             }

             // If no register was found, this RefPosition must not require a register.
@@ -11459,19 +11671,38 @@ void LinearScan::RegisterSelection::try_SPILL_COST()
     // The spill weight for the best candidate we've found so far.
     weight_t bestSpillWeight = FloatingPointUtils::infinite_double();
     // True if we found registers with lower spill weight than this refPosition.
-    bool foundLowerSpillWeight = false;
+    bool         foundLowerSpillWeight = false;
+    LsraLocation thisLocation          = refPosition->nodeLocation;

     for (regMaskTP spillCandidates = candidates; spillCandidates != RBM_NONE;)
     {
         regMaskTP spillCandidateBit = genFindLowestBit(spillCandidates);
         spillCandidates &= ~spillCandidateBit;

-        regNumber  spillCandidateRegNum    = genRegNumFromMask(spillCandidateBit);
-        RegRecord* spillCandidateRegRecord = &linearScan->physRegs[spillCandidateRegNum];
-        Interval*  assignedInterval        = spillCandidateRegRecord->assignedInterval;
+        regNumber    spillCandidateRegNum    = genRegNumFromMask(spillCandidateBit);
+        RegRecord*   spillCandidateRegRecord = &linearScan->physRegs[spillCandidateRegNum];
+        Interval*    assignedInterval        = spillCandidateRegRecord->assignedInterval;
+        RefPosition* recentRefPosition =
+            assignedInterval != nullptr ? assignedInterval->recentRefPosition : nullptr;
+
+// Can and should the interval in this register be spilled for this one,
+// if we don't find a better alternative?
+
+#ifdef TARGET_ARM64
+        if (assignedInterval == nullptr)
+        {
+            // Ideally we should not be seeing this candidate because it is not assigned to
+            // any interval. But based on that, we cannot determine if it is a good spill
+            // candidate or not. Skip processing it.
+            continue;
+        }

-        // Can and should the interval in this register be spilled for this one,
-        // if we don't find a better alternative?
-        if ((linearScan->getNextIntervalRef(spillCandidateRegNum, regType) == refPosition->nodeLocation) &&
+        if ((recentRefPosition != nullptr) && linearScan->isRefPositionActive(recentRefPosition, thisLocation) &&
+            (recentRefPosition->needsConsecutive))
+        {
+            continue;
+        }
+#endif // TARGET_ARM64
+
+        if ((linearScan->getNextIntervalRef(spillCandidateRegNum, regType) == thisLocation) &&
             !assignedInterval->getNextRefPosition()->RegOptional())
         {
             continue;
@@ -11481,8 +11712,7 @@ void LinearScan::RegisterSelection::try_SPILL_COST()
             continue;
         }

-        weight_t currentSpillWeight = 0;
-        RefPosition* recentRefPosition = assignedInterval != nullptr ? assignedInterval->recentRefPosition : nullptr;
+        weight_t currentSpillWeight = 0;
         if ((recentRefPosition != nullptr) &&
             (recentRefPosition->RegOptional() && !(assignedInterval->isLocalVar && recentRefPosition->IsActualRef())))
         {
@@ -11750,6 +11980,7 @@ void LinearScan::RegisterSelection::calculateCoversSets()
 // Return Values:
 //    Register bit selected (a single register) and REG_NA if no register was selected.
 //
+template <bool needsConsecutiveRegisters>
 regMaskTP LinearScan::RegisterSelection::select(Interval*    currentInterval,
                                                 RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore))
 {
@@ -11958,6 +12189,9 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
     // Eliminate candidates that are in-use or busy.
     if (!found)
     {
+        // TODO-CQ: We assign the same registerAssignment to UPPER_RESTORE and the next USE.
+        // When we allocate for the USE, we see that the register is busy at the current location,
+        // and we end up with that candidate no longer being available.
         regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation;
         candidates &= ~busyRegs;

@@ -11992,11 +12226,18 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
         prevRegBit = genRegMask(prevRegRec->regNum);
         if ((prevRegRec->assignedInterval == currentInterval) && ((candidates & prevRegBit) != RBM_NONE))
         {
-            candidates = prevRegBit;
-            found      = true;
+#ifdef TARGET_ARM64
+            // If this is allocating for consecutive registers, we need to make sure that
+            // we allocate a register whose consecutive registers are also free.
+            if (!needsConsecutiveRegisters)
+#endif
+            {
+                candidates = prevRegBit;
+                found      = true;
 #ifdef DEBUG
-            *registerScore = THIS_ASSIGNED;
+                *registerScore = THIS_ASSIGNED;
 #endif
+            }
         }
     }
     else
@@ -12019,7 +12260,41 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
     reverseSelect = linearScan->doReverseSelect();
 #endif // DEBUG

-    freeCandidates = linearScan->getFreeCandidates(candidates, regType);
+#ifdef TARGET_ARM64
+    if (needsConsecutiveRegisters)
+    {
+        regMaskTP busyConsecutiveCandidates = RBM_NONE;
+        if (refPosition->isFirstRefPositionOfConsecutiveRegisters())
+        {
+            freeCandidates = linearScan->getConsecutiveCandidates(candidates, refPosition, &busyConsecutiveCandidates);
+            if (freeCandidates == RBM_NONE)
+            {
+                candidates = busyConsecutiveCandidates;
+            }
+            else
+            {
+                assert(busyConsecutiveCandidates == RBM_NONE);
+            }
+        }
+        else
+        {
+            // We should have a single candidate that will be used for subsequent
+            // RefPositions.
+            assert((refPosition->refType == RefTypeUpperVectorRestore) || (genCountBits(candidates) == 1));
+
+            freeCandidates = candidates & linearScan->m_AvailableRegs;
+        }
+
+        if ((freeCandidates == RBM_NONE) && (candidates == RBM_NONE))
+        {
+            noway_assert(!"Not sufficient consecutive registers available.");
+        }
+    }
+    else
+#endif // TARGET_ARM64
+    {
+        freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType));
+    }

     // If no free candidates, then double check if refPosition is an actual ref.
     if (freeCandidates == RBM_NONE)
diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h
index 58739b48ae1381..373681085ce944 100644
--- a/src/coreclr/jit/lsra.h
+++ b/src/coreclr/jit/lsra.h
@@ -644,7 +644,10 @@ class LinearScan : public LinearScanInterface
     // This does the dataflow analysis and builds the intervals
     void buildIntervals();

-    // This is where the actual assignment is done
+// This is where the actual assignment is done
+#ifdef TARGET_ARM64
+    template <bool hasConsecutiveRegister>
+#endif
     void allocateRegisters();

     // This is the resolution phase, where cross-block mismatches are fixed up
@@ -789,6 +792,9 @@ class LinearScan : public LinearScanInterface
 #elif defined(TARGET_ARM64)
     static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
     static const regMaskTP LsraLimitSmallFPSet  = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
+    // LsraLimitFPSetForConsecutive is used for stress mode and gives a few extra registers to satisfy
+    // the requirements for allocating consecutive registers.
+    static const regMaskTP LsraLimitFPSetForConsecutive = (RBM_V3 | RBM_V5 | RBM_V7);
 #elif defined(TARGET_X86)
     static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
     static const regMaskTP LsraLimitSmallFPSet  = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
@@ -1179,7 +1185,9 @@ class LinearScan : public LinearScanInterface
 #ifdef DEBUG
     const char* getScoreName(RegisterScore score);
 #endif
+    template <bool needsConsecutiveRegisters>
     regNumber allocateReg(Interval* current, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore));
+    template <bool needsConsecutiveRegisters>
     regNumber assignCopyReg(RefPosition* refPosition);

     bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition);
@@ -1207,10 +1215,21 @@ class LinearScan : public LinearScanInterface

     void spillGCRefs(RefPosition* killRefPosition);

-    /*****************************************************************************
-     * Register selection
-     ****************************************************************************/
-    regMaskTP getFreeCandidates(regMaskTP candidates, var_types regType)
+/*****************************************************************************
+* Register selection
+****************************************************************************/
+
+#if defined(TARGET_ARM64)
+    bool      canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned);
+    void      assignConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned);
+    regMaskTP getConsecutiveCandidates(regMaskTP candidates, RefPosition* refPosition, regMaskTP* busyCandidates);
+    regMaskTP filterConsecutiveCandidates(regMaskTP    candidates,
+                                          unsigned int registersNeeded,
+                                          regMaskTP*   allConsecutiveCandidates);
+    regMaskTP filterConsecutiveCandidatesForSpill(regMaskTP consecutiveCandidates, unsigned int registersNeeded);
+#endif // TARGET_ARM64
+
+    regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType))
     {
         regMaskTP result = candidates & m_AvailableRegs;
 #ifdef TARGET_ARM
@@ -1239,6 +1258,7 @@ class LinearScan : public LinearScanInterface
         RegisterSelection(LinearScan* linearScan);

         // Perform register selection and update currentInterval or refPosition
+        template <bool needsConsecutiveRegisters>
         FORCEINLINE regMaskTP select(Interval*    currentInterval,
                                      RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore));

@@ -1384,6 +1404,21 @@ class LinearScan : public LinearScanInterface
                                   VARSET_VALARG_TP sharedCriticalLiveSet,
                                   regMaskTP        terminatorConsumedRegs);

+#ifdef TARGET_ARM64
+    typedef JitHashTable<RefPosition*, JitPtrKeyFuncs<RefPosition>, RefPosition*> NextConsecutiveRefPositionsMap;
+    NextConsecutiveRefPositionsMap* nextConsecutiveRefPositionMap;
+    NextConsecutiveRefPositionsMap* getNextConsecutiveRefPositionsMap()
+    {
+        if (nextConsecutiveRefPositionMap == nullptr)
+        {
+            nextConsecutiveRefPositionMap =
+                new (getAllocator(compiler)) NextConsecutiveRefPositionsMap(getAllocator(compiler));
+        }
+        return nextConsecutiveRefPositionMap;
+    }
+    FORCEINLINE RefPosition* getNextConsecutiveRefPosition(RefPosition* refPosition);
+#endif
+
 #ifdef DEBUG
     void dumpVarToRegMap(VarToRegMap map);
     void dumpInVarToRegMap(BasicBlock* block);
@@ -1882,7 +1917,6 @@ class LinearScan : public LinearScanInterface
     bool checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode);

     RefPosition* BuildUse(GenTree* operand, regMaskTP candidates = RBM_NONE, int multiRegIdx = 0);
-    void setDelayFree(RefPosition* use);
     int  BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE);
     int  BuildCastUses(GenTreeCast* cast, regMaskTP candidates);
@@ -1967,6 +2001,9 @@ class LinearScan : public LinearScanInterface

 #ifdef FEATURE_HW_INTRINSICS
     int BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCount);
+#ifdef TARGET_ARM64
+    int BuildConsecutiveRegistersForUse(GenTree* treeNode, GenTree* rmwNode = nullptr);
+#endif
 #endif // FEATURE_HW_INTRINSICS

     int BuildPutArgStk(GenTreePutArgStk* argNode);
@@ -2350,6 +2387,14 @@ class RefPosition
     // would be 0..MAX_MULTIREG_COUNT-1.
     unsigned char multiRegIdx : 2;

+#ifdef TARGET_ARM64
+    // If this refposition needs consecutive register assignment
+    unsigned char needsConsecutive : 1;
+
+    // How many consecutive registers do this and the subsequent refPositions need
+    unsigned char regCount : 3;
+#endif // TARGET_ARM64
+
     // Last Use - this may be true for multiple RefPositions in the same Interval
     unsigned char lastUse : 1;

@@ -2435,6 +2480,10 @@ class RefPosition
         , registerAssignment(RBM_NONE)
         , refType(refType)
         , multiRegIdx(0)
+#ifdef TARGET_ARM64
+        , needsConsecutive(false)
+        , regCount(0)
+#endif
         , lastUse(false)
         , reload(false)
         , spillAfter(false)
@@ -2588,6 +2637,19 @@ class RefPosition
         return (isFixedRefOfRegMask(genRegMask(regNum)));
     }

+#ifdef TARGET_ARM64
+    /// For consecutive registers, returns true if this RefPosition is
+    /// the first of the series.
+    FORCEINLINE bool isFirstRefPositionOfConsecutiveRegisters()
+    {
+        if (needsConsecutive)
+        {
+            return regCount != 0;
+        }
+        return false;
+    }
+#endif // TARGET_ARM64
+
 #ifdef DEBUG
     // operator= copies everything except 'rpNum', which must remain unique
     RefPosition& operator=(const RefPosition& rp)
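A sketch of the lazy-construction pattern used for the map above, with standard containers standing in for `JitHashTable` and its arena allocator (hypothetical standalone version, not JIT code):

```cpp
#include <cassert>
#include <unordered_map>

struct RefPosition;
using NextConsecutiveRefPositionsMap = std::unordered_map<RefPosition*, RefPosition*>;

NextConsecutiveRefPositionsMap* nextConsecutiveRefPositionMap = nullptr;

NextConsecutiveRefPositionsMap* getNextConsecutiveRefPositionsMap()
{
    if (nextConsecutiveRefPositionMap == nullptr)
    {
        // Allocated only if some intrinsic in the method actually needs
        // consecutive registers; most methods never pay for it.
        nextConsecutiveRefPositionMap = new NextConsecutiveRefPositionsMap();
    }
    return nextConsecutiveRefPositionMap;
}

int main()
{
    assert(getNextConsecutiveRefPositionsMap() == getNextConsecutiveRefPositionsMap());
}
```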
diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp
index 394f402d0a3383..72047f67a56b35 100644
--- a/src/coreclr/jit/lsraarm64.cpp
+++ b/src/coreclr/jit/lsraarm64.cpp
@@ -25,6 +25,534 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #include "sideeffects.h"
 #include "lower.h"

+//------------------------------------------------------------------------
+// getNextConsecutiveRefPosition: Get the next RefPosition in a series of
+// RefPositions that need consecutive registers.
+//
+// Arguments:
+//    refPosition - The RefPosition for which we need to find the next RefPosition.
+//
+// Return Value:
+//    The next RefPosition, or nullptr if there is not one.
+//
+RefPosition* LinearScan::getNextConsecutiveRefPosition(RefPosition* refPosition)
+{
+    assert(compiler->info.compNeedsConsecutiveRegisters);
+    RefPosition* nextRefPosition;
+    assert(refPosition->needsConsecutive);
+    nextConsecutiveRefPositionMap->Lookup(refPosition, &nextRefPosition);
+    assert((nextRefPosition == nullptr) || nextRefPosition->needsConsecutive);
+    return nextRefPosition;
+}
+
+//------------------------------------------------------------------------
+// assignConsecutiveRegisters: For the subsequent RefPositions, set the register
+// requirement to the consecutive register(s) of the register that is assigned to
+// the firstRefPosition.
+// If one of the subsequent RefPositions is a RefTypeUpperVectorRestore, sets its
+// registerAssignment to exclude all of the consecutive registers that are being
+// assigned to the RefTypeUse RefPositions.
+//
+// Arguments:
+//    firstRefPosition - First RefPosition of the series of consecutive registers.
+//    firstRegAssigned - Register assigned to the first RefPosition.
+//
+// Note:
+//    This method sets the registerAssignment of the subsequent RefPositions to
+//    consecutive registers. Some of those registers could be busy, in which case they
+//    will be spilled; we end up with busy registers only if no free consecutive
+//    registers were found.
+//
+void LinearScan::assignConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned)
+{
+    assert(compiler->info.compNeedsConsecutiveRegisters);
+    assert(firstRefPosition->assignedReg() == firstRegAssigned);
+    assert(firstRefPosition->isFirstRefPositionOfConsecutiveRegisters());
+    assert(emitter::isVectorRegister(firstRegAssigned));
+
+    RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(firstRefPosition);
+    regNumber    regToAssign            = firstRegAssigned == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(firstRegAssigned);
+
+    // First RefPosition should always start with RefTypeUse
+    assert(firstRefPosition->refType != RefTypeUpperVectorRestore);
+
+    INDEBUG(int refPosCount = 1);
+    regMaskTP busyConsecutiveRegMask = (((1ULL << firstRefPosition->regCount) - 1) << firstRegAssigned);
+
+    while (consecutiveRefPosition != nullptr)
+    {
+        assert(consecutiveRefPosition->regCount == 0);
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+        if (consecutiveRefPosition->refType == RefTypeUpperVectorRestore)
+        {
+            Interval* srcInterval = consecutiveRefPosition->getInterval();
+            assert(srcInterval->isUpperVector);
+            assert(srcInterval->relatedInterval != nullptr);
+            if (srcInterval->relatedInterval->isPartiallySpilled)
+            {
+                // Make sure that the restore doesn't get one of the registers that are part of the series we are
+                // currently trying to set.
+                // TODO-CQ: We could technically assign RefTypeUpperVectorRestore and its RefTypeUse the same
+                // register, but during register selection, it might get tricky to know which of the busy registers
+                // are assigned to RefTypeUpperVectorRestore positions of the corresponding variables for which
+                // (another criteria) we are trying to find consecutive registers.
+
+                consecutiveRefPosition->registerAssignment &= ~busyConsecutiveRegMask;
+            }
+            consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition);
+        }
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+        INDEBUG(refPosCount++);
+        assert(consecutiveRefPosition->refType == RefTypeUse);
+        consecutiveRefPosition->registerAssignment = genRegMask(regToAssign);
+        consecutiveRefPosition                     = getNextConsecutiveRefPosition(consecutiveRefPosition);
+        regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign);
+    }
+
+    assert(refPosCount == firstRefPosition->regCount);
+}
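The register stepping above wraps around: `REG_NEXT` advances through the vector registers and V31 is followed by V0, matching TBL/TBX's modular register-list encoding. A standalone model (registers numbered 0..31 here for simplicity; the real `regNumber` values are enum members):

```cpp
#include <cstdio>

unsigned nextConsecutiveFpReg(unsigned reg)
{
    return (reg == 31) ? 0 : (reg + 1); // REG_FP_LAST wraps to REG_FP_FIRST
}

int main()
{
    // A 4-register series that starts at V30 wraps: V30, V31, V0, V1.
    unsigned reg = 30;
    for (int i = 0; i < 4; i++)
    {
        printf("V%u ", reg);
        reg = nextConsecutiveFpReg(reg);
    }
    printf("\n");
}
```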
+//------------------------------------------------------------------------
+// canAssignNextConsecutiveRegisters: Starting with `firstRegAssigned`, check if the next
+// consecutive registers are free or are already assigned to the subsequent RefPositions.
+//
+// Arguments:
+//    firstRefPosition - First RefPosition of the series of consecutive registers.
+//    firstRegAssigned - Register assigned to the first RefPosition.
+//
+// Returns:
+//    True if all the consecutive registers starting from `firstRegAssigned` are assignable;
+//    false if even one of them is busy.
+//
+bool LinearScan::canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned)
+{
+    int          registersCount = firstRefPosition->regCount;
+    RefPosition* nextRefPosition = firstRefPosition;
+    regNumber    regToAssign     = firstRegAssigned;
+    assert(compiler->info.compNeedsConsecutiveRegisters);
+    assert(registersCount > 1);
+    assert(emitter::isVectorRegister(firstRegAssigned));
+
+    int i = 1;
+    do
+    {
+        nextRefPosition = getNextConsecutiveRefPosition(nextRefPosition);
+        regToAssign     = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign);
+        if (!isFree(getRegisterRecord(regToAssign)))
+        {
+            if (nextRefPosition->refType == RefTypeUpperVectorRestore)
+            {
+                nextRefPosition = getNextConsecutiveRefPosition(nextRefPosition);
+            }
+
+            // If regToAssign is not free, check if it is already assigned to the interval corresponding
+            // to the subsequent nextRefPosition. If yes, that nextRefPosition will just use regToAssign.
+            if ((nextRefPosition->getInterval() != nullptr) &&
+                (nextRefPosition->getInterval()->assignedReg != nullptr) &&
+                ((nextRefPosition->getInterval()->assignedReg->regNum == regToAssign)))
+            {
+                continue;
+            }
+
+            return false;
+        }
+    } while (++i != registersCount);
+
+    return true;
+}
+
+//------------------------------------------------------------------------
+// filterConsecutiveCandidates: Given `candidates`, check whether `registersNeeded`
+// consecutive registers are available in it, and if so, return a mask containing the
+// first bit set of every possible series.
+//
+// Arguments:
+//    candidates               - Set of available candidates.
+//    registersNeeded          - Number of consecutive registers needed.
+//    allConsecutiveCandidates - Mask returned containing all bits set for possible consecutive register candidates.
+//
+// Returns:
+//    From `candidates`, the mask of possible series of `registersNeeded` consecutive registers,
+//    with just the first bit of each series set.
+//
+regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP    candidates,
+                                                  unsigned int registersNeeded,
+                                                  regMaskTP*   allConsecutiveCandidates)
+{
+    if (BitOperations::PopCount(candidates) < registersNeeded)
+    {
+        // There is no way the register demand can be satisfied for this RefPosition
+        // based on the candidates from which it can allocate a register.
+        return RBM_NONE;
+    }
+
+    regMaskTP currAvailableRegs = candidates;
+    regMaskTP overallResult     = RBM_NONE;
+    regMaskTP consecutiveResult = RBM_NONE;
+
+// At this point, for an 'n'-register requirement, if Rm, Rm+1, Rm+2, ..., Rm+k-1 are
+// available, create the mask only for Rm, Rm+1, ..., Rm+(k-n) to convey that it
+// is safe to assign any of those registers, but not beyond that.
+#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask)                                            \
+    regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1;                                               \
+    regMaskTP selectionEndMask   = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1;                         \
+    consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask);                            \
+    overallResult |= availableRegistersMask;
+
+    DWORD regAvailableStartIndex = 0, regAvailableEndIndex = 0;
+
+    // If we don't find consecutive registers, also track which registers we can pick so
+    // as to reduce the number of registers we will have to spill to accommodate the
+    // request for consecutive registers.
+    regMaskTP registersNeededMask = (1ULL << registersNeeded) - 1;
+
+    do
+    {
+        // From LSB, find the first available register (bit `1`)
+        BitScanForward64(&regAvailableStartIndex, static_cast<DWORD64>(currAvailableRegs));
+        regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1;
+
+        // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`.
+        regMaskTP maskProcessed = ~(currAvailableRegs | startMask);
+
+        // From regAvailableStart, find the first unavailable register (bit `0`).
+        if (maskProcessed == RBM_NONE)
+        {
+            regAvailableEndIndex = 64;
+            if ((regAvailableEndIndex - regAvailableStartIndex) >= registersNeeded)
+            {
+                AppendConsecutiveMask(regAvailableStartIndex, regAvailableEndIndex, currAvailableRegs);
+            }
+            break;
+        }
+        else
+        {
+            BitScanForward64(&regAvailableEndIndex, static_cast<DWORD64>(maskProcessed));
+        }
+        regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1;
+
+        // Anything between regAvailableStart and regAvailableEnd is the range of consecutive registers available.
+        // If they are equal to or greater than our register requirements, then add all of them to the result.
+        if ((regAvailableEndIndex - regAvailableStartIndex) >= registersNeeded)
+        {
+            AppendConsecutiveMask(regAvailableStartIndex, regAvailableEndIndex, (endMask & ~startMask));
+        }
+        currAvailableRegs &= ~endMask;
+    } while (currAvailableRegs != RBM_NONE);
+
+    regMaskTP v0_v31_mask = RBM_V0 | RBM_V31;
+    if ((candidates & v0_v31_mask) == v0_v31_mask)
+    {
+        // Finally, check for the round-robin case, where a sequence wrapping from the last
+        // register around to the first register is available.
+        // For n registers needed, it checks if MSB (n-1) + LSB (1) or
+        // MSB (n-2) + LSB (2) registers are available, and if yes,
+        // sets the least bit of such an MSB.
+        //
+        // This could have been done using bit-twiddling, but is simpler when the
+        // checks are done with these hardcoded values.
+        switch (registersNeeded)
+        {
+            case 2:
+            {
+                if ((candidates & v0_v31_mask) == v0_v31_mask)
+                {
+                    consecutiveResult |= RBM_V31;
+                    overallResult |= v0_v31_mask;
+                }
+                break;
+            }
+            case 3:
+            {
+                regMaskTP v0_v30_v31_mask = RBM_V0 | RBM_V30 | RBM_V31;
+                if ((candidates & v0_v30_v31_mask) == v0_v30_v31_mask)
+                {
+                    consecutiveResult |= RBM_V30;
+                    overallResult |= v0_v30_v31_mask;
+                }
+
+                regMaskTP v0_v1_v31_mask = RBM_V0 | RBM_V1 | RBM_V31;
+                if ((candidates & v0_v1_v31_mask) == v0_v1_v31_mask)
+                {
+                    consecutiveResult |= RBM_V31;
+                    overallResult |= v0_v1_v31_mask;
+                }
+                break;
+            }
+            case 4:
+            {
+                regMaskTP v0_v29_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31;
+                if ((candidates & v0_v29_v30_v31_mask) == v0_v29_v30_v31_mask)
+                {
+                    consecutiveResult |= RBM_V29;
+                    overallResult |= v0_v29_v30_v31_mask;
+                }
+
+                regMaskTP v0_v1_v30_v31_mask = RBM_V0 | RBM_V1 | RBM_V30 | RBM_V31;
+                if ((candidates & v0_v1_v30_v31_mask) == v0_v1_v30_v31_mask)
+                {
+                    consecutiveResult |= RBM_V30;
+                    overallResult |= v0_v1_v30_v31_mask;
+                }
+
+                regMaskTP v0_v1_v2_v31_mask = RBM_V0 | RBM_V1 | RBM_V2 | RBM_V31;
+                if ((candidates & v0_v1_v2_v31_mask) == v0_v1_v2_v31_mask)
+                {
+                    consecutiveResult |= RBM_V31;
+                    overallResult |= v0_v1_v2_v31_mask;
+                }
+                break;
+            }
+            default:
+                assert(!"Unexpected registersNeeded\n");
+        }
+    }
+
+    // consecutiveResult should always be a subset of overallResult
+    assert((overallResult & consecutiveResult) == consecutiveResult);
+    *allConsecutiveCandidates = overallResult;
+    return consecutiveResult;
+}
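A standalone check (not JIT code) of the run-finding idea: for a 3-register request, a bit `i` starts a usable series iff bits `i`, `i+1`, and `i+2` are all set (ignoring the V31-to-V0 wrap-around handled separately above). This reproduces the worked example given for `getConsecutiveCandidates` below, `0x1C080D0F00000000 -> 0x0400000300000000`:

```cpp
#include <cassert>
#include <cstdint>

uint64_t startsOfConsecutiveRuns3(uint64_t candidates)
{
    // Bit i survives only if the two bits above it are also set.
    return candidates & (candidates >> 1) & (candidates >> 2);
}

int main()
{
    // Free runs here: V0-V3 (starts V0, V1) and V26-V28 (start V26),
    // with vector registers occupying the upper 32 bits of the mask.
    assert(startsOfConsecutiveRuns3(0x1C080D0F00000000ULL) == 0x0400000300000000ULL);
}
```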
+//------------------------------------------------------------------------
+// filterConsecutiveCandidatesForSpill: Among the selected consecutiveCandidates,
+// check if there are any ranges that would require fewer registers to be spilled,
+// and return such a mask. The result is always a subset of `consecutiveCandidates`.
+//
+// Arguments:
+//    consecutiveCandidates - Consecutive candidates to filter on.
+//    registersNeeded       - Number of registers needed.
+//
+// Returns:
+//    Filtered candidates that need less spilling.
+//
+regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP    consecutiveCandidates,
+                                                          unsigned int registersNeeded)
+{
+    assert(consecutiveCandidates != RBM_NONE);
+    assert((registersNeeded >= 2) && (registersNeeded <= 4));
+    regMaskTP consecutiveResultForBusy = RBM_NONE;
+    regMaskTP unprocessedRegs          = consecutiveCandidates;
+    DWORD     regAvailableStartIndex = 0, regAvailableEndIndex = 0;
+    int       maxSpillRegs        = registersNeeded;
+    regMaskTP registersNeededMask = (1ULL << registersNeeded) - 1;
+    do
+    {
+        // From LSB, find the first available register (bit `1`)
+        BitScanForward64(&regAvailableStartIndex, static_cast<DWORD64>(unprocessedRegs));
+
+        // For the current range, find how many registers are free vs. busy
+        regMaskTP maskForCurRange        = RBM_NONE;
+        bool      shouldCheckForRounding = false;
+        switch (registersNeeded)
+        {
+            case 2:
+                shouldCheckForRounding = (regAvailableStartIndex == 63);
+                break;
+            case 3:
+                shouldCheckForRounding = (regAvailableStartIndex >= 62);
+                break;
+            case 4:
+                shouldCheckForRounding = (regAvailableStartIndex >= 61);
+                break;
+            default:
+                assert(!"Unsupported registersNeeded\n");
+                break;
+        }
+
+        if (shouldCheckForRounding)
+        {
+            unsigned int roundedRegistersNeeded = registersNeeded - (63 - regAvailableStartIndex + 1);
+            maskForCurRange                     = (1ULL << roundedRegistersNeeded) - 1;
+        }
+
+        maskForCurRange |= (registersNeededMask << regAvailableStartIndex);
+        maskForCurRange &= m_AvailableRegs;
+
+        if (maskForCurRange != RBM_NONE)
+        {
+            // In the given range, there are some free registers available. Calculate how many registers
+            // would need spilling if this range is picked.
+
+            int curSpillRegs = registersNeeded - BitOperations::PopCount(maskForCurRange);
+            if (curSpillRegs < maxSpillRegs)
+            {
+                consecutiveResultForBusy = 1ULL << regAvailableStartIndex;
+                maxSpillRegs             = curSpillRegs;
+            }
+            else if (curSpillRegs == maxSpillRegs)
+            {
+                consecutiveResultForBusy |= 1ULL << regAvailableStartIndex;
+            }
+        }
+        unprocessedRegs &= ~(1ULL << regAvailableStartIndex);
+    } while (unprocessedRegs != RBM_NONE);
+
+    // consecutiveResultForBusy should always be a subset of consecutiveCandidates.
+    assert((consecutiveCandidates & consecutiveResultForBusy) == consecutiveResultForBusy);
+    return consecutiveResultForBusy;
+}
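A standalone model (not JIT code) of this spill-minimizing pass: for each candidate series start, count how many of the `needed` registers are already free and keep the starts that spill the fewest.

```cpp
#include <cassert>
#include <cstdint>
#include <bit> // C++20 std::popcount

int main()
{
    unsigned needed     = 3;
    uint64_t freeRegs   = 0b1110; // V1, V2, V3 free
    uint64_t starts     = 0b0011; // candidate series starts: V0 and V1
    uint64_t bestStarts = 0;
    unsigned bestSpills = needed + 1;

    for (unsigned s = 0; s < 64; s++)
    {
        if (((starts >> s) & 1) == 0)
            continue;
        uint64_t series = ((1ULL << needed) - 1) << s; // registers s .. s+needed-1
        unsigned spills = needed - std::popcount(series & freeRegs);
        if (spills < bestSpills)
        {
            bestSpills = spills;
            bestStarts = 1ULL << s;
        }
        else if (spills == bestSpills)
        {
            bestStarts |= 1ULL << s;
        }
    }

    // Starting at V1 uses V1-V3, which are all free: zero spills.
    assert(bestStarts == 0b0010 && bestSpills == 0);
}
```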
+//------------------------------------------------------------------------
+// getConsecutiveCandidates: Returns the mask of all the consecutive candidates
+// for the given RefPosition. For the first RefPosition of a series that needs
+// consecutive registers, returns only candidates that have enough free consecutive
+// registers following them. If free consecutive registers are not available, it
+// instead finds series that need the fewest registers to be spilled.
+//
+// Arguments:
+//    allCandidates  - Mask of all the candidate registers for this RefPosition.
+//    refPosition    - The first RefPosition of a series that needs consecutive registers.
+//    busyCandidates - (out) Mask of candidates whose series include busy registers.
+//
+// Returns:
+//    Register mask of free consecutive registers. If there are not enough free registers,
+//    or the free registers are not consecutive, returns RBM_NONE. In that case,
+//    `busyCandidates` will contain the register mask that can be assigned, covering
+//    series that mix free and busy registers.
+//
+// Notes:
+//    The returned mask includes just the first register of each series.
+//    For example, if we need 3 consecutive registers and
+//    allCandidates = 0x1C080D0F00000000, the consecutive register mask returned
+//    will be 0x400000300000000.
+//
+regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP    allCandidates,
+                                               RefPosition* refPosition,
+                                               regMaskTP*   busyCandidates)
+{
+    assert(compiler->info.compNeedsConsecutiveRegisters);
+    assert(refPosition->isFirstRefPositionOfConsecutiveRegisters());
+    regMaskTP freeCandidates = allCandidates & m_AvailableRegs;
+    if (freeCandidates == RBM_NONE)
+    {
+        return freeCandidates;
+    }
+
+    *busyCandidates = RBM_NONE;
+    regMaskTP    overallResult;
+    unsigned int registersNeeded = refPosition->regCount;
+
+    regMaskTP consecutiveResultForFree = filterConsecutiveCandidates(freeCandidates, registersNeeded, &overallResult);
+    if (consecutiveResultForFree != RBM_NONE)
+    {
+        // One last time, check if the subsequent RefPositions (all RefPositions except the first,
+        // which we handled above) already have consecutive registers assigned. If yes, and if one of
+        // the registers out of `consecutiveResult` is available for the first RefPosition, then just
+        // use that. This avoids unnecessary copies.
+
+        regNumber firstRegNum  = REG_NA;
+        regNumber prevRegNum   = REG_NA;
+        int       foundCount   = 0;
+        regMaskTP foundRegMask = RBM_NONE;
+
+        RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(refPosition);
+        assert(consecutiveRefPosition != nullptr);
+
+        for (unsigned int i = 1; i < registersNeeded; i++)
+        {
+            Interval* interval     = consecutiveRefPosition->getInterval();
+            consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition);
+
+            if (!interval->isActive)
+            {
+                foundRegMask = RBM_NONE;
+                foundCount   = 0;
+                continue;
+            }
+
+            regNumber currRegNum = interval->assignedReg->regNum;
+            if ((prevRegNum == REG_NA) || (prevRegNum == REG_PREV(currRegNum)) ||
+                ((prevRegNum == REG_FP_LAST) && (currRegNum == REG_FP_FIRST)))
+            {
+                foundRegMask |= genRegMask(currRegNum);
+                if (prevRegNum == REG_NA)
+                {
+                    firstRegNum = currRegNum;
+                }
+                prevRegNum = currRegNum;
+                foundCount++;
+                continue;
+            }
+
+            foundRegMask = RBM_NONE;
+            foundCount   = 0;
+            break;
+        }
+
+        if (foundCount != 0)
+        {
+            assert(firstRegNum != REG_NA);
+            regMaskTP remainingRegsMask = ((1ULL << (registersNeeded - foundCount)) - 1) << (firstRegNum - 1);
+
+            if ((overallResult & remainingRegsMask) != RBM_NONE)
+            {
+                // If the remaining registers are available, then just set the first register's mask
+                consecutiveResultForFree = 1ULL << (firstRegNum - 1);
+            }
+        }
+
+        return consecutiveResultForFree;
+    }
+
+    // There are registers available, but they are not consecutive.
+    // Here are some options to address that:
+    //
+    // 1. Scan the available registers once again and find the set that has the most registers
+    //    available. In other words, try to find a register sequence that needs the fewest registers
+    //    to be spilled. This gives optimal CQ.
+    //
+    // 2. Check if some of the RefPositions in the series are already in *somewhat* consecutive registers
+    //    and if yes, assign that register sequence. That way, we avoid copying the values of
+    //    RefPositions that are already positioned in the desired registers. Checking this is beneficial
+    //    only if it can happen frequently. So for RefPositions RP# 5 through RP# 8 (say), it should
+    //    be that RP# 6 is already in V14 and RP# 8 is already in V16. But this can be rare (not tested).
+    //    In the future, if we see such cases being hit, we could use this heuristic.
+    //
+    // 3. Give one of the free registers to the first position, and the algorithm will
+    //    give the subsequent consecutive registers (free or busy) to the remaining RefPositions
+    //    of the series. This may not give optimal CQ, however.
+    //
+    // 4. Return the set of available registers and let the selection heuristics pick one of them to be
+    //    assigned to the first RefPosition. The remaining RefPositions will be assigned the subsequent
+    //    registers (spilling them if busy), similar to #3 above, and will not give optimal CQ.
+    //
+    // Among `consecutiveResultForBusy`, we could shortlist the registers that are beneficial from the
+    // "busy register selection" heuristics' perspective. However, we would need to add the logic of
+    // try_SPILL_COST(), try_FAR_NEXT_REF(), etc. here, which would complicate things. Instead, we just
+    // go with option #1 and select the series that needs the fewest registers to be spilled.
+    //
+    regMaskTP overallResultForBusy;
+    regMaskTP consecutiveResultForBusy =
+        filterConsecutiveCandidates(allCandidates, registersNeeded, &overallResultForBusy);
+
+    *busyCandidates = consecutiveResultForBusy;
+
+    // Check if we can further narrow down to better registers among consecutiveResultForBusy.
+    if ((m_AvailableRegs & overallResultForBusy) != RBM_NONE)
+    {
+        // `overallResultForBusy` contains the mask of the entire series that can be the consecutive
+        // candidates. If that overlaps with free registers, then try to find a series that needs the
+        // fewest registers spilled, as mentioned in option #1 above.
+
+        regMaskTP optimalConsecutiveResultForBusy =
+            filterConsecutiveCandidatesForSpill(consecutiveResultForBusy, registersNeeded);
+
+        if (optimalConsecutiveResultForBusy != RBM_NONE)
+        {
+            *busyCandidates = optimalConsecutiveResultForBusy;
+        }
+        else if ((m_AvailableRegs & consecutiveResultForBusy) != RBM_NONE)
+        {
+            // We did not find free consecutive candidates; however, we found some registers among
+            // `allCandidates` that are a mix of free and busy. Since `busyCandidates` just has the
+            // bit set for the first register of each such series, return the series that start with
+            // a free register, if possible. The busy registers will be spilled during assignment of
+            // the subsequent RefPositions.
+            *busyCandidates = (m_AvailableRegs & consecutiveResultForBusy);
+        }
+    }
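+
+    // Worked example of option #1 above (hypothetical values, for illustration only):
+    // suppose registersNeeded = 3 and candidate series start at registers A and B. If
+    // one register in A's series is busy but two in B's series are busy, A needs 1 spill
+    // and B needs 2, so filterConsecutiveCandidatesForSpill() keeps only A's start bit.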
+
+    // Return RBM_NONE because there were no free consecutive candidates.
+    return RBM_NONE;
+}
 //------------------------------------------------------------------------
 // BuildNode: Build the RefPositions for a node
 //
@@ -992,10 +1520,14 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
                 tgtPrefUse = BuildUse(intrin.op1);
                 srcCount++;
             }
-            else
+            else if ((intrin.id != NI_AdvSimd_VectorTableLookup) && (intrin.id != NI_AdvSimd_Arm64_VectorTableLookup))
             {
                 srcCount += BuildOperandUses(intrin.op1);
             }
+            else
+            {
+                srcCount += BuildConsecutiveRegistersForUse(intrin.op1);
+            }
         }

         if ((intrin.category == HW_Category_SIMDByIndexedElement) && (genTypeSize(intrin.baseType) == 2))
@@ -1041,6 +1573,30 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
             }
         }
     }
+
+    else if (HWIntrinsicInfo::NeedsConsecutiveRegisters(intrin.id))
+    {
+        if ((intrin.id == NI_AdvSimd_VectorTableLookup) || (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup))
+        {
+            assert(intrin.op2 != nullptr);
+            srcCount += BuildOperandUses(intrin.op2);
+        }
+        else
+        {
+            assert(intrin.op2 != nullptr);
+            assert(intrin.op3 != nullptr);
+            assert((intrin.id == NI_AdvSimd_VectorTableLookupExtension) ||
+                   (intrin.id == NI_AdvSimd_Arm64_VectorTableLookupExtension));
+            assert(isRMW);
+            srcCount += BuildConsecutiveRegistersForUse(intrin.op2, intrin.op1);
+            srcCount += BuildDelayFreeUses(intrin.op3, intrin.op1);
+        }
+        assert(dstCount == 1);
+        buildInternalRegisterUses();
+        BuildDef(intrinsicTree);
+        *pDstCount = 1;
+        return srcCount;
+    }
     else if (intrin.op2 != nullptr)
     {
         // RMW intrinsic operands doesn't have to be delayFree when they can be assigned the same register as op1Reg
@@ -1110,6 +1666,147 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
     *pDstCount = dstCount;
     return srcCount;
 }
+
+//------------------------------------------------------------------------
+// BuildConsecutiveRegistersForUse: Build ref position(s) for `treeNode`, which requires
+// consecutive registers to be allocated. It creates a RefTypeUse RefPosition for each of
+// the consecutive registers needed by `treeNode`; in between, the series may contain
+// RefTypeUpperVectorRestore RefPositions.
+//
+// For the first RefPosition of the series, it sets the `regCount` field equal to
+// the number of RefPositions (including the first one) involved for this
+// treeNode. For the subsequent RefPositions, it sets the `regCount` to 0. For all
+// the RefPositions created, it sets the `needsConsecutive` flag so these RefPositions
+// can be identified during allocation.
+//
+// It also populates a `RefPositionMap` so the subsequent RefPositions can be reached
+// from a given RefPosition. This was preferred over adding a field to RefPosition
+// for this purpose.
+//
+// Arguments:
+//    treeNode - The GT_HWINTRINSIC node of interest
+//    rmwNode  - The read-modify-write node, if any (may be nullptr).
+//
+// Return Value:
+//    The number of sources consumed by this node.
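+//
+// For example (an illustration of the shape, assuming no upper-vector restores are
+// needed): for a 3-entry FIELD_LIST, such as the table operand of a VectorTableLookup,
+// this builds three RefTypeUse RefPositions RP1 -> RP2 -> RP3, linked in that order in
+// the map, with RP1.regCount == 3, RP2.regCount == RP3.regCount == 0, and
+// `needsConsecutive` set on all three.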
+//
+int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode, GenTree* rmwNode)
+{
+    int       srcCount     = 0;
+    Interval* rmwInterval  = nullptr;
+    bool      rmwIsLastUse = false;
+    if (rmwNode != nullptr)
+    {
+        if (isCandidateLocalRef(rmwNode))
+        {
+            rmwInterval  = getIntervalForLocalVarNode(rmwNode->AsLclVar());
+            rmwIsLastUse = rmwNode->AsLclVar()->IsLastUse(0);
+        }
+    }
+    if (treeNode->OperIsFieldList())
+    {
+        assert(compiler->info.compNeedsConsecutiveRegisters);
+
+        unsigned     regCount    = 0;
+        RefPosition* firstRefPos = nullptr;
+        RefPosition* currRefPos  = nullptr;
+        RefPosition* lastRefPos  = nullptr;
+
+        NextConsecutiveRefPositionsMap* refPositionMap = getNextConsecutiveRefPositionsMap();
+        for (GenTreeFieldList::Use& use : treeNode->AsFieldList()->Uses())
+        {
+            RefPosition*        restoreRefPos = nullptr;
+            RefPositionIterator prevRefPos    = refPositions.backPosition();
+            currRefPos                        = BuildUse(use.GetNode(), RBM_NONE, 0);
+
+            // Check if any RefTypeUpperVectorRestore RefPositions were created.
+            RefPositionIterator tailRefPos = refPositions.backPosition();
+            assert(tailRefPos == currRefPos);
+            prevRefPos++;
+            if (prevRefPos != tailRefPos)
+            {
+                restoreRefPos = prevRefPos;
+                assert(restoreRefPos->refType == RefTypeUpperVectorRestore);
+            }
+
+            currRefPos->needsConsecutive = true;
+            currRefPos->regCount         = 0;
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+            if (restoreRefPos != nullptr)
+            {
+                // If a restore RefPosition was created, make sure to link it as well, so that
+                // during register assignment we can visit it and make sure it doesn't get
+                // assigned one of the registers that is part of the consecutive set we are
+                // allocating for this treeNode.
+                // See assignConsecutiveRegisters().
+                restoreRefPos->needsConsecutive = true;
+                restoreRefPos->regCount         = 0;
+                if (firstRefPos == nullptr)
+                {
+                    // Always set the non-UpperVectorRestore RefPosition as the firstRefPos.
+                    // An UpperVectorRestore can be assigned a different, independent register.
+                    // See TODO-CQ in assignConsecutiveRegisters().
+                    firstRefPos = currRefPos;
+                }
+                refPositionMap->Set(lastRefPos, restoreRefPos, LinearScan::NextConsecutiveRefPositionsMap::Overwrite);
+                refPositionMap->Set(restoreRefPos, currRefPos, LinearScan::NextConsecutiveRefPositionsMap::Overwrite);
+
+                if (rmwNode != nullptr)
+                {
+                    // If we have an rmwNode, determine whether restoreRefPos should be set to delay-free.
+                    if ((restoreRefPos->getInterval() != rmwInterval) || (!rmwIsLastUse && !restoreRefPos->lastUse))
+                    {
+                        setDelayFree(restoreRefPos);
+                    }
+                }
+            }
+            else
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+            {
+                if (firstRefPos == nullptr)
+                {
+                    firstRefPos = currRefPos;
+                }
+                refPositionMap->Set(lastRefPos, currRefPos, LinearScan::NextConsecutiveRefPositionsMap::Overwrite);
+            }
+
+            refPositionMap->Set(currRefPos, nullptr);
+
+            lastRefPos = currRefPos;
+            regCount++;
+            if (rmwNode != nullptr)
+            {
+                // If we have an rmwNode, determine whether currRefPos should be set to delay-free.
+                if ((currRefPos->getInterval() != rmwInterval) || (!rmwIsLastUse && !currRefPos->lastUse))
+                {
+                    setDelayFree(currRefPos);
+                }
+            }
+        }
+
+        // Set `regCount` on the first RefPosition to the actual number of consecutive registers.
+        // The others keep 0, so a non-first RefPosition can be identified.
+        firstRefPos->regCount = regCount;
+
+#ifdef DEBUG
+        // Set the minimum number of register candidates needed for stress mode to work.
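+        // Under register-limiting stress, each RefPosition in the series must still be
+        // offered at least `regCount` candidates (see getConstrainedRegMask() and
+        // LsraLimitFPSetForConsecutive); otherwise a run of `regCount` consecutive
+        // registers could never be formed.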
+ currRefPos = firstRefPos; + while (currRefPos != nullptr) + { + currRefPos->minRegCandidateCount = regCount; + currRefPos = getNextConsecutiveRefPosition(currRefPos); + } +#endif + srcCount += regCount; + } + else + { + srcCount += BuildOperandUses(treeNode); + } + + return srcCount; +} #endif #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index f5f2ad7f409d9b..bb0a10779d42a7 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1674,7 +1674,7 @@ int LinearScan::ComputeOperandDstCount(GenTree* operand) } if (operand->IsValue()) { - // Operands that are values and are not contained consume all of their operands + // Operands that are values and are not contained, consume all of their operands // and produce one or more registers. return operand->GetRegisterDstCount(compiler); } @@ -1859,6 +1859,12 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc regMaskTP calleeSaveMask = calleeSaveRegs(interval->registerType); newRefPosition->registerAssignment = getConstrainedRegMask(oldAssignment, calleeSaveMask, minRegCountForRef); +#ifdef TARGET_ARM64 + if (newRefPosition->isFirstRefPositionOfConsecutiveRegisters()) + { + newRefPosition->registerAssignment |= LsraLimitFPSetForConsecutive; + } +#endif if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) && !interval->isLocalVar) { @@ -2152,6 +2158,8 @@ void LinearScan::buildIntervals() } #endif // DEBUG + resetRegState(); + #if DOUBLE_ALIGN // We will determine whether we should double align the frame during // identifyCandidates(), but we initially assume that we will not. @@ -3062,9 +3070,9 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // the defList, and build a use RefPosition for the associated Interval. 
// // Arguments: -// operand - The node of interest -// candidates - The register candidates for the use -// multiRegIdx - The index of the multireg def/use +// operand - The node of interest +// candidates - The register candidates for the use +// multiRegIdx - The index of the multireg def/use // // Return Value: // The newly created use RefPosition diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs index c2727e85174519..a945aecc79d096 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs @@ -3662,6 +3662,42 @@ internal Arm64() { } /// public static Vector128 VectorTableLookup(Vector128 table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + /// + /// uint8x16_t vqtbl2q_u8(uint8x16x2_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x16_t vqtbl2q_s8(int8x16x2_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// uint8x16_t vqtbl3q_u8(uint8x16x3_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x16_t vqtbl3q_s8(int8x16x3_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// uint8x16_t vqtbl4q_u8(uint8x16x4_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x16_t vqtbl4q_s8(int8x16x4_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + /// /// uint8x16_t vqvtbx1q_u8(uint8x16_t r, int8x16_t t, uint8x16_t idx) /// A64: TBX Vd.16B, {Vn.16B}, Vm.16B @@ -3674,6 +3710,43 @@ internal Arm64() { } /// public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, Vector128 table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + /// + /// uint8x16_t vqtbx2q_u8(uint8x16x2_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x16_t vqtbx2q_s8(int8x16x2_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 
defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// uint8x16_t vqtbx3q_u8(uint8x16x3_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x16_t vqtbx3q_s8(int8x16x3_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// uint8x16_t vqtbx4q_u8(uint8x16x4_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x16_t vqtbx4q_s8(int8x16x4_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + /// /// uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b) /// A64: ZIP2 Vd.8B, Vn.8B, Vm.8B @@ -14967,6 +15040,42 @@ internal Arm64() { } /// public static Vector64 VectorTableLookup(Vector128 table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + /// + /// uint8x8_t vqtbl2q_u8(uint8x16x2_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B + /// + public static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x8_t vqtbl2q_u8(int8x16x2_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B + /// + public static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// uint8x8_t vqtbl3q_u8(uint8x16x3_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B + /// + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x8_t vqtbl3q_u8(int8x16x3_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B + /// + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// uint8x8_t vqtbl4q_u8(uint8x16x4_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B + /// + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x8_t vqtbl4q_u8(int8x16x4_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B + /// + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + /// /// uint8x8_t vqvtbx1_u8(uint8x8_t r, uint8x16_t t, uint8x8_t idx) /// A32: VTBX Dd, {Dn, Dn+1}, Dm @@ -14981,6 +15090,42 @@ internal Arm64() { } 
/// public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, Vector128 table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + /// + /// uint8x8_t vqtbx2q_u8(uint8x16x2_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x8_t vqtbx2q_u8(int8x16x2_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// uint8x8_t vqtbx3q_u8(uint8x16x3_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x8_t vqtbx3q_u8(int8x16x3_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// uint8x8_t vqtbx4q_u8(uint8x16x4_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x8_t vqtbx4q_u8(int8x16x4_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + /// /// uint8x8_t veor_u8 (uint8x8_t a, uint8x8_t b) /// A32: VEOR Dd, Dn, Dm diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs index 3a866329010fbd..72f1c60311491a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs @@ -3660,6 +3660,42 @@ internal Arm64() { } /// public static Vector128 VectorTableLookup(Vector128 table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + /// + /// uint8x16_t vqtbl2q_u8(uint8x16x2_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// int8x16_t vqtbl2q_s8(int8x16x2_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// uint8x16_t vqtbl3q_u8(uint8x16x3_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// int8x16_t 
vqtbl3q_s8(int8x16x3_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// uint8x16_t vqtbl4q_u8(uint8x16x4_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// int8x16_t vqtbl4q_s8(int8x16x4_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + /// /// uint8x16_t vqvtbx1q_u8(uint8x16_t r, int8x16_t t, uint8x16_t idx) /// A64: TBX Vd.16B, {Vn.16B}, Vm.16B @@ -3672,6 +3708,42 @@ internal Arm64() { } /// public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, Vector128 table, Vector128 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); + /// + /// uint8x16_t vqtbx2q_u8(uint8x16x2_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); + + /// + /// int8x16_t vqtbx2q_s8(int8x16x2_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); + + /// + /// uint8x16_t vqtbx3q_u8(uint8x16x3_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); + + /// + /// int8x16_t vqtbx3q_s8(int8x16x3_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); + + /// + /// uint8x16_t vqtbx4q_u8(uint8x16x4_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); + + /// + /// int8x16_t vqtbx4q_s8(int8x16x4_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); + /// /// uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b) /// A64: ZIP2 Vd.8B, Vn.8B, Vm.8B @@ -14965,6 +15037,42 @@ internal Arm64() { } /// public static Vector64 VectorTableLookup(Vector128 table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + /// + /// uint8x8_t 
vqtbl2q_u8(uint8x16x2_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B + /// + public static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// int8x8_t vqtbl2q_u8(int8x16x2_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B + /// + public static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// uint8x8_t vqtbl3q_u8(uint8x16x3_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B + /// + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// int8x8_t vqtbl3q_u8(int8x16x3_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B + /// + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// uint8x8_t vqtbl4q_u8(uint8x16x4_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B + /// + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// int8x8_t vqtbl4q_u8(int8x16x4_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B + /// + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + /// /// uint8x8_t vqvtbx1_u8(uint8x8_t r, uint8x16_t t, uint8x8_t idx) /// A32: VTBX Dd, {Dn, Dn+1}, Dm @@ -14979,6 +15087,42 @@ internal Arm64() { } /// public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, Vector128 table, Vector64 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); + /// + /// uint8x8_t vqtbx2q_u8(uint8x16x2_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); + + /// + /// int8x8_t vqtbx2q_u8(int8x16x2_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); + + /// + /// uint8x8_t vqtbx3q_u8(uint8x16x3_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); + + /// + /// int8x8_t vqtbx3q_u8(int8x16x3_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); + + /// + /// uint8x8_t vqtbx4q_u8(uint8x16x4_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) 
=> VectorTableLookupExtension(defaultValues, table, byteIndexes); + + /// + /// int8x8_t vqtbx4q_u8(int8x16x4_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); + /// /// uint8x8_t veor_u8 (uint8x8_t a, uint8x8_t b) /// A32: VEOR Dd, Dn, Dm diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index f9fafdc866d89e..eef63aed18e00f 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -2856,8 +2856,20 @@ public unsafe static void StoreSelectedScalar(ulong* address, System.Runtime.Int public static System.Runtime.Intrinsics.Vector128 SubtractWideningUpper(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, (Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, (Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, (Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 
defaultValues, (Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 Xor(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector128 Xor(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector128 Xor(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } @@ -3493,8 +3505,20 @@ public unsafe static void StorePairScalarNonTemporal(uint* address, System.Runti public static System.Runtime.Intrinsics.Vector64 UnzipOdd(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static 
System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 ZipHigh(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector128 ZipHigh(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector128 ZipHigh(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } diff --git a/src/mono/mono/mini/llvm-intrinsics.h b/src/mono/mono/mini/llvm-intrinsics.h index 3c79c7aa59427e..dad84520297216 100644 --- a/src/mono/mono/mini/llvm-intrinsics.h +++ b/src/mono/mono/mini/llvm-intrinsics.h @@ -466,6 +466,12 @@ INTRINS_OVR_TAG(AARCH64_ADV_SIMD_SRI, aarch64_neon_vsri, Arm64, V64 | V128 | I1 INTRINS_OVR_TAG(AARCH64_ADV_SIMD_TBX1, aarch64_neon_tbx1, Arm64, V64 | V128 | I1) INTRINS_OVR_TAG(AARCH64_ADV_SIMD_TBL1, aarch64_neon_tbl1, Arm64, V64 | V128 | I1) +INTRINS_OVR_TAG(AARCH64_ADV_SIMD_TBX2, aarch64_neon_tbx2, Arm64, V64 | V128 | I1) +INTRINS_OVR_TAG(AARCH64_ADV_SIMD_TBL2, aarch64_neon_tbl2, Arm64, V64 | V128 | I1) +INTRINS_OVR_TAG(AARCH64_ADV_SIMD_TBX3, aarch64_neon_tbx3, Arm64, V64 | V128 | I1) +INTRINS_OVR_TAG(AARCH64_ADV_SIMD_TBL3, aarch64_neon_tbl3, Arm64, V64 | V128 | I1) +INTRINS_OVR_TAG(AARCH64_ADV_SIMD_TBX4, aarch64_neon_tbx4, Arm64, V64 | V128 | I1) +INTRINS_OVR_TAG(AARCH64_ADV_SIMD_TBL4, aarch64_neon_tbl4, Arm64, V64 | V128 | I1) INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_SDOT, aarch64_neon_sdot, Arm64, Arm64DotProd, V64 | V128 | I4) INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_UDOT, aarch64_neon_udot, Arm64, Arm64DotProd, V64 | V128 | I4) diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index 1d78c8dd81d825..2d70d15001317f 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -11312,6 +11312,63 @@ MONO_RESTORE_WARNING values [ins->dreg] = result; break; } + case OP_ARM64_TBL_INDIRECT: + case OP_ARM64_TBX_INDIRECT: { + int nvectors = ins->inst_c0; + guint32 *offsets = (guint32*)ins->inst_p1; + + LLVMValueRef args [16]; + + LLVMTypeRef etype = LLVMVectorType (LLVMInt8Type (), 16); + + int aindex = 0; + + LLVMValueRef table_val, default_values_val, indexes_val; + if (ins->opcode == OP_ARM64_TBX_INDIRECT) { + table_val = lhs; + default_values_val = rhs; + indexes_val = arg3; + args [aindex ++] = default_values_val; + } else { + table_val = lhs; + indexes_val = rhs; + } + + /* Load input vectors from memory */ + LLVMValueRef addr = convert (ctx, table_val, pointer_type (etype)); + for (int i = 0; i < nvectors; ++i) { + g_assert (offsets [i] % 16 == 0); + LLVMValueRef index = const_int32 (offsets [i] / 16); + LLVMValueRef ptr = LLVMBuildGEP2 (builder, etype, addr, &index, 1, ""); + args [aindex ++] = emit_load (builder, etype, ptr, "", FALSE); + } + args [aindex ++] = indexes_val; + g_assert (aindex < 16); + + IntrinsicId iid = (IntrinsicId)0; + if (ins->opcode == OP_ARM64_TBL_INDIRECT) { + switch 
(nvectors) { + case 2: iid = INTRINS_AARCH64_ADV_SIMD_TBL2; break; + case 3: iid = INTRINS_AARCH64_ADV_SIMD_TBL3; break; + case 4: iid = INTRINS_AARCH64_ADV_SIMD_TBL4; break; + default: + g_assert_not_reached (); + break; + } + } else { + switch (nvectors) { + case 2: iid = INTRINS_AARCH64_ADV_SIMD_TBX2; break; + case 3: iid = INTRINS_AARCH64_ADV_SIMD_TBX3; break; + case 4: iid = INTRINS_AARCH64_ADV_SIMD_TBX4; break; + default: + g_assert_not_reached (); + break; + } + } + llvm_ovr_tag_t ovr_tag = (LLVMGetVectorSize (LLVMTypeOf (indexes_val)) == 8 ? INTRIN_vector64 : INTRIN_vector128) | INTRIN_int8; + values [ins->dreg] = call_overloaded_intrins (ctx, iid, ovr_tag, args, ""); + break; + } case OP_XOP_OVR_X_X: { IntrinsicId iid = (IntrinsicId) ins->inst_c0; llvm_ovr_tag_t ovr_tag = ovr_tag_from_mono_vector_class (ins->klass); diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h index 162711201ad142..e844b5cccac3df 100644 --- a/src/mono/mono/mini/mini-ops.h +++ b/src/mono/mono/mini/mini-ops.h @@ -1755,6 +1755,14 @@ MINI_OP3(OP_ARM64_SQRDMLSH, "arm64_sqrdmlsh", XREG, XREG, XREG, XREG) MINI_OP3(OP_ARM64_SQRDMLSH_BYSCALAR, "arm64_sqrdmlsh_byscalar", XREG, XREG, XREG, XREG) MINI_OP3(OP_ARM64_SQRDMLSH_SCALAR, "arm64_sqrdmlsh_scalar", XREG, XREG, XREG, XREG) +/* + * sreg1 points to a memory area with the input vectors. + * inst_c0 is the number of vectors. + * inst_p1 points to an int array with the offsets inside the memory area. + */ +MINI_OP(OP_ARM64_TBL_INDIRECT, "arm64_tbl_indirect", XREG, IREG, XREG) +MINI_OP3(OP_ARM64_TBX_INDIRECT, "arm64_tbx_indirect", XREG, IREG, XREG, XREG) + #endif // TARGET_ARM64 MINI_OP(OP_FCVTL, "convert_to_higher_precision", XREG, XREG, NONE) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 633167c6335841..ff70cab60f5b32 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -440,6 +440,12 @@ emit_simd_ins_for_unary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSignat #endif } +static gboolean +type_is_simd_vector (MonoType *type) +{ + return type->type == MONO_TYPE_GENERICINST && m_class_is_simd_type (mono_class_from_mono_type_internal (type)); +} + static gboolean is_hw_intrinsics_class (MonoClass *klass, const char *name, gboolean *is_64bit) { @@ -3260,8 +3266,8 @@ static SimdIntrinsic advsimd_methods [] = { {SN_TransposeOdd, OP_ARM64_TRN2}, {SN_UnzipEven, OP_ARM64_UZP1}, {SN_UnzipOdd, OP_ARM64_UZP2}, - {SN_VectorTableLookup, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_TBL1}, - {SN_VectorTableLookupExtension, OP_XOP_OVR_X_X_X_X, INTRINS_AARCH64_ADV_SIMD_TBX1}, + {SN_VectorTableLookup}, + {SN_VectorTableLookupExtension}, {SN_Xor, OP_XBINOP_FORCEINT, XBINOP_FORCEINT_XOR}, {SN_ZeroExtendWideningLower, OP_ARM64_UXTL}, {SN_ZeroExtendWideningUpper, OP_ARM64_UXTL2}, @@ -3565,6 +3571,63 @@ emit_arm64_intrinsics ( ret->sreg3 = scalar->dreg; return ret; } + case SN_VectorTableLookup: + case SN_VectorTableLookupExtension: { + if (type_is_simd_vector (fsig->params [0]) && type_is_simd_vector (fsig->params [1])) { + if (id == SN_VectorTableLookup) + return emit_simd_ins_for_sig (cfg, klass, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_TBL1, 0, fsig, args); + else + return emit_simd_ins_for_sig (cfg, klass, OP_XOP_OVR_X_X_X_X, INTRINS_AARCH64_ADV_SIMD_TBX1, 0, fsig, args); + } + + MonoInst *ins, *addr; + int tuple_argindex; + + if (id == SN_VectorTableLookup) + /* VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) */ + tuple_argindex = 0; + else + 
/* VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes */ + tuple_argindex = 1; + + /* + * These intrinsics have up to 5 inputs, and our IR can't model that, so save the inputs to the stack and have + * the LLVM implementation read them back. + */ + MonoType *tuple_type = fsig->params [tuple_argindex]; + g_assert (tuple_type->type == MONO_TYPE_GENERICINST); + MonoClass *tclass = mono_class_from_mono_type_internal (tuple_type); + mono_class_init_internal (tclass); + + MonoClassField *fields = m_class_get_fields (tclass); + int nfields = mono_class_get_field_count (tclass); + guint32 *offsets = mono_mempool_alloc0 (cfg->mempool, nfields * sizeof (guint32)); + for (uint32_t i = 0; i < mono_class_get_field_count (tclass); ++i) + offsets [i] = mono_field_get_offset (&fields [i]) - MONO_ABI_SIZEOF (MonoObject); + + int vreg = alloc_xreg (cfg); + NEW_VARLOADA_VREG (cfg, addr, vreg, tuple_type); + MONO_ADD_INS (cfg->cbb, addr); + + EMIT_NEW_STORE_MEMBASE_TYPE (cfg, ins, tuple_type, addr->dreg, 0, args [tuple_argindex]->dreg); + + MONO_INST_NEW (cfg, ins, id == SN_VectorTableLookup ? OP_ARM64_TBL_INDIRECT : OP_ARM64_TBX_INDIRECT); + ins->dreg = alloc_xreg (cfg); + ins->sreg1 = addr->dreg; + if (id == SN_VectorTableLookup) { + /* byteIndexes */ + ins->sreg2 = args [1]->dreg; + } else { + /* defaultValues */ + ins->sreg2 = args [0]->dreg; + /* byteIndexes */ + ins->sreg3 = args [2]->dreg; + } + ins->inst_c0 = nfields; + ins->inst_p1 = offsets; + MONO_ADD_INS (cfg->cbb, ins); + return ins; + } default: g_assert_not_reached (); } diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index e7a1b698a4d4db..3fcb781e65bc8b 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -1704,8 +1704,20 @@ ("VecBinOpTest.template", new Dictionary { ["TestName"] = "SubtractWideningUpper_Vector128_UInt64_Vector128_UInt32", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "SubtractWideningUpper", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.SubtractWideningUpper(left, right, i) != result[i]"}), ("SimpleBinOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookup_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 20)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, right, left) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, right, left) != result[i]"}), ("SimpleBinOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookup_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", 
["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 20)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, right, left) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, right, left) != result[i]"}), + ("VectorLookup_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookup2_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 40)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookup2_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 40)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookup3_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 60)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookup3_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 60)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookup4_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", 
["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 80)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookup4_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 80)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), ("VecTernOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector64", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "(Byte)(TestLibrary.Generator.GetByte() % 20)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, firstOp, thirdOp, secondOp) != result[i]"}), ("VecTernOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector64", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "(SByte)(TestLibrary.Generator.GetSByte() % 20)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, firstOp, thirdOp, secondOp) != result[i]"}), + ("VectorLookupExtension_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension2_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 40)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension2_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = 
"Vector64", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 40)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension3_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 60)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension3_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 60)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension4_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 80)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension4_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 80)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), ("VecBinOpTest.template", new Dictionary { ["TestName"] = "Xor_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "Xor", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", 
["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "Helpers.Xor(left[i], right[i]) != result[i]"}), ("VecBinOpTest.template", new Dictionary { ["TestName"] = "Xor_Vector64_Double", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "Xor", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.Xor(left[i], right[i])) != BitConverter.DoubleToInt64Bits(result[i])"}), ("VecBinOpTest.template", new Dictionary { ["TestName"] = "Xor_Vector64_Int16", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "Xor", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.Xor(left[i], right[i]) != result[i]"}), @@ -2306,8 +2318,20 @@ ("VecPairBinOpTest.template", new Dictionary { ["TestName"] = "TransposeOdd_Vector128_UInt64", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "TransposeOdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateEntry"] = "result[index] != left[i+1] || result[++index] != right[i+1]"}), ("SimpleBinOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookup_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 20)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, right, left) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, right, left) != result[i]"}), ("SimpleBinOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookup_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 20)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, right, left) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, right, left) != result[i]"}), + ("VectorLookup_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookup2_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] 
= "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 40)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookup2_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 40)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookup3_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 60)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookup3_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 60)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookup4_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 80)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookup4_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", 
["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 80)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), ("VecTernOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "(Byte)(TestLibrary.Generator.GetByte() % 20)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, firstOp, thirdOp, secondOp) != result[i]"}), ("VecTernOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "(SByte)(TestLibrary.Generator.GetSByte() % 20)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, firstOp, thirdOp, secondOp) != result[i]"}), + ("VectorLookupExtension_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension2_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 40)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension2_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 40)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension3_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", 
["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 60)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension3_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 60)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension4_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 80)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension4_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 80)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), ("VecPairBinOpTest.template", new Dictionary { ["TestName"] = "UnzipEven_Vector64_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "UnzipEven", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateEntry"] = "result[index] != left[i] || result[index + half] != right[i]"}), ("VecPairBinOpTest.template", new Dictionary { ["TestName"] = "UnzipEven_Vector64_Int16", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "UnzipEven", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", 
["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateEntry"] = "result[index] != left[i] || result[index + half] != right[i]"}), ("VecPairBinOpTest.template", new Dictionary { ["TestName"] = "UnzipEven_Vector64_Int32", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "UnzipEven", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateEntry"] = "result[index] != left[i] || result[index + half] != right[i]"}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template new file mode 100644 index 00000000000000..a9b3b45ec11b32 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template @@ -0,0 +1,420 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [ActiveIssue("https://github.com/dotnet/runtime/issues/83891", TestRuntimes.Mono)] + [Fact] + public static void VectorLookupExtension_2_{RetBaseType}() + { + var test = new VectorLookupExtension_2Test__{Method}{RetBaseType}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class VectorLookupExtension_2Test__{Method}{RetBaseType} + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] inArray3; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle inHandle3; + private GCHandle outHandle; + + private 
ulong alignment; + + public DataTable({Op2BaseType}[] inArray0, {Op1BaseType}[] inArray1, {Op1BaseType}[] inArray2, {Op2BaseType}[] inArray3, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 32 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.inArray3 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.inHandle3 = GCHandle.Alloc(this.inArray3, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray3Ptr => Align((byte*)(inHandle3.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + inHandle3.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op2VectorType}<{Op2BaseType}> _fld0; + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op1VectorType}<{Op1BaseType}> _fld2; + public {Op2VectorType}<{Op2BaseType}> _fld3; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref 
Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data3[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op2BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario(VectorLookupExtension_2Test__{Method}{RetBaseType} testClass) + { + var result = {Isa}.{Method}(_fld0, (_fld1, _fld2), _fld3); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op2BaseType}[] _data0 = new {Op2BaseType}[Op2ElementCount]; + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data3 = new {Op2BaseType}[Op2ElementCount]; + + private static {Op2VectorType}<{Op2BaseType}> _clsVar0; + private static {Op1VectorType}<{Op1BaseType}> _clsVar1; + private static {Op1VectorType}<{Op1BaseType}> _clsVar2; + private static {Op2VectorType}<{Op2BaseType}> _clsVar3; + + private {Op2VectorType}<{Op2BaseType}> _fld0; + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op1VectorType}<{Op1BaseType}> _fld2; + private {Op2VectorType}<{Op2BaseType}> _fld3; + + private DataTable _dataTable; + + static VectorLookupExtension_2Test__{Method}{RetBaseType}() + { + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data3[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar3), ref Unsafe.As<{Op2BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + } + + public 
VectorLookupExtension_2Test__{Method}{RetBaseType}() + { + Succeeded = true; + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data3[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op2BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data3[i] = {NextValueOp2}; } + _dataTable = new DataTable(_data0, _data1, _data2, _data3, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr), + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr)), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray3Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { + typeof({Op2VectorType}<{Op2BaseType}>), + typeof(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>)), + typeof({Op2VectorType}<{Op2BaseType}>) + }); + + if (method.IsGenericMethodDefinition) + { + method = method.MakeGenericMethod(typeof({RetBaseType})); + } + + var result = method.Invoke(null, new object[] { + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr), + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr)), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray3Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = {Isa}.{Method}( + 
_clsVar0, + (_clsVar1, + _clsVar2), + _clsVar3 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _clsVar3, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op0 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr); + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray3Ptr); + var result = {Isa}.{Method}(op0, (op1, op2), op3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op0, op1, op2, op3, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new VectorLookupExtension_2Test__{Method}{RetBaseType}(); + var result = {Isa}.{Method}(test._fld0, (test._fld1, test._fld2), test._fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, test._fld3, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld0, (_fld1, _fld2), _fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _fld3, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld0, (test._fld1, test._fld2), test._fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, test._fld3, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult({Op2VectorType}<{Op2BaseType}> op0, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op2VectorType}<{Op2BaseType}> op3, void* result, [CallerMemberName] string method = "") + { + {Op2BaseType}[] inArray0 = new {Op2BaseType}[Op2ElementCount]; + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray3 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), op0); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray0, inArray1, inArray2, inArray3, outArray, 
method); + } + + private void ValidateResult(void* op0, void* op1, void* op2, void* op3, void* result, [CallerMemberName] string method = "") + { + {Op2BaseType}[] inArray0 = new {Op2BaseType}[Op2ElementCount]; + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray3 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), ref Unsafe.AsRef(op0), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray0, inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult({Op2BaseType}[] defaultValues, {Op1BaseType}[] firstOp, {Op1BaseType}[] secondOp, {Op2BaseType}[] indices, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + {Op1BaseType}[][] table = {firstOp, secondOp}; + + for (var i = 0; i < RetElementCount; i++) + { + if ({ValidateIterResult}) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op2VectorType}<{Op2BaseType}>, ({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>), {Op2VectorType}<{Op2BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" defaultValues: ({string.Join(", ", defaultValues)})"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($" secondOp: ({string.Join(", ", secondOp)})"); + TestLibrary.TestFramework.LogInformation($" indices: ({string.Join(", ", indices)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +}
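The `% 40` generator in the VectorTableLookupExtension2 test entries above deliberately produces byte indices past the 32 entries held by a two-register table: in the TBX family, an out-of-range index leaves the corresponding lane of the default-values operand untouched, which is what `Helpers.TableVectorExtension` has to model when the template's `{ValidateIterResult}` expands. A minimal scalar sketch of that two-table semantics, assuming byte elements (the real helper lives in the test library and its exact signature may differ; `TbxTwoTableReference` is an illustrative name only):

```csharp
// Scalar model of a two-table VectorTableLookupExtension (TBX) over bytes:
// an in-range index selects a byte from the concatenated table registers,
// an out-of-range index keeps the lane from 'defaultValues'.
static byte[] TbxTwoTableReference(byte[] defaultValues, byte[][] table, byte[] indices)
{
    int rowSize = table[0].Length;        // 16 bytes per Vector128 table register
    byte[] result = (byte[])defaultValues.Clone();

    for (int i = 0; i < indices.Length; i++)
    {
        int idx = indices[i];
        if (idx < table.Length * rowSize) // 0..31 is in range for two rows
        {
            result[i] = table[idx / rowSize][idx % rowSize];
        }
    }

    return result;
}
```

Exercising indices on both sides of the range boundary is the point of the modulus choice: it checks that the fallback-to-default path and the table-select path both match hardware behavior.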
diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template new file mode 100644 index 00000000000000..835bffa4afee00 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template @@ -0,0 +1,447 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\Arm\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. *
+ ****************************************************************************** +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [ActiveIssue("https://github.com/dotnet/runtime/issues/83891", TestRuntimes.Mono)] + [Fact] + public static void VectorLookupExtension_3_{RetBaseType}() + { + var test = new VectorLookupExtension_3Test__{Method}{RetBaseType}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class VectorLookupExtension_3Test__{Method}{RetBaseType} + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] inArray3; + private byte[] inArray4; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle inHandle3; + private GCHandle inHandle4; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op2BaseType}[] inArray0, {Op1BaseType}[] inArray1, {Op1BaseType}[] inArray2, {Op1BaseType}[] inArray3, {Op2BaseType}[] inArray4, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray4 = inArray4.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 32 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfinArray4 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.inArray3 = new byte[alignment * 2]; + this.inArray4 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.inHandle3 = 
GCHandle.Alloc(this.inArray3, GCHandleType.Pinned); + this.inHandle4 = GCHandle.Alloc(this.inArray4, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray4Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray4[0]), (uint)sizeOfinArray4); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray3Ptr => Align((byte*)(inHandle3.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray4Ptr => Align((byte*)(inHandle4.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + inHandle3.Free(); + inHandle4.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op2VectorType}<{Op2BaseType}> _fld0; + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op1VectorType}<{Op1BaseType}> _fld2; + public {Op1VectorType}<{Op1BaseType}> _fld3; + public {Op2VectorType}<{Op2BaseType}> _fld4; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld4), ref Unsafe.As<{Op2BaseType}, 
byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario(VectorLookupExtension_3Test__{Method}{RetBaseType} testClass) + { + var result = {Isa}.{Method}(_fld0, (_fld1, _fld2, _fld3), _fld4); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, _fld3, _fld4, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op2BaseType}[] _data0 = new {Op2BaseType}[Op2ElementCount]; + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data3 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data4 = new {Op2BaseType}[Op2ElementCount]; + + private static {Op2VectorType}<{Op2BaseType}> _clsVar0; + private static {Op1VectorType}<{Op1BaseType}> _clsVar1; + private static {Op1VectorType}<{Op1BaseType}> _clsVar2; + private static {Op1VectorType}<{Op1BaseType}> _clsVar3; + private static {Op2VectorType}<{Op2BaseType}> _clsVar4; + + private {Op2VectorType}<{Op2BaseType}> _fld0; + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op1VectorType}<{Op1BaseType}> _fld2; + private {Op1VectorType}<{Op1BaseType}> _fld3; + private {Op2VectorType}<{Op2BaseType}> _fld4; + + private DataTable _dataTable; + + static VectorLookupExtension_3Test__{Method}{RetBaseType}() + { + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar4), ref Unsafe.As<{Op2BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + } + + public VectorLookupExtension_3Test__{Method}{RetBaseType}() + { + Succeeded = true; + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref 
_fld0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld4), ref Unsafe.As<{Op2BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + _dataTable = new DataTable(_data0, _data1, _data2, _data3, _data4, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr), + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr)), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray4Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.inArray4Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { + typeof({Op2VectorType}<{Op2BaseType}>), + typeof(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>)), + typeof({Op2VectorType}<{Op2BaseType}>) + }); + + if (method.IsGenericMethodDefinition) + { + method = method.MakeGenericMethod(typeof({RetBaseType})); + } + + var result = method.Invoke(null, new object[] { + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr), + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr)), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray4Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, 
({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.inArray4Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = {Isa}.{Method}( + _clsVar0, + (_clsVar1, + _clsVar2, + _clsVar3), + _clsVar4 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _clsVar3, _clsVar4, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op0 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr); + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr); + var op4 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray4Ptr); + var result = {Isa}.{Method}(op0, (op1, op2, op3), op4); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op0, op1, op2, op3, op4, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new VectorLookupExtension_3Test__{Method}{RetBaseType}(); + var result = {Isa}.{Method}(test._fld0, (test._fld1, test._fld2, test._fld3), test._fld4); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, test._fld3, test._fld4, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld0, (_fld1, _fld2, _fld3), _fld4); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _fld3, _fld4, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld0, (test._fld1, test._fld2, test._fld3), test._fld4); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, test._fld3, test._fld4, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult({Op2VectorType}<{Op2BaseType}> op0, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op2VectorType}<{Op2BaseType}> op4, void* result, [CallerMemberName] string method = "") + { + {Op2BaseType}[] inArray0 = new {Op2BaseType}[Op2ElementCount]; + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray4 = new {Op2BaseType}[Op2ElementCount]; + 
{RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), op0); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray4[0]), op4); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray0, inArray1, inArray2, inArray3, inArray4, outArray, method); + } + + private void ValidateResult(void* op0, void* op1, void* op2, void* op3, void* op4, void* result, [CallerMemberName] string method = "") + { + {Op2BaseType}[] inArray0 = new {Op2BaseType}[Op2ElementCount]; + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray4 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), ref Unsafe.AsRef(op0), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray4[0]), ref Unsafe.AsRef(op4), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray0, inArray1, inArray2, inArray3, inArray4, outArray, method); + } + + private void ValidateResult({Op2BaseType}[] defaultValues, {Op1BaseType}[] firstOp, {Op1BaseType}[] secondOp, {Op1BaseType}[] thirdOp, {Op2BaseType}[] indices, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + {Op1BaseType}[][] table = {firstOp, secondOp, thirdOp}; + + for (var i = 0; i < RetElementCount; i++) + { + if ({ValidateIterResult}) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op2VectorType}<{Op2BaseType}>, ({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>), {Op2VectorType}<{Op2BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" defaultValues: ({string.Join(", ", defaultValues)})"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($" secondOp: ({string.Join(", ", secondOp)})"); + TestLibrary.TestFramework.LogInformation($" thirdOp: ({string.Join(", ", thirdOp)})"); + TestLibrary.TestFramework.LogInformation($" indices: ({string.Join(", ", indices)})"); +
TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +}
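The VectorLookupExtension_4Test template that follows expands to calls of the shape `{Isa}.{Method}(_fld0, (_fld1, _fld2, _fld3, _fld4), _fld5)`, i.e. the four table registers travel as a single value tuple. A self-contained usage sketch of that shape for byte lanes (lane values are illustrative only, and real code should stay behind the `IsSupported` guard, just as the generated tests do):

```csharp
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;

class TbxTupleDemo
{
    static void Main()
    {
        if (!AdvSimd.Arm64.IsSupported)
        {
            return; // TBX with a Vector128 result requires AdvSimd.Arm64
        }

        Vector128<byte> defaults = Vector128.Create((byte)0xFF);
        Vector128<byte> t0 = Vector128.Create((byte)0); // table bytes 0..15
        Vector128<byte> t1 = Vector128.Create((byte)1); // table bytes 16..31
        Vector128<byte> t2 = Vector128.Create((byte)2); // table bytes 32..47
        Vector128<byte> t3 = Vector128.Create((byte)3); // table bytes 48..63

        Vector128<byte> indices = Vector128.Create((byte)5, 20, 40, 60, 70, 0, 0, 0,
                                                   0, 0, 0, 0, 0, 0, 0, 0);

        // Lanes 0..3 read from t0..t3 respectively; lane 4 (index 70 >= 64)
        // falls outside the four-register table and keeps the 0xFF default.
        Vector128<byte> result =
            AdvSimd.Arm64.VectorTableLookupExtension(defaults, (t0, t1, t2, t3), indices);

        Console.WriteLine(result);
    }
}
```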
diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template new file mode 100644 index 00000000000000..f06653ab4ef01b --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template @@ -0,0 +1,474 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\Arm\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [ActiveIssue("https://github.com/dotnet/runtime/issues/83891", TestRuntimes.Mono)] + [Fact] + public static void VectorLookupExtension_4_{RetBaseType}() + { + var test = new VectorLookupExtension_4Test__{Method}{RetBaseType}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class VectorLookupExtension_4Test__{Method}{RetBaseType} + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] inArray3; + private byte[] inArray4; + private byte[] inArray5; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle inHandle3; + private GCHandle inHandle4; + private GCHandle inHandle5; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op2BaseType}[] inArray0, {Op1BaseType}[] inArray1, {Op1BaseType}[] inArray2, {Op1BaseType}[] inArray3, {Op1BaseType}[] inArray4, {Op2BaseType}[] inArray5, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = 
inArray2.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray4 = inArray4.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray5 = inArray5.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 32 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfinArray4 || (alignment * 2) < sizeOfinArray5 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.inArray3 = new byte[alignment * 2]; + this.inArray4 = new byte[alignment * 2]; + this.inArray5 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.inHandle3 = GCHandle.Alloc(this.inArray3, GCHandleType.Pinned); + this.inHandle4 = GCHandle.Alloc(this.inArray4, GCHandleType.Pinned); + this.inHandle5 = GCHandle.Alloc(this.inArray5, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray4Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray4[0]), (uint)sizeOfinArray4); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray5Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray5[0]), (uint)sizeOfinArray5); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray3Ptr => Align((byte*)(inHandle3.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray4Ptr => Align((byte*)(inHandle4.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray5Ptr => Align((byte*)(inHandle5.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + inHandle3.Free(); + inHandle4.Free(); + inHandle5.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op2VectorType}<{Op2BaseType}> _fld0; + public 
{Op1VectorType}<{Op1BaseType}> _fld1; + public {Op1VectorType}<{Op1BaseType}> _fld2; + public {Op1VectorType}<{Op1BaseType}> _fld3; + public {Op1VectorType}<{Op1BaseType}> _fld4; + public {Op2VectorType}<{Op2BaseType}> _fld5; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data4[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld4), ref Unsafe.As<{Op1BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data5[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld5), ref Unsafe.As<{Op2BaseType}, byte>(ref _data5[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario(VectorLookupExtension_4Test__{Method}{RetBaseType} testClass) + { + var result = {Isa}.{Method}(_fld0, (_fld1, _fld2, _fld3, _fld4), _fld5); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, _fld3, _fld4, _fld5, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op2BaseType}[] _data0 = new {Op2BaseType}[Op2ElementCount]; + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data3 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data4 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data5 = new {Op2BaseType}[Op2ElementCount]; + + private static {Op2VectorType}<{Op2BaseType}> _clsVar0; + private static {Op1VectorType}<{Op1BaseType}> _clsVar1; + private static {Op1VectorType}<{Op1BaseType}> _clsVar2; + private static {Op1VectorType}<{Op1BaseType}> _clsVar3; + private
static {Op1VectorType}<{Op1BaseType}> _clsVar4; + private static {Op2VectorType}<{Op2BaseType}> _clsVar5; + + private {Op2VectorType}<{Op2BaseType}> _fld0; + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op1VectorType}<{Op1BaseType}> _fld2; + private {Op1VectorType}<{Op1BaseType}> _fld3; + private {Op1VectorType}<{Op1BaseType}> _fld4; + private {Op2VectorType}<{Op2BaseType}> _fld5; + + private DataTable _dataTable; + + static VectorLookupExtension_4Test__{Method}{RetBaseType}() + { + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data4[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar4), ref Unsafe.As<{Op1BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data5[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar5), ref Unsafe.As<{Op2BaseType}, byte>(ref _data5[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + } + + public VectorLookupExtension_4Test__{Method}{RetBaseType}() + { + Succeeded = true; + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data4[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld4), ref 
Unsafe.As<{Op1BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data5[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld5), ref Unsafe.As<{Op2BaseType}, byte>(ref _data5[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data4[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data5[i] = {NextValueOp2}; } + _dataTable = new DataTable(_data0, _data1, _data2, _data3, _data4, _data5, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr), + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray4Ptr)), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray5Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.inArray4Ptr, _dataTable.inArray5Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { + typeof({Op2VectorType}<{Op2BaseType}>), + typeof(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>)), + typeof({Op2VectorType}<{Op2BaseType}>) + }); + + if (method.IsGenericMethodDefinition) + { + method = method.MakeGenericMethod(typeof({RetBaseType})); + } + + var result = method.Invoke(null, new object[] { + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr), + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray4Ptr)), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray5Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.inArray4Ptr, _dataTable.inArray5Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = {Isa}.{Method}( + _clsVar0, + (_clsVar1, + _clsVar2, + _clsVar3, + _clsVar4), + _clsVar5 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _clsVar3, _clsVar4, 
_clsVar5, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op0 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr); + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr); + var op4 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray4Ptr); + var op5 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray5Ptr); + var result = {Isa}.{Method}(op0, (op1, op2, op3, op4), op5); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op0, op1, op2, op3, op4, op5, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new VectorLookupExtension_4Test__{Method}{RetBaseType}(); + var result = {Isa}.{Method}(test._fld0, (test._fld1, test._fld2, test._fld3, test._fld4), test._fld5); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, test._fld3, test._fld4, test._fld5, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld0, (_fld1, _fld2, _fld3, _fld4), _fld5); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _fld3, _fld4, _fld5, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld0, (test._fld1, test._fld2, test._fld3, test._fld4), test._fld5); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, test._fld3, test._fld4, test._fld5, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult({Op2VectorType}<{Op2BaseType}> op0, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op1VectorType}<{Op1BaseType}> op4, {Op2VectorType}<{Op2BaseType}> op5, void* result, [CallerMemberName] string method = "") + { + {Op2BaseType}[] inArray0 = new {Op2BaseType}[Op2ElementCount]; + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray4 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray5 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), op0); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref 
Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray4[0]), op4); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray5[0]), op5); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray0, inArray1, inArray2, inArray3, inArray4, inArray5, outArray, method); + } + + private void ValidateResult(void* op0, void* op1, void* op2, void* op3, void* op4, void* op5, void* result, [CallerMemberName] string method = "") + { + {Op2BaseType}[] inArray0 = new {Op2BaseType}[Op2ElementCount]; + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray4 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray5 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), ref Unsafe.AsRef(op0), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray4[0]), ref Unsafe.AsRef(op4), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray5[0]), ref Unsafe.AsRef(op5), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray0, inArray1, inArray2, inArray3, inArray4, inArray5, outArray, method); + } + + private void ValidateResult({Op2BaseType}[] defaultValues, {Op1BaseType}[] firstOp, {Op1BaseType}[] secondOp, {Op1BaseType}[] thirdOp, {Op1BaseType}[] fourthOp, {Op2BaseType}[] indices, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + {Op1BaseType}[][] table = {firstOp, secondOp, thirdOp, fourthOp}; + + for (var i = 0; i < RetElementCount; i++) + { + if ({ValidateIterResult}) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op2VectorType}<{Op2BaseType}>, ({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>), {Op2VectorType}<{Op2BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" defaultValues: ({string.Join(", ", defaultValues)})"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($" secondOp: ({string.Join(", ", secondOp)})"); + TestLibrary.TestFramework.LogInformation($" thirdOp: 
({string.Join(", ", thirdOp)})"); + TestLibrary.TestFramework.LogInformation($" fourthOp: ({string.Join(", ", fourthOp)})"); + TestLibrary.TestFramework.LogInformation($" indices: ({string.Join(", ", indices)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template new file mode 100644 index 00000000000000..5d06cc9d4924fa --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template @@ -0,0 +1,398 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [ActiveIssue("https://github.com/dotnet/runtime/issues/83891", TestRuntimes.Mono)] + [Fact] + public static void VectorLookup_2_{RetBaseType}() + { + var test = new VectorLookup_2Test__{Method}{RetBaseType}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class VectorLookup_2Test__{Method}{RetBaseType} + { + private struct DataTable + { + private byte[] inArray1; + private byte[] inArray2; + private byte[] inArray3; + private byte[] outArray; + + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle inHandle3; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op1BaseType}[] inArray1, {Op1BaseType}[] inArray2, {Op2BaseType}[] inArray3, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfoutArray = outArray.Length * 
Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 32 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.inArray3 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.inHandle3 = GCHandle.Alloc(this.inArray3, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + } + + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray3Ptr => Align((byte*)(inHandle3.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle1.Free(); + inHandle2.Free(); + inHandle3.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op1VectorType}<{Op1BaseType}> _fld2; + public {Op2VectorType}<{Op2BaseType}> _fld3; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data3[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op2BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario(VectorLookup_2Test__{Method}{RetBaseType} testClass) + { + var result = {Isa}.{Method}((_fld1, _fld2), _fld3); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + 
private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data3 = new {Op2BaseType}[Op2ElementCount]; + + private static {Op1VectorType}<{Op1BaseType}> _clsVar1; + private static {Op1VectorType}<{Op1BaseType}> _clsVar2; + private static {Op2VectorType}<{Op2BaseType}> _clsVar3; + + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op1VectorType}<{Op1BaseType}> _fld2; + private {Op2VectorType}<{Op2BaseType}> _fld3; + + private DataTable _dataTable; + + static VectorLookup_2Test__{Method}{RetBaseType}() + { + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data3[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar3), ref Unsafe.As<{Op2BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + } + + public VectorLookup_2Test__{Method}{RetBaseType}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data3[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op2BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data3[i] = {NextValueOp2}; } + _dataTable = new DataTable(_data1, _data2, _data3, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr)), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray3Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, 
_dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { + typeof(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>)), + typeof({Op2VectorType}<{Op2BaseType}>) + }); + + if (method.IsGenericMethodDefinition) + { + method = method.MakeGenericMethod(typeof({RetBaseType})); + } + + var result = method.Invoke(null, new object[] { + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr)), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray3Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = {Isa}.{Method}( + (_clsVar1, + _clsVar2), + _clsVar3 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar1, _clsVar2, _clsVar3, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray3Ptr); + var result = {Isa}.{Method}((op1, op2), op3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op1, op2, op3, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new VectorLookup_2Test__{Method}{RetBaseType}(); + var result = {Isa}.{Method}((test._fld1, test._fld2), test._fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, test._fld3, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}((_fld1, _fld2), _fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}((test._fld1, test._fld2), test._fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, test._fld3, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op2VectorType}<{Op2BaseType}> op3, void* result, 
[CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray3 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult(void* op1, void* op2, void* op3, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray3 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] firstOp, {Op1BaseType}[] secondOp, {Op2BaseType}[] indices, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + {Op1BaseType}[][] table = {firstOp, secondOp}; + + if ({ValidateFirstResult}) + { + succeeded = false; + } + else + { + for (var i = 1; i < RetElementCount; i++) + { + if ({ValidateRemainingResults}) + { + succeeded = false; + break; + } + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>), {Op2VectorType}<{Op2BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($" secondOp: ({string.Join(", ", secondOp)})"); + TestLibrary.TestFramework.LogInformation($" indices: ({string.Join(", ", indices)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template new file mode 100644 index 00000000000000..a693f71e202477 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template @@ -0,0 +1,425 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license.
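+
+// A rough scalar model of the lookup validated below (a sketch only, assuming byte elements
+// and 16-byte table rows): each index selects a byte from the concatenation of the table
+// vectors, and an out-of-range index yields zero, matching the Arm TBL semantics.
+//
+//     static byte TableLookup(byte[][] table, byte index)
+//     {
+//         int row = index / 16;
+//         return (row < table.Length) ? table[row][index % 16] : (byte)0;
+//     }
+//
+// The {ValidateFirstResult}/{ValidateRemainingResults} placeholders are expected to expand to
+// element-wise checks consistent with this model.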
+ +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [ActiveIssue("https://github.com/dotnet/runtime/issues/83891", TestRuntimes.Mono)] + [Fact] + public static void VectorLookup_3_{RetBaseType}() + { + var test = new VectorLookup_3Test__{Method}{RetBaseType}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class VectorLookup_3Test__{Method}{RetBaseType} + { + private struct DataTable + { + private byte[] inArray1; + private byte[] inArray2; + private byte[] inArray3; + private byte[] inArray4; + private byte[] outArray; + + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle inHandle3; + private GCHandle inHandle4; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op1BaseType}[] inArray1, {Op1BaseType}[] inArray2, {Op1BaseType}[] inArray3, {Op2BaseType}[] inArray4, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray4 = inArray4.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 32 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfinArray4 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.inArray3 = new byte[alignment * 2]; + this.inArray4 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + 
this.inHandle3 = GCHandle.Alloc(this.inArray3, GCHandleType.Pinned); + this.inHandle4 = GCHandle.Alloc(this.inArray4, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray4Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray4[0]), (uint)sizeOfinArray4); + } + + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray3Ptr => Align((byte*)(inHandle3.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray4Ptr => Align((byte*)(inHandle4.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle1.Free(); + inHandle2.Free(); + inHandle3.Free(); + inHandle4.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op1VectorType}<{Op1BaseType}> _fld2; + public {Op1VectorType}<{Op1BaseType}> _fld3; + public {Op2VectorType}<{Op2BaseType}> _fld4; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld4), ref Unsafe.As<{Op2BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario(VectorLookup_3Test__{Method}{RetBaseType} testClass) + { + var result = {Isa}.{Method}((_fld1, _fld2, _fld3), _fld4); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, _fld2, _fld3, _fld4, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = 
Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data3 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data4 = new {Op2BaseType}[Op2ElementCount]; + + private static {Op1VectorType}<{Op1BaseType}> _clsVar1; + private static {Op1VectorType}<{Op1BaseType}> _clsVar2; + private static {Op1VectorType}<{Op1BaseType}> _clsVar3; + private static {Op2VectorType}<{Op2BaseType}> _clsVar4; + + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op1VectorType}<{Op1BaseType}> _fld2; + private {Op1VectorType}<{Op1BaseType}> _fld3; + private {Op2VectorType}<{Op2BaseType}> _fld4; + + private DataTable _dataTable; + + static VectorLookup_3Test__{Method}{RetBaseType}() + { + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar4), ref Unsafe.As<{Op2BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + } + + public VectorLookup_3Test__{Method}{RetBaseType}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld4), ref Unsafe.As<{Op2BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < 
Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + _dataTable = new DataTable(_data1, _data2, _data3, _data4, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr)), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray4Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.inArray4Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { + typeof(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>)), + typeof({Op2VectorType}<{Op2BaseType}>) + }); + + if (method.IsGenericMethodDefinition) + { + method = method.MakeGenericMethod(typeof({RetBaseType})); + } + + var result = method.Invoke(null, new object[] { + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr)), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray4Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.inArray4Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = {Isa}.{Method}( + (_clsVar1, + _clsVar2, + _clsVar3), + _clsVar4 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar1, _clsVar2, _clsVar3, _clsVar4, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr); + var op4 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray4Ptr); + var result = {Isa}.{Method}((op1, op2, op3), op4); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op1, op2, op3, op4, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new VectorLookup_3Test__{Method}{RetBaseType}(); + var result = {Isa}.{Method}((test._fld1, test._fld2, test._fld3), test._fld4); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, test._fld3, test._fld4, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + 
TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}((_fld1, _fld2, _fld3), _fld4); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _fld2, _fld3, _fld4, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}((test._fld1, test._fld2, test._fld3), test._fld4); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, test._fld3, test._fld4, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op2VectorType}<{Op2BaseType}> op4, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray4 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray4[0]), op4); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, inArray4, outArray, method); + } + + private void ValidateResult(void* op1, void* op2, void* op3, void* op4, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray4 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray4[0]), ref Unsafe.AsRef(op4), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + 
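+ // All operands and the result have now been copied out of the pinned, aligned buffers into
+ // fresh managed arrays, so the array-based overload can compare element by element in purely
+ // managed memory.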
ValidateResult(inArray1, inArray2, inArray3, inArray4, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] firstOp, {Op1BaseType}[] secondOp, {Op1BaseType}[] thirdOp, {Op2BaseType}[] indices, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + {Op1BaseType}[][] table = {firstOp, secondOp, thirdOp}; + + if ({ValidateFirstResult}) + { + succeeded = false; + } + else + { + for (var i = 1; i < RetElementCount; i++) + { + if ({ValidateRemainingResults}) + { + succeeded = false; + break; + } + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>), {Op2VectorType}<{Op2BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($" secondOp: ({string.Join(", ", secondOp)})"); + TestLibrary.TestFramework.LogInformation($" thirdOp: ({string.Join(", ", thirdOp)})"); + TestLibrary.TestFramework.LogInformation($" indices: ({string.Join(", ", indices)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template new file mode 100644 index 00000000000000..ba872cbe23b40e --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template @@ -0,0 +1,452 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. 
* + ******************************************************************************/ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [ActiveIssue("https://github.com/dotnet/runtime/issues/83891", TestRuntimes.Mono)] + [Fact] + public static void VectorLookup_4_{RetBaseType}() + { + var test = new VectorLookup_4Test__{Method}{RetBaseType}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class VectorLookup_4Test__{Method}{RetBaseType} + { + private struct DataTable + { + private byte[] inArray1; + private byte[] inArray2; + private byte[] inArray3; + private byte[] inArray4; + private byte[] inArray5; + private byte[] outArray; + + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle inHandle3; + private GCHandle inHandle4; + private GCHandle inHandle5; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op1BaseType}[] inArray1, {Op1BaseType}[] inArray2, {Op1BaseType}[] inArray3, {Op1BaseType}[] inArray4, {Op2BaseType}[] inArray5, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray4 = inArray4.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray5 = inArray5.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 32 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfinArray4 || (alignment * 2) < sizeOfinArray5 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.inArray3 = new byte[alignment * 2]; + this.inArray4 = new byte[alignment * 2]; + this.inArray5 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.inHandle3 = GCHandle.Alloc(this.inArray3, GCHandleType.Pinned); + this.inHandle4 = GCHandle.Alloc(this.inArray4, 
+                this.inHandle5 = GCHandle.Alloc(this.inArray5, GCHandleType.Pinned);
+                this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+                this.alignment = (ulong)alignment;
+
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray3Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray4Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray4[0]), (uint)sizeOfinArray4);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray5Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray5[0]), (uint)sizeOfinArray5);
+            }
+
+            public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray3Ptr => Align((byte*)(inHandle3.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray4Ptr => Align((byte*)(inHandle4.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray5Ptr => Align((byte*)(inHandle5.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+            public void Dispose()
+            {
+                inHandle1.Free();
+                inHandle2.Free();
+                inHandle3.Free();
+                inHandle4.Free();
+                inHandle5.Free();
+                outHandle.Free();
+            }
+
+            private static unsafe void* Align(byte* buffer, ulong expectedAlignment)
+            {
+                return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1));
+            }
+        }
+
+        private struct TestStruct
+        {
+            public {Op1VectorType}<{Op1BaseType}> _fld1;
+            public {Op1VectorType}<{Op1BaseType}> _fld2;
+            public {Op1VectorType}<{Op1BaseType}> _fld3;
+            public {Op1VectorType}<{Op1BaseType}> _fld4;
+            public {Op2VectorType}<{Op2BaseType}> _fld5;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+                for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+                for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+                for (var i = 0; i < Op1ElementCount; i++) { _data4[i] = {NextValueOp1}; }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld4), ref Unsafe.As<{Op1BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data5[i] = {NextValueOp2}; }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld5), ref Unsafe.As<{Op2BaseType}, byte>(ref _data5[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(VectorLookup_4Test__{Method}{RetBaseType} testClass)
+            {
+                var result = {Isa}.{Method}((_fld1, _fld2, _fld3, _fld4), _fld5);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(_fld1, _fld2, _fld3, _fld4, _fld5, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = {LargestVectorSize};
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType});
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType});
+        private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType});
+
+        private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount];
+        private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount];
+        private static {Op1BaseType}[] _data3 = new {Op1BaseType}[Op1ElementCount];
+        private static {Op1BaseType}[] _data4 = new {Op1BaseType}[Op1ElementCount];
+        private static {Op2BaseType}[] _data5 = new {Op2BaseType}[Op2ElementCount];
+
+        private static {Op1VectorType}<{Op1BaseType}> _clsVar1;
+        private static {Op1VectorType}<{Op1BaseType}> _clsVar2;
+        private static {Op1VectorType}<{Op1BaseType}> _clsVar3;
+        private static {Op1VectorType}<{Op1BaseType}> _clsVar4;
+        private static {Op2VectorType}<{Op2BaseType}> _clsVar5;
+
+        private {Op1VectorType}<{Op1BaseType}> _fld1;
+        private {Op1VectorType}<{Op1BaseType}> _fld2;
+        private {Op1VectorType}<{Op1BaseType}> _fld3;
+        private {Op1VectorType}<{Op1BaseType}> _fld4;
+        private {Op2VectorType}<{Op2BaseType}> _fld5;
+
+        private DataTable _dataTable;
+
+        static VectorLookup_4Test__{Method}{RetBaseType}()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data4[i] = {NextValueOp1}; }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar4), ref Unsafe.As<{Op1BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data5[i] = {NextValueOp2}; }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar5), ref Unsafe.As<{Op2BaseType}, byte>(ref _data5[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>());
+        }
+
+        public VectorLookup_4Test__{Method}{RetBaseType}()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data4[i] = {NextValueOp1}; }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld4), ref Unsafe.As<{Op1BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data5[i] = {NextValueOp2}; }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld5), ref Unsafe.As<{Op2BaseType}, byte>(ref _data5[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data4[i] = {NextValueOp1}; }
+            for (var i = 0; i < Op2ElementCount; i++) { _data5[i] = {NextValueOp2}; }
+            _dataTable = new DataTable(_data1, _data2, _data3, _data4, _data5, new {RetBaseType}[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => {Isa}.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = {Isa}.{Method}(
+                (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr),
+                Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr),
+                Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray4Ptr)),
+                Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray5Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.inArray4Ptr, _dataTable.inArray5Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] {
+                typeof(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>)),
+                typeof({Op2VectorType}<{Op2BaseType}>)
+            });
+
+            if (method.IsGenericMethodDefinition)
+            {
+                method = method.MakeGenericMethod(typeof({RetBaseType}));
+            }
+
+            var result = method.Invoke(null, new object[] {
+                (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray4Ptr)),
+                Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray5Ptr)
+            });
+
+            Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.inArray4Ptr, _dataTable.inArray5Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = {Isa}.{Method}(
+                (_clsVar1,
+                _clsVar2,
+                _clsVar3,
+                _clsVar4),
+                _clsVar5
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar1, _clsVar2, _clsVar3, _clsVar4, _clsVar5, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr);
+            var op2 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr);
+            var op3 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr);
+            var op4 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray4Ptr);
+            var op5 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray5Ptr);
+            var result = {Isa}.{Method}((op1, op2, op3, op4), op5);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(op1, op2, op3, op4, op5, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new VectorLookup_4Test__{Method}{RetBaseType}();
+            var result = {Isa}.{Method}((test._fld1, test._fld2, test._fld3, test._fld4), test._fld5);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld1, test._fld2, test._fld3, test._fld4, test._fld5, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = {Isa}.{Method}((_fld1, _fld2, _fld3, _fld4), _fld5);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld1, _fld2, _fld3, _fld4, _fld5, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = {Isa}.{Method}((test._fld1, test._fld2, test._fld3, test._fld4), test._fld5);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld1, test._fld2, test._fld3, test._fld4, test._fld5, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            bool succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                succeeded = true;
+            }
+
+            if (!succeeded)
+            {
+                Succeeded = false;
+            }
+        }
+
+        private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op1VectorType}<{Op1BaseType}> op4, {Op2VectorType}<{Op2BaseType}> op5, void* result, [CallerMemberName] string method = "")
+        {
+            {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount];
+            {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount];
+            {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount];
+            {Op1BaseType}[] inArray4 = new {Op1BaseType}[Op1ElementCount];
+            {Op2BaseType}[] inArray5 = new {Op2BaseType}[Op2ElementCount];
+            {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1);
+            Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), op2);
+            Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), op3);
+            Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray4[0]), op4);
+            Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray5[0]), op5);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>());
+
+            ValidateResult(inArray1, inArray2, inArray3, inArray4, inArray5, outArray, method);
+        }
+
+        private void ValidateResult(void* op1, void* op2, void* op3, void* op4, void* op5, void* result, [CallerMemberName] string method = "")
+        {
+            {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount];
+            {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount];
+            {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount];
+            {Op1BaseType}[] inArray4 = new {Op1BaseType}[Op1ElementCount];
+            {Op2BaseType}[] inArray5 = new {Op2BaseType}[Op2ElementCount];
+            {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef<byte>(op3), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray4[0]), ref Unsafe.AsRef<byte>(op4), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray5[0]), ref Unsafe.AsRef<byte>(op5), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>());
+
+            ValidateResult(inArray1, inArray2, inArray3, inArray4, inArray5, outArray, method);
+        }
+
+        private void ValidateResult({Op1BaseType}[] firstOp, {Op1BaseType}[] secondOp, {Op1BaseType}[] thirdOp, {Op1BaseType}[] fourthOp, {Op2BaseType}[] indices, {RetBaseType}[] result, [CallerMemberName] string method = "")
+        {
+            bool succeeded = true;
+            {Op1BaseType}[][] table = {firstOp, secondOp, thirdOp, fourthOp};
+
+            if ({ValidateFirstResult})
+            {
+                succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if ({ValidateRemainingResults})
+                    {
+                        succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>), {Op2VectorType}<{Op2BaseType}>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})");
+                TestLibrary.TestFramework.LogInformation($" secondOp: ({string.Join(", ", secondOp)})");
+                TestLibrary.TestFramework.LogInformation($" thirdOp: ({string.Join(", ", thirdOp)})");
+                TestLibrary.TestFramework.LogInformation($" fourthOp: ({string.Join(", ", fourthOp)})");
+                TestLibrary.TestFramework.LogInformation($" indices: ({string.Join(", ", indices)})");
", indices)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +}