From 6628904eb3bddf1cd4409ebc97243390b5ae36f0 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 8 Mar 2024 09:32:19 +0000 Subject: [PATCH 01/26] JIT ARM64-SVE: Allow LCL_VARs to store as mask --- src/coreclr/jit/CMakeLists.txt | 4 + src/coreclr/jit/codegenarm64.cpp | 21 ++++- src/coreclr/jit/emitarm64.cpp | 129 ++++++++++++++++++++++++------- src/coreclr/jit/emitarm64.h | 6 +- src/coreclr/jit/hwintrinsic.cpp | 15 +++- src/coreclr/jit/importer.cpp | 11 +++ src/coreclr/jit/instr.cpp | 44 ++++++++--- src/coreclr/jit/scopeinfo.cpp | 6 ++ src/coreclr/jit/simd.cpp | 5 ++ src/coreclr/jit/target.h | 4 +- src/coreclr/jit/targetarm64.h | 3 + src/coreclr/jit/vartype.h | 4 +- 12 files changed, 202 insertions(+), 50 deletions(-) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 5ba50306d1b72a..7674e61b0ce8d5 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -75,12 +75,16 @@ function(create_standalone_jit) if ((TARGETDETAILS_ARCH STREQUAL "x64") OR (TARGETDETAILS_ARCH STREQUAL "arm64") OR ((TARGETDETAILS_ARCH STREQUAL "x86") AND NOT (TARGETDETAILS_OS STREQUAL "unix"))) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_SIMD) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_HW_INTRINSICS) + target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_MASKED_SIMD) + target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_MASKED_HW_INTRINSICS) endif () endfunction() if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR (CLR_CMAKE_TARGET_ARCH_I386 AND NOT CLR_CMAKE_HOST_UNIX)) add_compile_definitions($<$>>:FEATURE_SIMD>) add_compile_definitions($<$>>:FEATURE_HW_INTRINSICS>) + add_compile_definitions($<$>>:FEATURE_MASKED_SIMD>) + add_compile_definitions($<$>>:FEATURE_MASKED_HW_INTRINSICS>) endif () # JIT_BUILD disables certain PAL_TRY debugging features diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 
81370e6413835f..f690baf75d66b8 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2771,7 +2771,16 @@ void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) emitAttr attr = emitActualTypeSize(targetType); emitter* emit = GetEmitter(); - emit->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0); + + if (ins == INS_sve_ldr && !varTypeUsesMaskReg(targetType)) + { + emit->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0, INS_SCALABLE_OPTS_UNPREDICATED); + } + else + { + emit->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0); + } + genProduceReg(tree); } } @@ -2956,7 +2965,15 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) instruction ins = ins_StoreFromSrc(dataReg, targetType); emitAttr attr = emitActualTypeSize(targetType); - emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0); + // TODO-SVE: Removable once REG_V0 and REG_P0 are distinct + if (ins == INS_sve_str && !varTypeUsesMaskReg(targetType)) + { + emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0, INS_SCALABLE_OPTS_UNPREDICATED); + } + else + { + emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0); + } } else // store into register (i.e move into register) { diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 92a144c26d08b0..9c99dd8fb06a87 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -17311,13 +17311,19 @@ void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) * * Add an instruction referencing a register and a stack-based local variable. 
*/ -void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) +void emitter::emitIns_R_S(instruction ins, + emitAttr attr, + regNumber reg1, + int varx, + int offs, + insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) { - emitAttr size = EA_SIZE(attr); - insFormat fmt = IF_NONE; - int disp = 0; - unsigned scale = 0; - bool isLdrStr = false; + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; + int disp = 0; + unsigned scale = 0; + bool isLdrStr = false; + bool isScalable = false; assert(offs >= 0); @@ -17353,6 +17359,31 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va scale = 0; break; + case INS_sve_ldr: + assert(isVectorRegister(reg1) || isPredicateRegister(reg1)); + isScalable = true; + + // TODO-SVE: This should probably be set earlier in the caller + size = EA_SCALABLE; + attr = size; + + // TODO-SVE: Use register number instead of enum + if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + { + fmt = IF_SVE_IE_2A; + // TODO-SVE: Don't assume 128bit vectors + scale = NaturalScale_helper(EA_16BYTE); + } + else + { + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_ID_2A; + // TODO-SVE: Don't assume 128bit vectors + // Predicate size is vector length / 8 + scale = NaturalScale_helper(EA_2BYTE); + } + break; + default: NYI("emitIns_R_S"); // FP locals? 
return; @@ -17360,9 +17391,10 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va } // end switch (ins) /* Figure out the variable's frame position */ - ssize_t imm; - int base; - bool FPbased; + ssize_t imm; + int base; + bool FPbased; + insFormat scalarfmt = fmt; base = emitComp->lvaFrameAddress(varx, &FPbased); disp = base + offs; @@ -17387,13 +17419,13 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va if (imm <= 0x0fff) { - fmt = IF_DI_2A; // add reg1,reg2,#disp + scalarfmt = IF_DI_2A; // add reg1,reg2,#disp } else { regNumber rsvdReg = codeGen->rsGetRsvdReg(); codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); - fmt = IF_DR_3A; // add reg1,reg2,rsvdReg + scalarfmt = IF_DR_3A; // add reg1,reg2,rsvdReg } } else @@ -17402,13 +17434,13 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va imm = disp; if (imm == 0) { - fmt = IF_LS_2A; + scalarfmt = IF_LS_2A; } else if ((imm < 0) || ((imm & mask) != 0)) { if ((imm >= -256) && (imm <= 255)) { - fmt = IF_LS_2C; + scalarfmt = IF_LS_2C; } else { @@ -17417,11 +17449,13 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va } else if (imm > 0) { + // TODO: We should be able to scale values <0 for all variants. + if (((imm & mask) == 0) && ((imm >> scale) < 0x1000)) { imm >>= scale; // The immediate is scaled by the size of the ld/st - fmt = IF_LS_2B; + scalarfmt = IF_LS_2B; } else { @@ -17433,10 +17467,15 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va { regNumber rsvdReg = codeGen->rsGetRsvdReg(); codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); - fmt = IF_LS_3A; + scalarfmt = IF_LS_3A; } } + // Set the format based on the immediate encoding + if (!isScalable) + { + fmt = scalarfmt; + } assert(fmt != IF_NONE); // Try to optimize a load/store with an alternative instruction. 
@@ -17564,7 +17603,12 @@ void emitter::emitIns_R_R_S_S( * * Add an instruction referencing a stack-based local variable and a register */ -void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) +void emitter::emitIns_S_R(instruction ins, + emitAttr attr, + regNumber reg1, + int varx, + int offs, + insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) { assert(offs >= 0); emitAttr size = EA_SIZE(attr); @@ -17573,6 +17617,7 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va unsigned scale = 0; bool isVectorStore = false; bool isStr = false; + bool isScalable = false; // TODO-ARM64-CQ: use unscaled loads? /* Figure out the encoding format of the instruction */ @@ -17604,6 +17649,31 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va isStr = true; break; + case INS_sve_str: + assert(isVectorRegister(reg1) || isPredicateRegister(reg1)); + isScalable = true; + + // TODO-SVE: This should probably be set earlier in the caller + size = EA_SCALABLE; + attr = size; + + // TODO-SVE: Use register number instead of enum + if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + { + fmt = IF_SVE_JH_2A; + // TODO-SVE: Don't assume 128bit vectors + scale = NaturalScale_helper(EA_16BYTE); + } + else + { + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_JG_2A; + // TODO-SVE: Don't assume 128bit vectors + // Predicate size is vector length / 8 + scale = NaturalScale_helper(EA_2BYTE); + } + break; + default: NYI("emitIns_S_R"); // FP locals? return; @@ -17617,7 +17687,7 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va base = emitComp->lvaFrameAddress(varx, &FPbased); disp = base + offs; assert(scale >= 0); - if (isVectorStore) + if (isVectorStore || isScalable) { assert(scale <= 4); } @@ -17630,18 +17700,19 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; reg2 = encodingSPtoZR(reg2); - bool useRegForImm = false; - ssize_t imm = disp; - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + bool useRegForImm = false; + ssize_t imm = disp; + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + insFormat scalarfmt = fmt; if (imm == 0) { - fmt = IF_LS_2A; + scalarfmt = IF_LS_2A; } else if ((imm < 0) || ((imm & mask) != 0)) { - if ((imm >= -256) && (imm <= 255)) + if (isValidSimm9(imm)) { - fmt = IF_LS_2C; + scalarfmt = IF_LS_2C; } else { @@ -17650,11 +17721,12 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va } else if (imm > 0) { + // TODO: We should be able to scale values <0 for all variants. + if (((imm & mask) == 0) && ((imm >> scale) < 0x1000)) { imm >>= scale; // The immediate is scaled by the size of the ld/st - - fmt = IF_LS_2B; + scalarfmt = IF_LS_2B; } else { @@ -17668,9 +17740,14 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va // It is instead implicit when idSetIsLclVar() is set, with this encoding format. regNumber rsvdReg = codeGen->rsGetRsvdReg(); codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); - fmt = IF_LS_3A; + scalarfmt = IF_LS_3A; } + // Set the format based on the immediate encoding + if (!isScalable) + { + fmt = scalarfmt; + } assert(fmt != IF_NONE); // Try to optimize a store with an alternative instruction. 
diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index c1b6fa20962694..6578c8d59d24fc 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -1782,7 +1782,8 @@ void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int void emitIns_S(instruction ins, emitAttr attr, int varx, int offs); -void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); +void emitIns_S_R( + instruction ins, emitAttr attr, regNumber ireg, int varx, int offs, insScalableOpts sopt = INS_SCALABLE_OPTS_NONE); void emitIns_S_S_R_R( instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); @@ -1800,7 +1801,8 @@ void emitIns_R_R_R_I_LdStPair(instruction ins, int offs2 = -1 DEBUG_ARG(unsigned var1RefsOffs = BAD_IL_OFFSET) DEBUG_ARG(unsigned var2RefsOffs = BAD_IL_OFFSET)); -void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); +void emitIns_R_S( + instruction ins, emitAttr attr, regNumber ireg, int varx, int offs, insScalableOpts sopt = INS_SCALABLE_OPTS_NONE); void emitIns_R_R_S_S( instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 14c262524da2d8..a4d9f57e4d9413 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -778,7 +778,11 @@ GenTree* Compiler::getArgForHWIntrinsic(var_types argType, { arg = impSIMDPopStack(); } +#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_SIMD) + assert(varTypeIsSIMD(arg) || varTypeIsMask(arg)); +#else assert(varTypeIsSIMD(arg)); +#endif // TARGET_ARM64 && FEATURE_MASKED_SIMD } else { @@ -1591,13 +1595,16 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } #if defined(TARGET_ARM64) + if (HWIntrinsicInfo::IsMaskedOperation(intrinsic)) { - // Op1 input is a vector. HWInstrinsic requires a mask, so convert to a mask. 
assert(numArgs > 0); - GenTree* op1 = retNode->AsHWIntrinsic()->Op(1); - op1 = convertHWIntrinsicToMask(retType, op1, simdBaseJitType, simdSize); - retNode->AsHWIntrinsic()->Op(1) = op1; + GenTree* op1 = retNode->AsHWIntrinsic()->Op(1); + if (op1->TypeGet() != TYP_MASK) + { + // Op1 input is a vector. HWInstrinsic requires a mask. + retNode->AsHWIntrinsic()->Op(1) = convertHWIntrinsicToMask(retType, op1, simdBaseJitType, simdSize); + } } if (retType != nodeRetType) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index e50773eefd4a53..9ab43dd3a88462 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -6419,6 +6419,17 @@ void Compiler::impImportBlockCode(BasicBlock* block) impSpillSideEffects(false, CHECK_SPILL_ALL DEBUGARG("Spill before store to pinned local")); } +#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_SIMD) + // Masks must be converted to vectors before being stored to memory. + // But, for local stores we can optimise away the conversion + if (op1->OperIsHWIntrinsic() && op1->AsHWIntrinsic()->GetHWIntrinsicId() == NI_Sve_ConvertMaskToVector) + { + op1 = op1->AsHWIntrinsic()->Op(1); + lvaTable[lclNum].lvType = TYP_MASK; + lclTyp = lvaGetActualType(lclNum); + } +#endif // TARGET_ARM64 && FEATURE_MASKED_SIMD + op1 = gtNewStoreLclVarNode(lclNum, op1); // TODO-ASG: delete this zero-diff quirk. Requires some forward substitution work. 
diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 3d307ddfe7d963..d50051e5089922 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1680,12 +1680,16 @@ instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg) return ins; } -#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) +#if defined(FEATURE_MASKED_SIMD) if (varTypeUsesMaskReg(srcType)) { +#if defined(TARGET_XARCH) return INS_kmovq_msk; +#elif defined(TARGET_ARM64) + return INS_mov; +#endif } -#endif // TARGET_XARCH && FEATURE_SIMD +#endif // FEATURE_MASKED_SIMD assert(varTypeUsesFloatReg(srcType)); @@ -1830,12 +1834,16 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* return ins; } -#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) +#if defined(FEATURE_MASKED_SIMD) if (varTypeUsesMaskReg(srcType)) { +#if defined(TARGET_XARCH) return INS_kmovq_msk; +#elif defined(TARGET_ARM64) + return INS_sve_ldr; +#endif } -#endif // TARGET_XARCH && FEATURE_SIMD +#endif // FEATURE_MASKED_SIMD assert(varTypeUsesFloatReg(srcType)); @@ -1914,12 +1922,16 @@ instruction CodeGen::ins_Copy(var_types dstType) #endif } -#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) +#if defined(FEATURE_MASKED_SIMD) if (varTypeUsesMaskReg(dstType)) { +#if defined(TARGET_XARCH) return INS_kmovq_msk; +#elif defined(TARGET_ARM64) + return INS_mov; +#endif } -#endif // TARGET_XARCH && FEATURE_SIMD +#endif // FEATURE_MASKED_SIMD assert(varTypeUsesFloatReg(dstType)); @@ -2023,7 +2035,7 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) #endif } -#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) +#if defined(FEATURE_MASKED_SIMD) if (varTypeUsesMaskReg(dstType)) { if (genIsValidMaskReg(srcReg)) @@ -2034,9 +2046,13 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) // mask to int assert(genIsValidIntOrFakeReg(srcReg)); +#if defined(TARGET_XARCH) return INS_kmovq_gpr; +#elif defined(TARGET_ARM64) + return INS_mov; +#endif } 
-#endif // TARGET_XARCH && FEATURE_SIMD +#endif // FEATURE_MASKED_SIMD assert(varTypeUsesFloatReg(dstType)); @@ -2138,12 +2154,16 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false return ins; } -#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) +#if defined(FEATURE_MASKED_SIMD) if (varTypeUsesMaskReg(dstType)) { +#if defined(TARGET_XARCH) return INS_kmovq_msk; +#elif defined(TARGET_ARM64) + return INS_sve_str; +#endif } -#endif // TARGET_XARCH && FEATURE_SIMD +#endif // FEATURE_MASKED_SIMD assert(varTypeUsesFloatReg(dstType)); @@ -2255,7 +2275,7 @@ instruction CodeGenInterface::ins_StoreFromSrc(regNumber srcReg, var_types dstTy return ins_Store(dstType, aligned); } -#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) +#if defined(FEATURE_MASKED_SIMD) if (varTypeUsesMaskReg(dstType)) { if (genIsValidMaskReg(srcReg)) @@ -2268,7 +2288,7 @@ instruction CodeGenInterface::ins_StoreFromSrc(regNumber srcReg, var_types dstTy assert(genIsValidIntOrFakeReg(srcReg)); return ins_Store(dstType, aligned); } -#endif // TARGET_XARCH && FEATURE_SIMD +#endif // FEATURE_MASKED_SIMD assert(varTypeUsesFloatReg(dstType)); diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp index 01238efcfcbd0c..a368d295250485 100644 --- a/src/coreclr/jit/scopeinfo.cpp +++ b/src/coreclr/jit/scopeinfo.cpp @@ -301,6 +301,9 @@ void CodeGenInterface::siVarLoc::siFillStackVarLoc( case TYP_LONG: case TYP_DOUBLE: #endif // TARGET_64BIT +#if defined(TARGET_ARM64) + case TYP_MASK: +#endif // TARGET_ARM64 #if FEATURE_IMPLICIT_BYREFS // In the AMD64 ABI we are supposed to pass a struct by reference when its // size is not 1, 2, 4 or 8 bytes in size. 
During fgMorph, the compiler modifies @@ -433,6 +436,9 @@ void CodeGenInterface::siVarLoc::siFillRegisterVarLoc( case TYP_SIMD32: case TYP_SIMD64: #endif // TARGET_XARCH +#if defined(TARGET_ARM64) + case TYP_MASK: +#endif // TARGET_ARM64 { this->vlType = VLT_REG_FP; diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 48c23eb646411f..a9955d4bd6b792 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -456,7 +456,12 @@ GenTree* Compiler::impSIMDPopStack() { StackEntry se = impPopStack(); GenTree* tree = se.val; + +#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_SIMD) + assert(varTypeIsSIMD(tree) || varTypeIsMask(tree)); +#else assert(varTypeIsSIMD(tree)); +#endif // TARGET_ARM64 && FEATURE_MASKED_SIMD // Handle calls that may return the struct via a return buffer. if (tree->OperIs(GT_CALL, GT_RET_EXPR)) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 1e32a1d88946cc..aeac5cdfed2653 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -393,7 +393,7 @@ inline bool genIsValidFloatReg(regNumber reg) return reg >= REG_FP_FIRST && reg <= REG_FP_LAST; } -#if defined(TARGET_XARCH) +#if defined(FEATURE_MASKED_SIMD) /***************************************************************************** * Return true if the register is a valid mask register */ @@ -401,7 +401,7 @@ inline bool genIsValidMaskReg(regNumber reg) { return reg >= REG_MASK_FIRST && reg <= REG_MASK_LAST; } -#endif // TARGET_XARCH +#endif // FEATURE_MASKED_SIMD #ifdef TARGET_ARM diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index 7d76fc41d3300c..6c7a06657a618b 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -56,6 +56,9 @@ #define REG_PREDICATE_HIGH_FIRST REG_P8 // Similarly, some instructions can only use the second half of the predicate registers. 
#define REG_PREDICATE_HIGH_LAST REG_P15 + #define REG_MASK_FIRST REG_PREDICATE_FIRST + #define REG_MASK_LAST REG_PREDICATE_LAST + static_assert_no_msg(REG_PREDICATE_HIGH_LAST == REG_PREDICATE_LAST); #define REGNUM_BITS 6 // number of bits in a REG_* diff --git a/src/coreclr/jit/vartype.h b/src/coreclr/jit/vartype.h index ed57a76b6e7ad8..6f4096d3a71133 100644 --- a/src/coreclr/jit/vartype.h +++ b/src/coreclr/jit/vartype.h @@ -85,9 +85,9 @@ inline bool varTypeIsSIMD(T vt) template inline bool varTypeIsMask(T vt) { -#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) +#if defined(FEATURE_MASKED_SIMD) return (TypeGet(vt) == TYP_MASK); -#else // FEATURE_SIMD +#else // FEATURE_MASKED_SIMD return false; #endif } From ed574f994a7972756af7255798e1325c2e08d083 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 13 Mar 2024 09:42:26 +0000 Subject: [PATCH 02/26] Remove FEATURE_MASKED_SIMD --- src/coreclr/jit/CMakeLists.txt | 2 -- src/coreclr/jit/hwintrinsic.cpp | 4 ++-- src/coreclr/jit/importer.cpp | 4 ++-- src/coreclr/jit/instr.cpp | 24 ++++++++++++------------ src/coreclr/jit/simd.cpp | 4 ++-- src/coreclr/jit/target.h | 4 ++-- src/coreclr/jit/vartype.h | 4 ++-- 7 files changed, 22 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 7674e61b0ce8d5..9290a04eeaf0d3 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -75,7 +75,6 @@ function(create_standalone_jit) if ((TARGETDETAILS_ARCH STREQUAL "x64") OR (TARGETDETAILS_ARCH STREQUAL "arm64") OR ((TARGETDETAILS_ARCH STREQUAL "x86") AND NOT (TARGETDETAILS_OS STREQUAL "unix"))) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_SIMD) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_HW_INTRINSICS) - target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_MASKED_SIMD) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_MASKED_HW_INTRINSICS) endif () endfunction() @@ -83,7 +82,6 
@@ endfunction() if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR (CLR_CMAKE_TARGET_ARCH_I386 AND NOT CLR_CMAKE_HOST_UNIX)) add_compile_definitions($<$>>:FEATURE_SIMD>) add_compile_definitions($<$>>:FEATURE_HW_INTRINSICS>) - add_compile_definitions($<$>>:FEATURE_MASKED_SIMD>) add_compile_definitions($<$>>:FEATURE_MASKED_HW_INTRINSICS>) endif () diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index a4d9f57e4d9413..4d3c37e3f40685 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -778,11 +778,11 @@ GenTree* Compiler::getArgForHWIntrinsic(var_types argType, { arg = impSIMDPopStack(); } -#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_SIMD) +#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_HW_INTRINSICS) assert(varTypeIsSIMD(arg) || varTypeIsMask(arg)); #else assert(varTypeIsSIMD(arg)); -#endif // TARGET_ARM64 && FEATURE_MASKED_SIMD +#endif // TARGET_ARM64 && FEATURE_MASKED_HW_INTRINSICS } else { diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 9ab43dd3a88462..47765c88e11dc3 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -6419,7 +6419,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) impSpillSideEffects(false, CHECK_SPILL_ALL DEBUGARG("Spill before store to pinned local")); } -#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_SIMD) +#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_HW_INTRINSICS) // Masks must be converted to vectors before being stored to memory. 
// But, for local stores we can optimise away the conversion if (op1->OperIsHWIntrinsic() && op1->AsHWIntrinsic()->GetHWIntrinsicId() == NI_Sve_ConvertMaskToVector) @@ -6428,7 +6428,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) lvaTable[lclNum].lvType = TYP_MASK; lclTyp = lvaGetActualType(lclNum); } -#endif // TARGET_ARM64 && FEATURE_MASKED_SIMD +#endif // TARGET_ARM64 && FEATURE_MASKED_HW_INTRINSICS op1 = gtNewStoreLclVarNode(lclNum, op1); diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index d50051e5089922..2027994f9ad1be 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1680,7 +1680,7 @@ instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg) return ins; } -#if defined(FEATURE_MASKED_SIMD) +#if defined(FEATURE_MASKED_HW_INTRINSICS) if (varTypeUsesMaskReg(srcType)) { #if defined(TARGET_XARCH) @@ -1689,7 +1689,7 @@ instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg) return INS_mov; #endif } -#endif // FEATURE_MASKED_SIMD +#endif // FEATURE_MASKED_HW_INTRINSICS assert(varTypeUsesFloatReg(srcType)); @@ -1834,7 +1834,7 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* return ins; } -#if defined(FEATURE_MASKED_SIMD) +#if defined(FEATURE_MASKED_HW_INTRINSICS) if (varTypeUsesMaskReg(srcType)) { #if defined(TARGET_XARCH) @@ -1843,7 +1843,7 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* return INS_sve_ldr; #endif } -#endif // FEATURE_MASKED_SIMD +#endif // FEATURE_MASKED_HW_INTRINSICS assert(varTypeUsesFloatReg(srcType)); @@ -1922,7 +1922,7 @@ instruction CodeGen::ins_Copy(var_types dstType) #endif } -#if defined(FEATURE_MASKED_SIMD) +#if defined(FEATURE_MASKED_HW_INTRINSICS) if (varTypeUsesMaskReg(dstType)) { #if defined(TARGET_XARCH) @@ -1931,7 +1931,7 @@ instruction CodeGen::ins_Copy(var_types dstType) return INS_mov; #endif } -#endif // FEATURE_MASKED_SIMD +#endif // FEATURE_MASKED_HW_INTRINSICS 
assert(varTypeUsesFloatReg(dstType)); @@ -2035,7 +2035,7 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) #endif } -#if defined(FEATURE_MASKED_SIMD) +#if defined(FEATURE_MASKED_HW_INTRINSICS) if (varTypeUsesMaskReg(dstType)) { if (genIsValidMaskReg(srcReg)) @@ -2052,7 +2052,7 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) return INS_mov; #endif } -#endif // FEATURE_MASKED_SIMD +#endif // FEATURE_MASKED_HW_INTRINSICS assert(varTypeUsesFloatReg(dstType)); @@ -2154,7 +2154,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false return ins; } -#if defined(FEATURE_MASKED_SIMD) +#if defined(FEATURE_MASKED_HW_INTRINSICS) if (varTypeUsesMaskReg(dstType)) { #if defined(TARGET_XARCH) @@ -2163,7 +2163,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false return INS_sve_str; #endif } -#endif // FEATURE_MASKED_SIMD +#endif // FEATURE_MASKED_HW_INTRINSICS assert(varTypeUsesFloatReg(dstType)); @@ -2275,7 +2275,7 @@ instruction CodeGenInterface::ins_StoreFromSrc(regNumber srcReg, var_types dstTy return ins_Store(dstType, aligned); } -#if defined(FEATURE_MASKED_SIMD) +#if defined(FEATURE_MASKED_HW_INTRINSICS) if (varTypeUsesMaskReg(dstType)) { if (genIsValidMaskReg(srcReg)) @@ -2288,7 +2288,7 @@ instruction CodeGenInterface::ins_StoreFromSrc(regNumber srcReg, var_types dstTy assert(genIsValidIntOrFakeReg(srcReg)); return ins_Store(dstType, aligned); } -#endif // FEATURE_MASKED_SIMD +#endif // FEATURE_MASKED_HW_INTRINSICS assert(varTypeUsesFloatReg(dstType)); diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index a9955d4bd6b792..fae45e5cd2a232 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -457,11 +457,11 @@ GenTree* Compiler::impSIMDPopStack() StackEntry se = impPopStack(); GenTree* tree = se.val; -#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_SIMD) +#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_HW_INTRINSICS) 
assert(varTypeIsSIMD(tree) || varTypeIsMask(tree)); #else assert(varTypeIsSIMD(tree)); -#endif // TARGET_ARM64 && FEATURE_MASKED_SIMD +#endif // TARGET_ARM64 && FEATURE_MASKED_HW_INTRINSICS // Handle calls that may return the struct via a return buffer. if (tree->OperIs(GT_CALL, GT_RET_EXPR)) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index aeac5cdfed2653..7897e0c7fc9323 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -393,7 +393,7 @@ inline bool genIsValidFloatReg(regNumber reg) return reg >= REG_FP_FIRST && reg <= REG_FP_LAST; } -#if defined(FEATURE_MASKED_SIMD) +#if defined(FEATURE_MASKED_HW_INTRINSICS) /***************************************************************************** * Return true if the register is a valid mask register */ @@ -401,7 +401,7 @@ inline bool genIsValidMaskReg(regNumber reg) { return reg >= REG_MASK_FIRST && reg <= REG_MASK_LAST; } -#endif // FEATURE_MASKED_SIMD +#endif // FEATURE_MASKED_HW_INTRINSICS #ifdef TARGET_ARM diff --git a/src/coreclr/jit/vartype.h b/src/coreclr/jit/vartype.h index 6f4096d3a71133..9742f87d814730 100644 --- a/src/coreclr/jit/vartype.h +++ b/src/coreclr/jit/vartype.h @@ -85,9 +85,9 @@ inline bool varTypeIsSIMD(T vt) template inline bool varTypeIsMask(T vt) { -#if defined(FEATURE_MASKED_SIMD) +#if defined(FEATURE_MASKED_HW_INTRINSICS) return (TypeGet(vt) == TYP_MASK); -#else // FEATURE_MASKED_SIMD +#else // FEATURE_MASKED_HW_INTRINSICS return false; #endif } From 02fa227df75fab2b0b5f55aea53cede5235c3bb6 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 13 Mar 2024 09:57:29 +0000 Subject: [PATCH 03/26] More generic ifdefs --- src/coreclr/jit/hwintrinsic.cpp | 4 ++-- src/coreclr/jit/scopeinfo.cpp | 8 ++++---- src/coreclr/jit/simd.cpp | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 4d3c37e3f40685..471b5daf7af644 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ 
b/src/coreclr/jit/hwintrinsic.cpp @@ -778,11 +778,11 @@ GenTree* Compiler::getArgForHWIntrinsic(var_types argType, { arg = impSIMDPopStack(); } -#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_HW_INTRINSICS) +#if defined(FEATURE_MASKED_HW_INTRINSICS) assert(varTypeIsSIMD(arg) || varTypeIsMask(arg)); #else assert(varTypeIsSIMD(arg)); -#endif // TARGET_ARM64 && FEATURE_MASKED_HW_INTRINSICS +#endif // FEATURE_MASKED_HW_INTRINSICS } else { diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp index a368d295250485..06c4dd244a4be5 100644 --- a/src/coreclr/jit/scopeinfo.cpp +++ b/src/coreclr/jit/scopeinfo.cpp @@ -301,9 +301,9 @@ void CodeGenInterface::siVarLoc::siFillStackVarLoc( case TYP_LONG: case TYP_DOUBLE: #endif // TARGET_64BIT -#if defined(TARGET_ARM64) +#if defined(FEATURE_MASKED_HW_INTRINSICS) case TYP_MASK: -#endif // TARGET_ARM64 +#endif // FEATURE_MASKED_HW_INTRINSICS #if FEATURE_IMPLICIT_BYREFS // In the AMD64 ABI we are supposed to pass a struct by reference when its // size is not 1, 2, 4 or 8 bytes in size. 
During fgMorph, the compiler modifies @@ -436,9 +436,9 @@ void CodeGenInterface::siVarLoc::siFillRegisterVarLoc( case TYP_SIMD32: case TYP_SIMD64: #endif // TARGET_XARCH -#if defined(TARGET_ARM64) +#if defined(FEATURE_MASKED_HW_INTRINSICS) case TYP_MASK: -#endif // TARGET_ARM64 +#endif // FEATURE_MASKED_HW_INTRINSICS { this->vlType = VLT_REG_FP; diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index fae45e5cd2a232..74b2f042e1fb6e 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -457,11 +457,11 @@ GenTree* Compiler::impSIMDPopStack() StackEntry se = impPopStack(); GenTree* tree = se.val; -#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_HW_INTRINSICS) +#if defined(FEATURE_MASKED_HW_INTRINSICS) assert(varTypeIsSIMD(tree) || varTypeIsMask(tree)); #else assert(varTypeIsSIMD(tree)); -#endif // TARGET_ARM64 && FEATURE_MASKED_HW_INTRINSICS +#endif // FEATURE_MASKED_HW_INTRINSICS // Handle calls that may return the struct via a return buffer. if (tree->OperIs(GT_CALL, GT_RET_EXPR)) From 2e2e17418045009154ada2ed2236a8a8ab219347 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 13 Mar 2024 10:14:31 +0000 Subject: [PATCH 04/26] Add varTypeIsSIMDOrMask --- src/coreclr/jit/hwintrinsic.cpp | 6 +----- src/coreclr/jit/simd.cpp | 6 +----- src/coreclr/jit/vartype.h | 6 ++++++ 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 471b5daf7af644..9b09c9d5382499 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -778,11 +778,7 @@ GenTree* Compiler::getArgForHWIntrinsic(var_types argType, { arg = impSIMDPopStack(); } -#if defined(FEATURE_MASKED_HW_INTRINSICS) - assert(varTypeIsSIMD(arg) || varTypeIsMask(arg)); -#else - assert(varTypeIsSIMD(arg)); -#endif // FEATURE_MASKED_HW_INTRINSICS + assert(varTypeIsSIMDOrMask(arg)); } else { diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 
74b2f042e1fb6e..b6879dafc498d9 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -457,11 +457,7 @@ GenTree* Compiler::impSIMDPopStack() StackEntry se = impPopStack(); GenTree* tree = se.val; -#if defined(FEATURE_MASKED_HW_INTRINSICS) - assert(varTypeIsSIMD(tree) || varTypeIsMask(tree)); -#else - assert(varTypeIsSIMD(tree)); -#endif // FEATURE_MASKED_HW_INTRINSICS + assert(varTypeIsSIMDOrMask(tree)); // Handle calls that may return the struct via a return buffer. if (tree->OperIs(GT_CALL, GT_RET_EXPR)) diff --git a/src/coreclr/jit/vartype.h b/src/coreclr/jit/vartype.h index 9742f87d814730..1623addb69b079 100644 --- a/src/coreclr/jit/vartype.h +++ b/src/coreclr/jit/vartype.h @@ -92,6 +92,12 @@ inline bool varTypeIsMask(T vt) #endif } +template +inline bool varTypeIsSIMDOrMask(T vt) +{ + return varTypeIsSIMD(vt) || varTypeIsMask(vt); +} + template inline bool varTypeIsIntegral(T vt) { From fcdb18ad52d756d4b956911d706879e1b4ded26c Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 13 Mar 2024 12:11:55 +0000 Subject: [PATCH 05/26] Add extra type checks --- src/coreclr/jit/hwintrinsic.cpp | 4 ++-- src/coreclr/jit/hwintrinsicarm64.cpp | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 9b09c9d5382499..168524432c6f21 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1591,12 +1591,11 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } #if defined(TARGET_ARM64) - if (HWIntrinsicInfo::IsMaskedOperation(intrinsic)) { assert(numArgs > 0); GenTree* op1 = retNode->AsHWIntrinsic()->Op(1); - if (op1->TypeGet() != TYP_MASK) + if (!varTypeIsMask(op1)) { // Op1 input is a vector. HWInstrinsic requires a mask. 
retNode->AsHWIntrinsic()->Op(1) = convertHWIntrinsicToMask(retType, op1, simdBaseJitType, simdSize); @@ -1607,6 +1606,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, { // HWInstrinsic returns a mask, but all returns must be vectors, so convert mask to vector. assert(HWIntrinsicInfo::ReturnsPerElementMask(intrinsic)); + assert(nodeRetType == TYP_MASK); retNode = convertHWIntrinsicFromMask(retNode->AsHWIntrinsic(), retType); } #endif // defined(TARGET_ARM64) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 5c7f796c61c909..c10e5a21dcd751 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2219,6 +2219,8 @@ GenTree* Compiler::convertHWIntrinsicToMask(var_types type, CorInfoType simdBaseJitType, unsigned simdSize) { + assert(varTypeIsSIMD(node)); + // ConvertVectorToMask uses cmpne which requires an embedded mask. GenTree* embeddedMask = gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateTrueMaskAll, simdBaseJitType, simdSize); return gtNewSimdHWIntrinsicNode(TYP_MASK, embeddedMask, node, NI_Sve_ConvertVectorToMask, simdBaseJitType, @@ -2237,7 +2239,9 @@ GenTree* Compiler::convertHWIntrinsicToMask(var_types type, // GenTree* Compiler::convertHWIntrinsicFromMask(GenTreeHWIntrinsic* node, var_types type) { - assert(node->TypeGet() == TYP_MASK); + assert(varTypeIsMask(node)); + assert(varTypeIsSIMD(type)); + return gtNewSimdHWIntrinsicNode(type, node, NI_Sve_ConvertMaskToVector, node->GetSimdBaseJitType(), node->GetSimdSize()); } From 1fc8d5bb303e9f26e44c4d4300af41b60d225ebc Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 13 Mar 2024 15:35:19 +0000 Subject: [PATCH 06/26] Fix use of isValidSimm9, and add extra uses --- src/coreclr/jit/emitarm64.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index cc3322ceae11fe..69923f10323eb8 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ 
b/src/coreclr/jit/emitarm64.cpp @@ -5932,7 +5932,7 @@ emitter::code_t emitter::emitInsCodeSve(instruction ins, insFormat fmt) if (imm == 0) return true; // Encodable using IF_LS_2A - if ((imm >= -256) && (imm <= 255)) + if (isValidSimm<9>(imm)) return true; // Encodable using IF_LS_2C (or possibly IF_LS_2B) if (imm < 0) @@ -10727,7 +10727,7 @@ void emitter::emitIns_R_R_I(instruction ins, } else if (insOptsIndexed(opt) || unscaledOp || (imm < 0) || ((imm & mask) != 0)) { - if ((imm >= -256) && (imm <= 255)) + if (isValidSimm<9>(imm)) { fmt = IF_LS_2C; } @@ -17438,7 +17438,7 @@ void emitter::emitIns_R_S(instruction ins, } else if ((imm < 0) || ((imm & mask) != 0)) { - if ((imm >= -256) && (imm <= 255)) + if (isValidSimm<9>(imm)) { scalarfmt = IF_LS_2C; } @@ -17710,7 +17710,7 @@ void emitter::emitIns_S_R(instruction ins, } else if ((imm < 0) || ((imm & mask) != 0)) { - if (isValidSimm9(imm)) + if (isValidSimm<9>(imm)) { scalarfmt = IF_LS_2C; } From 9dbfe63be7a3118cda43c9319c3a44d9fee50b88 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 13 Mar 2024 15:41:25 +0000 Subject: [PATCH 07/26] Rename mask conversion functions to gtNewSimdConvert* --- src/coreclr/jit/compiler.h | 10 +++++----- src/coreclr/jit/hwintrinsic.cpp | 4 ++-- src/coreclr/jit/hwintrinsicarm64.cpp | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 5b18ebaeabae33..b0eb2586da6831 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3446,6 +3446,11 @@ class Compiler GenTreeIndir* gtNewMethodTableLookup(GenTree* obj); +#if defined(TARGET_ARM64) + GenTree* gtNewSimdConvertVectorToMaskNode(var_types type, GenTree* node, CorInfoType simdBaseJitType, unsigned simdSize); + GenTree* gtNewSimdConvertMaskToVectorNode(GenTreeHWIntrinsic* node, var_types type); +#endif + //------------------------------------------------------------------------ // Other GenTree functions @@ -4556,11 +4561,6 @@ class 
Compiler NamedIntrinsic intrinsic, GenTree* immOp, bool mustExpand, int immLowerBound, int immUpperBound); GenTree* addRangeCheckForHWIntrinsic(GenTree* immOp, int immLowerBound, int immUpperBound); -#if defined(TARGET_ARM64) - GenTree* convertHWIntrinsicToMask(var_types type, GenTree* node, CorInfoType simdBaseJitType, unsigned simdSize); - GenTree* convertHWIntrinsicFromMask(GenTreeHWIntrinsic* node, var_types type); -#endif - #endif // FEATURE_HW_INTRINSICS GenTree* impArrayAccessIntrinsic(CORINFO_CLASS_HANDLE clsHnd, CORINFO_SIG_INFO* sig, diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 168524432c6f21..e8b60b07909d95 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1598,7 +1598,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, if (!varTypeIsMask(op1)) { // Op1 input is a vector. HWInstrinsic requires a mask. - retNode->AsHWIntrinsic()->Op(1) = convertHWIntrinsicToMask(retType, op1, simdBaseJitType, simdSize); + retNode->AsHWIntrinsic()->Op(1) = gtNewSimdConvertVectorToMaskNode(retType, op1, simdBaseJitType, simdSize); } } @@ -1607,7 +1607,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, // HWInstrinsic returns a mask, but all returns must be vectors, so convert mask to vector. 
assert(HWIntrinsicInfo::ReturnsPerElementMask(intrinsic)); assert(nodeRetType == TYP_MASK); - retNode = convertHWIntrinsicFromMask(retNode->AsHWIntrinsic(), retType); + retNode = gtNewSimdConvertMaskToVectorNode(retNode->AsHWIntrinsic(), retType); } #endif // defined(TARGET_ARM64) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index c10e5a21dcd751..ce3899dd568e63 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2204,7 +2204,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } //------------------------------------------------------------------------ -// convertHWIntrinsicFromMask: Convert a HW instrinsic vector node to a mask +// gtNewSimdConvertMaskToVectorNode: Convert a HW instrinsic vector node to a mask // // Arguments: // node -- The node to convert @@ -2214,7 +2214,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, // Return Value: // The node converted to the a mask type // -GenTree* Compiler::convertHWIntrinsicToMask(var_types type, +GenTree* Compiler::gtNewSimdConvertVectorToMaskNode(var_types type, GenTree* node, CorInfoType simdBaseJitType, unsigned simdSize) @@ -2228,7 +2228,7 @@ GenTree* Compiler::convertHWIntrinsicToMask(var_types type, } //------------------------------------------------------------------------ -// convertHWIntrinsicFromMask: Convert a HW instrinsic mask node to a vector +// gtNewSimdConvertMaskToVectorNode: Convert a HW instrinsic mask node to a vector // // Arguments: // node -- The node to convert @@ -2237,7 +2237,7 @@ GenTree* Compiler::convertHWIntrinsicToMask(var_types type, // Return Value: // The node converted to the given type // -GenTree* Compiler::convertHWIntrinsicFromMask(GenTreeHWIntrinsic* node, var_types type) +GenTree* Compiler::gtNewSimdConvertMaskToVectorNode(GenTreeHWIntrinsic* node, var_types type) { assert(varTypeIsMask(node)); assert(varTypeIsSIMD(type)); From 
85f09bfd377f5c65328d76816ae8e2066c282132 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 13 Mar 2024 15:56:12 +0000 Subject: [PATCH 08/26] Add OperIs functions --- src/coreclr/jit/gentree.h | 19 +++++++++++++++++++ src/coreclr/jit/importer.cpp | 6 +++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index fb419cefbce6d4..2f13829b0933c4 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6396,6 +6396,25 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic bool OperIsBitwiseHWIntrinsic() const; bool OperIsEmbRoundingEnabled() const; + + bool OperIsConvertMaskToVector() const + { +#if defined(TARGET_ARM64) + return GetHWIntrinsicId() == NI_Sve_ConvertMaskToVector; +#else + return false; +#endif // TARGET_ARM64 && FEATURE_MASKED_HW_INTRINSICS + } + + bool OperIsConvertVectorToMask() const + { +#if defined(TARGET_ARM64) + return GetHWIntrinsicId() == NI_Sve_ConvertVectorToMask; +#else + return false; +#endif // TARGET_ARM64 && FEATURE_MASKED_HW_INTRINSICS + } + bool OperRequiresAsgFlag() const; bool OperRequiresCallFlag() const; bool OperRequiresGlobRefFlag() const; diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index a25c69a0dd6aeb..2b48489f196e77 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -6435,9 +6435,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) } #if defined(TARGET_ARM64) && defined(FEATURE_MASKED_HW_INTRINSICS) - // Masks must be converted to vectors before being stored to memory. - // But, for local stores we can optimise away the conversion - if (op1->OperIsHWIntrinsic() && op1->AsHWIntrinsic()->GetHWIntrinsicId() == NI_Sve_ConvertMaskToVector) + // Masks which as stored to locals as a vector can remove the conversion step as type mask is + // the typical consumption. 
+ if (op1->OperIsHWIntrinsic() && op1->AsHWIntrinsic()->OperIsConvertMaskToVector()) { op1 = op1->AsHWIntrinsic()->Op(1); lvaTable[lclNum].lvType = TYP_MASK; From 7945d5189cf6bff1bf0b6e97331155282980ae27 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 14 Mar 2024 10:32:51 +0000 Subject: [PATCH 09/26] Mark untested uses of mov --- src/coreclr/jit/instr.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 2027994f9ad1be..33ed0a296f769b 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1686,7 +1686,8 @@ instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg) #if defined(TARGET_XARCH) return INS_kmovq_msk; #elif defined(TARGET_ARM64) - return INS_mov; + unreached(); // TODO-SVE: This needs testing + return INS_sve_mov; #endif } #endif // FEATURE_MASKED_HW_INTRINSICS @@ -1928,7 +1929,8 @@ instruction CodeGen::ins_Copy(var_types dstType) #if defined(TARGET_XARCH) return INS_kmovq_msk; #elif defined(TARGET_ARM64) - return INS_mov; + unreached(); // TODO-SVE: This needs testing + return INS_sve_mov; #endif } #endif // FEATURE_MASKED_HW_INTRINSICS @@ -2049,7 +2051,8 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) #if defined(TARGET_XARCH) return INS_kmovq_gpr; #elif defined(TARGET_ARM64) - return INS_mov; + unreached(); // TODO-SVE: This needs testing + return INS_sve_mov; #endif } #endif // FEATURE_MASKED_HW_INTRINSICS From bd5d951448933ebbedbe60fc50e5ec38bd46ec1d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 14 Mar 2024 11:50:46 +0000 Subject: [PATCH 10/26] Add INS_SCALABLE_OPTS_PREDICATE_DEST --- src/coreclr/jit/codegenarm64.cpp | 34 +++++++++++++------------- src/coreclr/jit/emitarm64.cpp | 36 +++++++++++++++------------- src/coreclr/jit/gentree.h | 1 - src/coreclr/jit/hwintrinsicarm64.cpp | 6 ++--- src/coreclr/jit/instr.h | 3 ++- 5 files changed, 40 insertions(+), 40 deletions(-) diff --git 
a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index f690baf75d66b8..73035c7fde1630 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2767,20 +2767,19 @@ void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) // targetType must be a normal scalar type and not a TYP_STRUCT assert(targetType != TYP_STRUCT); - instruction ins = ins_Load(targetType); - emitAttr attr = emitActualTypeSize(targetType); + instruction ins = ins_Load(targetType); + emitAttr attr = emitActualTypeSize(targetType); + insScalableOpts sopt = INS_SCALABLE_OPTS_NONE; + emitter* emit = GetEmitter(); - emitter* emit = GetEmitter(); - - if (ins == INS_sve_ldr && !varTypeUsesMaskReg(targetType)) + // TODO-SVE: Removable once REG_V0 and REG_P0 are distinct + if (varTypeUsesMaskReg(targetType)) { - emit->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0, INS_SCALABLE_OPTS_UNPREDICATED); - } - else - { - emit->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0); + sopt = INS_SCALABLE_OPTS_PREDICATE_DEST; } + emit->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0, sopt); + genProduceReg(tree); } } @@ -2962,18 +2961,17 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) { inst_set_SV_var(lclNode); - instruction ins = ins_StoreFromSrc(dataReg, targetType); - emitAttr attr = emitActualTypeSize(targetType); + instruction ins = ins_StoreFromSrc(dataReg, targetType); + emitAttr attr = emitActualTypeSize(targetType); + insScalableOpts sopt = INS_SCALABLE_OPTS_NONE; // TODO-SVE: Removable once REG_V0 and REG_P0 are distinct - if (ins == INS_sve_str && !varTypeUsesMaskReg(targetType)) + if (varTypeUsesMaskReg(targetType)) { - emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0, INS_SCALABLE_OPTS_UNPREDICATED); - } - else - { - emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0); + sopt = INS_SCALABLE_OPTS_PREDICATE_DEST; } + + emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0, sopt); } else // store into 
register (i.e move into register) { diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 69923f10323eb8..040a7bdefdab97 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -17368,19 +17368,20 @@ void emitter::emitIns_R_S(instruction ins, attr = size; // TODO-SVE: Use register number instead of enum - if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + // TODO-SVE: Don't assume 128bit vectors + if (sopt == INS_SCALABLE_OPTS_PREDICATE_DEST) { - fmt = IF_SVE_IE_2A; - // TODO-SVE: Don't assume 128bit vectors - scale = NaturalScale_helper(EA_16BYTE); + assert(isPredicateRegister(reg1)); + fmt = IF_SVE_ID_2A; + // Predicate size is vector length / 8 + scale = NaturalScale_helper(EA_2BYTE); } else { assert(insScalableOptsNone(sopt)); - fmt = IF_SVE_ID_2A; - // TODO-SVE: Don't assume 128bit vectors - // Predicate size is vector length / 8 - scale = NaturalScale_helper(EA_2BYTE); + assert(isVectorRegister(reg1)); + fmt = IF_SVE_IE_2A; + scale = NaturalScale_helper(EA_16BYTE); } break; @@ -17653,24 +17654,25 @@ void emitter::emitIns_S_R(instruction ins, assert(isVectorRegister(reg1) || isPredicateRegister(reg1)); isScalable = true; - // TODO-SVE: This should probably be set earlier in the caller + // TODO-SVE: This should probably be set in the caller size = EA_SCALABLE; attr = size; // TODO-SVE: Use register number instead of enum - if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + // TODO-SVE: Don't assume 128bit vectors + if (sopt == INS_SCALABLE_OPTS_PREDICATE_DEST) { - fmt = IF_SVE_JH_2A; - // TODO-SVE: Don't assume 128bit vectors - scale = NaturalScale_helper(EA_16BYTE); + assert(isPredicateRegister(reg1)); + fmt = IF_SVE_JG_2A; + // Predicate size is vector length / 8 + scale = NaturalScale_helper(EA_2BYTE); } else { assert(insScalableOptsNone(sopt)); - fmt = IF_SVE_JG_2A; - // TODO-SVE: Don't assume 128bit vectors - // Predicate size is vector length / 8 - scale = NaturalScale_helper(EA_2BYTE); + 
assert(isVectorRegister(reg1)); + fmt = IF_SVE_JH_2A; + scale = NaturalScale_helper(EA_16BYTE); } break; diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 2f13829b0933c4..45bef842f31f92 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6396,7 +6396,6 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic bool OperIsBitwiseHWIntrinsic() const; bool OperIsEmbRoundingEnabled() const; - bool OperIsConvertMaskToVector() const { #if defined(TARGET_ARM64) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index ce3899dd568e63..385dfe4bc82bf7 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2215,9 +2215,9 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, // The node converted to the a mask type // GenTree* Compiler::gtNewSimdConvertVectorToMaskNode(var_types type, - GenTree* node, - CorInfoType simdBaseJitType, - unsigned simdSize) + GenTree* node, + CorInfoType simdBaseJitType, + unsigned simdSize) { assert(varTypeIsSIMD(node)); diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index c23cb258d46315..97526f5bb6d83a 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -385,7 +385,8 @@ enum insScalableOpts : unsigned INS_SCALABLE_OPTS_UNPREDICATED_WIDE, // Variants without a predicate and wide elements (eg asr) INS_SCALABLE_OPTS_TO_PREDICATE, // Variants moving to a predicate from a vector (e.g. pmov) INS_SCALABLE_OPTS_TO_VECTOR, // Variants moving to a vector from a predicate (e.g. pmov) - INS_SCALABLE_OPTS_BROADCAST // Used to distinguish mov from cpy, where mov is an alias for both + INS_SCALABLE_OPTS_BROADCAST, // Used to distinguish mov from cpy, where mov is an alias for both + INS_SCALABLE_OPTS_PREDICATE_DEST // Variants with a predicate destination }; // Maps directly to the pattern used in SVE instructions such as cntb. 
From ce61a404d33bc2b647c1ceb69daa24abdd4069af Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 14 Mar 2024 13:01:51 +0000 Subject: [PATCH 11/26] Valuenum fixes for tier 1 --- src/coreclr/jit/valuenum.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 79cc5548355401..4f2705bba94a68 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -12032,9 +12032,6 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree) // There are some HWINTRINSICS operations that have zero args, i.e. NI_Vector128_Zero if (opCount == 0) { - // Currently we don't have intrinsics with variable number of args with a parameter-less option. - assert(!isVariableNumArgs); - if (encodeResultType) { // There are zero arg HWINTRINSICS operations that encode the result type, i.e. Vector128_AllBitSet From b5502a6968c1304986f206ea6ac9de9d2fb63f7d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 14 Mar 2024 13:02:57 +0000 Subject: [PATCH 12/26] Remove importer changes --- src/coreclr/jit/importer.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 2b48489f196e77..49bdd4e5e77576 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -6434,17 +6434,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) impSpillSideEffects(false, CHECK_SPILL_ALL DEBUGARG("Spill before store to pinned local")); } -#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_HW_INTRINSICS) - // Masks which as stored to locals as a vector can remove the conversion step as type mask is - // the typical consumption. 
- if (op1->OperIsHWIntrinsic() && op1->AsHWIntrinsic()->OperIsConvertMaskToVector()) - { - op1 = op1->AsHWIntrinsic()->Op(1); - lvaTable[lclNum].lvType = TYP_MASK; - lclTyp = lvaGetActualType(lclNum); - } -#endif // TARGET_ARM64 && FEATURE_MASKED_HW_INTRINSICS - op1 = gtNewStoreLclVarNode(lclNum, op1); // TODO-ASG: delete this zero-diff quirk. Requires some forward substitution work. From 39c02d00044d10eaa6c7994ca5518c7ea91e5e82 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 14 Mar 2024 16:18:20 +0000 Subject: [PATCH 13/26] XARCH versions of OperIsConvertMaskToVector --- src/coreclr/jit/gentree.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 45bef842f31f92..77bd28c6adffce 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6398,7 +6398,9 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic bool OperIsConvertMaskToVector() const { -#if defined(TARGET_ARM64) +#if defined(TARGET_XARCH) + return GetHWIntrinsicId() == NI_AVX512F_ConvertMaskToVector; +#elif defined(TARGET_ARM64) return GetHWIntrinsicId() == NI_Sve_ConvertMaskToVector; #else return false; @@ -6407,11 +6409,13 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic bool OperIsConvertVectorToMask() const { -#if defined(TARGET_ARM64) +#if defined(TARGET_XARCH) + return GetHWIntrinsicId() == NI_AVX512F_ConvertVectorToMask; +#elif defined(TARGET_ARM64) return GetHWIntrinsicId() == NI_Sve_ConvertVectorToMask; #else return false; -#endif // TARGET_ARM64 && FEATURE_MASKED_HW_INTRINSICS +#endif } bool OperRequiresAsgFlag() const; From d8dea0e83c2318a4638d9beea11d3d188c2d5fa2 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 14 Mar 2024 16:21:54 +0000 Subject: [PATCH 14/26] Revert "Remove importer changes" This reverts commit b5502a6968c1304986f206ea6ac9de9d2fb63f7d. 
--- src/coreclr/jit/importer.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 49bdd4e5e77576..2b48489f196e77 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -6434,6 +6434,17 @@ void Compiler::impImportBlockCode(BasicBlock* block) impSpillSideEffects(false, CHECK_SPILL_ALL DEBUGARG("Spill before store to pinned local")); } +#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_HW_INTRINSICS) + // Masks which as stored to locals as a vector can remove the conversion step as type mask is + // the typical consumption. + if (op1->OperIsHWIntrinsic() && op1->AsHWIntrinsic()->OperIsConvertMaskToVector()) + { + op1 = op1->AsHWIntrinsic()->Op(1); + lvaTable[lclNum].lvType = TYP_MASK; + lclTyp = lvaGetActualType(lclNum); + } +#endif // TARGET_ARM64 && FEATURE_MASKED_HW_INTRINSICS + op1 = gtNewStoreLclVarNode(lclNum, op1); // TODO-ASG: delete this zero-diff quirk. Requires some forward substitution work. 
From 8ec8e380f95450a268c922087f6ab1d42769ac1d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 14 Mar 2024 17:12:50 +0000 Subject: [PATCH 15/26] Add tests for emitIns_S_R and emitIns_R_S --- src/coreclr/jit/codegenarm64test.cpp | 8 ++++++++ src/coreclr/jit/emitarm64.cpp | 14 ++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 3c02c968fe919d..107d44bb76e47f 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -8551,6 +8551,8 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_P1, REG_R5, -25); theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_P1, REG_R5, -256); theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_P1, REG_R5, 255); + theEmitter->emitIns_R_S(INS_sve_ldr, EA_8BYTE, REG_P0, 1, 0, INS_SCALABLE_OPTS_PREDICATE_DEST); + theEmitter->emitIns_R_S(INS_sve_ldr, EA_8BYTE, REG_P15, 1, 6, INS_SCALABLE_OPTS_PREDICATE_DEST); // IF_SVE_JG_2A // STR , [{, #, MUL VL}] @@ -8559,6 +8561,8 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_P3, REG_R1, -117); theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_P3, REG_R1, -256); theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_P3, REG_R1, 255); + theEmitter->emitIns_S_R(INS_sve_str, EA_8BYTE, REG_P5, 1, 0, INS_SCALABLE_OPTS_PREDICATE_DEST); + theEmitter->emitIns_S_R(INS_sve_str, EA_8BYTE, REG_P7, 1, 4, INS_SCALABLE_OPTS_PREDICATE_DEST); // IF_SVE_IE_2A // LDR , [{, #, MUL VL}] @@ -8572,6 +8576,8 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_SCALABLE_OPTS_UNPREDICATED); theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_V3, REG_R4, 255, INS_OPTS_NONE, INS_SCALABLE_OPTS_UNPREDICATED); + theEmitter->emitIns_R_S(INS_sve_ldr, EA_8BYTE, REG_V17, 1, 0); + theEmitter->emitIns_R_S(INS_sve_ldr, EA_8BYTE, REG_V9, 1, 24); // IF_SVE_JH_2A // STR , [{,
#, MUL VL}] @@ -8585,6 +8591,8 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_SCALABLE_OPTS_UNPREDICATED); theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_V2, REG_R3, 255, INS_OPTS_NONE, INS_SCALABLE_OPTS_UNPREDICATED); + theEmitter->emitIns_S_R(INS_sve_str, EA_8BYTE, REG_V3, 1, 0); + theEmitter->emitIns_S_R(INS_sve_str, EA_8BYTE, REG_V0, 1, 28); #ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED // IF_SVE_GG_3A diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 040a7bdefdab97..948d5b41591dae 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -2660,10 +2660,9 @@ void emitter::emitInsSanityCheck(instrDesc* id) elemsize = id->idOpSize(); assert(insOptsNone(id->idInsOpt())); assert(isScalableVectorSize(elemsize)); - assert(isPredicateRegister(id->idReg1())); // TTTT - assert(isGeneralRegister(id->idReg2())); // nnnnn - assert(isValidSimm<9>(emitGetInsSC(id))); // iii - // iiiiii + assert(isPredicateRegister(id->idReg1())); // TTTT + assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn + assert(isValidSimm<9>(emitGetInsSC(id))); // iii break; case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register @@ -2671,10 +2670,9 @@ void emitter::emitInsSanityCheck(instrDesc* id) elemsize = id->idOpSize(); assert(insOptsNone(id->idInsOpt())); assert(isScalableVectorSize(elemsize)); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isGeneralRegister(id->idReg2())); // nnnnn - assert(isValidSimm<9>(emitGetInsSC(id))); // iii - // iiiiii + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn + assert(isValidSimm<9>(emitGetInsSC(id))); // iii break; case IF_SVE_GG_3A: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit From 3ec441c3d15feb64d93a0458598eca67120da829 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 15 Mar 2024 10:21:19 +0000 Subject: [PATCH 16/26] Fix formatting --- 
src/coreclr/jit/emitarm64.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 948d5b41591dae..6530c3554ee418 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -2670,9 +2670,9 @@ void emitter::emitInsSanityCheck(instrDesc* id) elemsize = id->idOpSize(); assert(insOptsNone(id->idInsOpt())); assert(isScalableVectorSize(elemsize)); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn - assert(isValidSimm<9>(emitGetInsSC(id))); // iii + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn + assert(isValidSimm<9>(emitGetInsSC(id))); // iii break; case IF_SVE_GG_3A: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit From f5695120bf9fce99196e5872a2f4659360e47388 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 15 Mar 2024 10:37:18 +0000 Subject: [PATCH 17/26] Reapply "Remove importer changes" This reverts commit d8dea0e83c2318a4638d9beea11d3d188c2d5fa2. --- src/coreclr/jit/importer.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 2b48489f196e77..49bdd4e5e77576 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -6434,17 +6434,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) impSpillSideEffects(false, CHECK_SPILL_ALL DEBUGARG("Spill before store to pinned local")); } -#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_HW_INTRINSICS) - // Masks which as stored to locals as a vector can remove the conversion step as type mask is - // the typical consumption. 
- if (op1->OperIsHWIntrinsic() && op1->AsHWIntrinsic()->OperIsConvertMaskToVector()) - { - op1 = op1->AsHWIntrinsic()->Op(1); - lvaTable[lclNum].lvType = TYP_MASK; - lclTyp = lvaGetActualType(lclNum); - } -#endif // TARGET_ARM64 && FEATURE_MASKED_HW_INTRINSICS - op1 = gtNewStoreLclVarNode(lclNum, op1); // TODO-ASG: delete this zero-diff quirk. Requires some forward substitution work. From 01101709d667296ba6afde03d6ed84c5d159dc9f Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 18 Mar 2024 08:53:02 +0000 Subject: [PATCH 18/26] Use dummy mask ldr and str --- src/coreclr/jit/codegenarm64.cpp | 29 +++------ src/coreclr/jit/codegenarm64test.cpp | 8 +-- src/coreclr/jit/emitarm64.cpp | 92 +++++++++++++--------------- src/coreclr/jit/emitarm64.h | 6 +- src/coreclr/jit/instr.cpp | 4 +- src/coreclr/jit/instr.h | 5 +- 6 files changed, 61 insertions(+), 83 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 73035c7fde1630..81370e6413835f 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2767,19 +2767,11 @@ void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) // targetType must be a normal scalar type and not a TYP_STRUCT assert(targetType != TYP_STRUCT); - instruction ins = ins_Load(targetType); - emitAttr attr = emitActualTypeSize(targetType); - insScalableOpts sopt = INS_SCALABLE_OPTS_NONE; - emitter* emit = GetEmitter(); - - // TODO-SVE: Removable once REG_V0 and REG_P0 are distinct - if (varTypeUsesMaskReg(targetType)) - { - sopt = INS_SCALABLE_OPTS_PREDICATE_DEST; - } - - emit->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0, sopt); + instruction ins = ins_Load(targetType); + emitAttr attr = emitActualTypeSize(targetType); + emitter* emit = GetEmitter(); + emit->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0); genProduceReg(tree); } } @@ -2961,17 +2953,10 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) { inst_set_SV_var(lclNode); - instruction ins = 
ins_StoreFromSrc(dataReg, targetType); - emitAttr attr = emitActualTypeSize(targetType); - insScalableOpts sopt = INS_SCALABLE_OPTS_NONE; - - // TODO-SVE: Removable once REG_V0 and REG_P0 are distinct - if (varTypeUsesMaskReg(targetType)) - { - sopt = INS_SCALABLE_OPTS_PREDICATE_DEST; - } + instruction ins = ins_StoreFromSrc(dataReg, targetType); + emitAttr attr = emitActualTypeSize(targetType); - emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0, sopt); + emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0); } else // store into register (i.e move into register) { diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 107d44bb76e47f..66e99940b6bd67 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -8551,8 +8551,8 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_P1, REG_R5, -25); theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_P1, REG_R5, -256); theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_P1, REG_R5, 255); - theEmitter->emitIns_R_S(INS_sve_ldr, EA_8BYTE, REG_P0, 1, 0, INS_SCALABLE_OPTS_PREDICATE_DEST); - theEmitter->emitIns_R_S(INS_sve_ldr, EA_8BYTE, REG_P15, 1, 6, INS_SCALABLE_OPTS_PREDICATE_DEST); + theEmitter->emitIns_R_S(INS_sve_ldr_mask, EA_8BYTE, REG_P0, 1, 0); + theEmitter->emitIns_R_S(INS_sve_ldr_mask, EA_8BYTE, REG_P15, 1, 6); // IF_SVE_JG_2A // STR , [{, #, MUL VL}] @@ -8561,8 +8561,8 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_P3, REG_R1, -117); theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_P3, REG_R1, -256); theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_P3, REG_R1, 255); - theEmitter->emitIns_S_R(INS_sve_str, EA_8BYTE, REG_P5, 1, 0, INS_SCALABLE_OPTS_PREDICATE_DEST); - theEmitter->emitIns_S_R(INS_sve_str, EA_8BYTE, REG_P7, 1, 4, INS_SCALABLE_OPTS_PREDICATE_DEST); + 
theEmitter->emitIns_S_R(INS_sve_str_mask, EA_8BYTE, REG_P5, 1, 0); + theEmitter->emitIns_S_R(INS_sve_str_mask, EA_8BYTE, REG_P7, 1, 4); // IF_SVE_IE_2A // LDR , [{, #, MUL VL}] diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 6530c3554ee418..b349ca14aaf1df 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -17309,12 +17309,7 @@ void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) * * Add an instruction referencing a register and a stack-based local variable. */ -void emitter::emitIns_R_S(instruction ins, - emitAttr attr, - regNumber reg1, - int varx, - int offs, - insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) +void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) { emitAttr size = EA_SIZE(attr); insFormat fmt = IF_NONE; @@ -17358,29 +17353,30 @@ void emitter::emitIns_R_S(instruction ins, break; case INS_sve_ldr: - assert(isVectorRegister(reg1) || isPredicateRegister(reg1)); + assert(isPredicateRegister(reg1)); isScalable = true; + size = EA_SCALABLE; + attr = size; + fmt = IF_SVE_IE_2A; + + // TODO-SVE: Don't assume 128bit vectors + scale = NaturalScale_helper(EA_16BYTE); + + break; - // TODO-SVE: This should probably be set earlier in the caller - size = EA_SCALABLE; - attr = size; + // TODO-SVE: Fold into INS_sve_ldr once REG_V0 and REG_P0 are distinct + case INS_sve_ldr_mask: + assert(isPredicateRegister(reg1)); + isScalable = true; + size = EA_SCALABLE; + attr = size; + fmt = IF_SVE_ID_2A; + ins = INS_sve_ldr; - // TODO-SVE: Use register number instead of enum // TODO-SVE: Don't assume 128bit vectors - if (sopt == INS_SCALABLE_OPTS_PREDICATE_DEST) - { - assert(isPredicateRegister(reg1)); - fmt = IF_SVE_ID_2A; - // Predicate size is vector length / 8 - scale = NaturalScale_helper(EA_2BYTE); - } - else - { - assert(insScalableOptsNone(sopt)); - assert(isVectorRegister(reg1)); - fmt = IF_SVE_IE_2A; - scale = 
NaturalScale_helper(EA_16BYTE); - } + // Predicate size is vector length / 8 + scale = NaturalScale_helper(EA_2BYTE); + break; default: @@ -17602,12 +17598,7 @@ void emitter::emitIns_R_R_S_S( * * Add an instruction referencing a stack-based local variable and a register */ -void emitter::emitIns_S_R(instruction ins, - emitAttr attr, - regNumber reg1, - int varx, - int offs, - insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) +void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) { assert(offs >= 0); emitAttr size = EA_SIZE(attr); @@ -17649,29 +17640,30 @@ void emitter::emitIns_S_R(instruction ins, break; case INS_sve_str: - assert(isVectorRegister(reg1) || isPredicateRegister(reg1)); + assert(isVectorRegister(reg1)); isScalable = true; + size = EA_SCALABLE; + attr = size; + fmt = IF_SVE_JH_2A; + + // TODO-SVE: Don't assume 128bit vectors + scale = NaturalScale_helper(EA_16BYTE); + + break; - // TODO-SVE: This should probably be set in the caller - size = EA_SCALABLE; - attr = size; + // TODO-SVE: Fold into INS_sve_str once REG_V0 and REG_P0 are distinct + case INS_sve_str_mask: + assert(isPredicateRegister(reg1)); + isScalable = true; + size = EA_SCALABLE; + attr = size; + fmt = IF_SVE_JG_2A; + ins = INS_sve_str; - // TODO-SVE: Use register number instead of enum // TODO-SVE: Don't assume 128bit vectors - if (sopt == INS_SCALABLE_OPTS_PREDICATE_DEST) - { - assert(isPredicateRegister(reg1)); - fmt = IF_SVE_JG_2A; - // Predicate size is vector length / 8 - scale = NaturalScale_helper(EA_2BYTE); - } - else - { - assert(insScalableOptsNone(sopt)); - assert(isVectorRegister(reg1)); - fmt = IF_SVE_JH_2A; - scale = NaturalScale_helper(EA_16BYTE); - } + // Predicate size is vector length / 8 + scale = NaturalScale_helper(EA_2BYTE); + break; default: diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 2296d68ec9b30c..bf603155b36fcc 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h 
@@ -1604,8 +1604,7 @@ void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int void emitIns_S(instruction ins, emitAttr attr, int varx, int offs); -void emitIns_S_R( - instruction ins, emitAttr attr, regNumber ireg, int varx, int offs, insScalableOpts sopt = INS_SCALABLE_OPTS_NONE); +void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); void emitIns_S_S_R_R( instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); @@ -1623,8 +1622,7 @@ void emitIns_R_R_R_I_LdStPair(instruction ins, int offs2 = -1 DEBUG_ARG(unsigned var1RefsOffs = BAD_IL_OFFSET) DEBUG_ARG(unsigned var2RefsOffs = BAD_IL_OFFSET)); -void emitIns_R_S( - instruction ins, emitAttr attr, regNumber ireg, int varx, int offs, insScalableOpts sopt = INS_SCALABLE_OPTS_NONE); +void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); void emitIns_R_R_S_S( instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 33ed0a296f769b..33ccbfddb1ad5c 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1841,7 +1841,7 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* #if defined(TARGET_XARCH) return INS_kmovq_msk; #elif defined(TARGET_ARM64) - return INS_sve_ldr; + return INS_sve_ldr_mask; #endif } #endif // FEATURE_MASKED_HW_INTRINSICS @@ -2163,7 +2163,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false #if defined(TARGET_XARCH) return INS_kmovq_msk; #elif defined(TARGET_ARM64) - return INS_sve_str; + return INS_sve_str_mask; #endif } #endif // FEATURE_MASKED_HW_INTRINSICS diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 97526f5bb6d83a..491287a2c51186 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -66,6 +66,10 @@ enum instruction : uint32_t INS_lea, 
// Not a real instruction. It is used for load the address of stack locals + // TODO-SVE: Removable once REG_V0 and REG_P0 are distinct + INS_sve_str_mask, // Not a real instruction. It is used to load masks from the stack + INS_sve_ldr_mask, // Not a real instruction. It is used to store masks to the stack + #elif defined(TARGET_LOONGARCH64) #define INST(id, nm, ldst, e1, msk, fmt) INS_##id, #include "instrs.h" @@ -386,7 +390,6 @@ enum insScalableOpts : unsigned INS_SCALABLE_OPTS_TO_PREDICATE, // Variants moving to a predicate from a vector (e.g. pmov) INS_SCALABLE_OPTS_TO_VECTOR, // Variants moving to a vector from a predicate (e.g. pmov) INS_SCALABLE_OPTS_BROADCAST, // Used to distinguish mov from cpy, where mov is an alias for both - INS_SCALABLE_OPTS_PREDICATE_DEST // Variants with a predicate destination }; // Maps directly to the pattern used in SVE instructions such as cntb. From ec05e34e39eae368e19be8d335b731c94faa68ad Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 19 Mar 2024 15:04:52 +0000 Subject: [PATCH 19/26] Refactor emitIns_S_R and emitIns_R_S --- src/coreclr/jit/emitarm64.cpp | 305 ++++++++++++++++++---------------- 1 file changed, 166 insertions(+), 139 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index b349ca14aaf1df..6d669e33df660d 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -17313,13 +17313,21 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va { emitAttr size = EA_SIZE(attr); insFormat fmt = IF_NONE; - int disp = 0; unsigned scale = 0; bool isLdrStr = false; - bool isScalable = false; + bool isSimple = true; + bool useRegForImm = false; assert(offs >= 0); + /* Figure out the variable's frame position */ + bool FPbased; + int base = emitComp->lvaFrameAddress(varx, &FPbased); + int disp = base + offs; + ssize_t imm; + + regNumber reg2 = encodingSPtoZR(FPbased ? REG_FPBASE : REG_SPBASE); + // TODO-ARM64-CQ: use unscaled loads? 
/* Figure out the encoding format of the instruction */ switch (ins) @@ -17349,34 +17357,85 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va case INS_lea: assert(size == EA_8BYTE); + isSimple = false; scale = 0; + + if (disp >= 0) + { + ins = INS_add; + imm = disp; + } + else + { + ins = INS_sub; + imm = -disp; + } + + if (imm <= 0x0fff) + { + fmt = IF_DI_2A; // add reg1,reg2,#disp + } + else + { + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + fmt = IF_DR_3A; // add reg1,reg2,rsvdReg + } break; case INS_sve_ldr: - assert(isPredicateRegister(reg1)); - isScalable = true; - size = EA_SCALABLE; - attr = size; - fmt = IF_SVE_IE_2A; + { + assert(isPredicateRegister(reg1)); + isSimple = false; + size = EA_SCALABLE; + attr = size; + fmt = IF_SVE_IE_2A; + imm = disp; - // TODO-SVE: Don't assume 128bit vectors - scale = NaturalScale_helper(EA_16BYTE); + // TODO-SVE: Don't assume 128bit vectors + scale = NaturalScale_helper(EA_16BYTE); + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st + } + else + { + useRegForImm = true; + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + } + } break; // TODO-SVE: Fold into INS_sve_ldr once REG_V0 and REG_P0 are distinct case INS_sve_ldr_mask: - assert(isPredicateRegister(reg1)); - isScalable = true; - size = EA_SCALABLE; - attr = size; - fmt = IF_SVE_ID_2A; - ins = INS_sve_ldr; + { + assert(isPredicateRegister(reg1)); + isSimple = false; + size = EA_SCALABLE; + attr = size; + fmt = IF_SVE_ID_2A; + ins = INS_sve_ldr; + imm = disp; - // TODO-SVE: Don't assume 128bit vectors - // Predicate size is vector length / 8 - scale = NaturalScale_helper(EA_2BYTE); + // TODO-SVE: Don't assume 128bit vectors + // 
Predicate size is vector length / 8 + scale = NaturalScale_helper(EA_2BYTE); + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st + } + else + { + useRegForImm = true; + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + } + } break; default: @@ -17385,57 +17444,21 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va } // end switch (ins) - /* Figure out the variable's frame position */ - ssize_t imm; - int base; - bool FPbased; - insFormat scalarfmt = fmt; - - base = emitComp->lvaFrameAddress(varx, &FPbased); - disp = base + offs; assert((scale >= 0) && (scale <= 4)); - bool useRegForImm = false; - regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; - reg2 = encodingSPtoZR(reg2); - - if (ins == INS_lea) - { - if (disp >= 0) - { - ins = INS_add; - imm = disp; - } - else - { - ins = INS_sub; - imm = -disp; - } - - if (imm <= 0x0fff) - { - scalarfmt = IF_DI_2A; // add reg1,reg2,#disp - } - else - { - regNumber rsvdReg = codeGen->rsGetRsvdReg(); - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); - scalarfmt = IF_DR_3A; // add reg1,reg2,rsvdReg - } - } - else + if (isSimple) { ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate imm = disp; if (imm == 0) { - scalarfmt = IF_LS_2A; + fmt = IF_LS_2A; } else if ((imm < 0) || ((imm & mask) != 0)) { if (isValidSimm<9>(imm)) { - scalarfmt = IF_LS_2C; + fmt = IF_LS_2C; } else { @@ -17444,13 +17467,11 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va } else if (imm > 0) { - // TODO: We should be able to scale values <0 for all variants. 
- if (((imm & mask) == 0) && ((imm >> scale) < 0x1000)) { imm >>= scale; // The immediate is scaled by the size of the ld/st - scalarfmt = IF_LS_2B; + fmt = IF_LS_2B; } else { @@ -17462,15 +17483,10 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va { regNumber rsvdReg = codeGen->rsGetRsvdReg(); codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); - scalarfmt = IF_LS_3A; + fmt = IF_LS_3A; } } - // Set the format based on the immediate encoding - if (!isScalable) - { - fmt = scalarfmt; - } assert(fmt != IF_NONE); // Try to optimize a load/store with an alternative instruction. @@ -17603,11 +17619,20 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va assert(offs >= 0); emitAttr size = EA_SIZE(attr); insFormat fmt = IF_NONE; - int disp = 0; unsigned scale = 0; bool isVectorStore = false; bool isStr = false; - bool isScalable = false; + bool isSimple = true; + bool useRegForImm = false; + + /* Figure out the variable's frame position */ + bool FPbased; + int base = emitComp->lvaFrameAddress(varx, &FPbased); + int disp = base + offs; + ssize_t imm = disp; + + // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead? + regNumber reg2 = encodingSPtoZR(FPbased ? REG_FPBASE : REG_SPBASE); // TODO-ARM64-CQ: use unscaled loads? 
/* Figure out the encoding format of the instruction */ @@ -17640,30 +17665,56 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va break; case INS_sve_str: - assert(isVectorRegister(reg1)); - isScalable = true; - size = EA_SCALABLE; - attr = size; - fmt = IF_SVE_JH_2A; + { + assert(isVectorRegister(reg1)); + isSimple = false; + size = EA_SCALABLE; + attr = size; + fmt = IF_SVE_JH_2A; - // TODO-SVE: Don't assume 128bit vectors - scale = NaturalScale_helper(EA_16BYTE); + // TODO-SVE: Don't assume 128bit vectors + scale = NaturalScale_helper(EA_16BYTE); + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st + } + else + { + useRegForImm = true; + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + } + } break; // TODO-SVE: Fold into INS_sve_str once REG_V0 and REG_P0 are distinct case INS_sve_str_mask: - assert(isPredicateRegister(reg1)); - isScalable = true; - size = EA_SCALABLE; - attr = size; - fmt = IF_SVE_JG_2A; - ins = INS_sve_str; + { + assert(isPredicateRegister(reg1)); + isSimple = false; + size = EA_SCALABLE; + attr = size; + fmt = IF_SVE_JG_2A; + ins = INS_sve_str; - // TODO-SVE: Don't assume 128bit vectors - // Predicate size is vector length / 8 - scale = NaturalScale_helper(EA_2BYTE); + // TODO-SVE: Don't assume 128bit vectors + // Predicate size is vector length / 8 + scale = NaturalScale_helper(EA_2BYTE); + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st + } + else + { + useRegForImm = true; + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + } + } 
break; default: @@ -17672,74 +17723,50 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va } // end switch (ins) - /* Figure out the variable's frame position */ - int base; - bool FPbased; + assert(scale <= (isVectorStore || !isSimple) ? 4 : 3); - base = emitComp->lvaFrameAddress(varx, &FPbased); - disp = base + offs; - assert(scale >= 0); - if (isVectorStore || isScalable) + if (isSimple) { - assert(scale <= 4); - } - else - { - assert(scale <= 3); - } - - // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead? - regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; - reg2 = encodingSPtoZR(reg2); + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - bool useRegForImm = false; - ssize_t imm = disp; - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - insFormat scalarfmt = fmt; - if (imm == 0) - { - scalarfmt = IF_LS_2A; - } - else if ((imm < 0) || ((imm & mask) != 0)) - { - if (isValidSimm<9>(imm)) + if (imm == 0) { - scalarfmt = IF_LS_2C; + fmt = IF_LS_2A; } - else + else if ((imm < 0) || ((imm & mask) != 0)) { - useRegForImm = true; + if (isValidSimm<9>(imm)) + { + fmt = IF_LS_2C; + } + else + { + useRegForImm = true; + } } - } - else if (imm > 0) - { - // TODO: We should be able to scale values <0 for all variants. - - if (((imm & mask) == 0) && ((imm >> scale) < 0x1000)) + else if (imm > 0) { - imm >>= scale; // The immediate is scaled by the size of the ld/st - scalarfmt = IF_LS_2B; + if (((imm & mask) == 0) && ((imm >> scale) < 0x1000)) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st + fmt = IF_LS_2B; + } + else + { + useRegForImm = true; + } } - else + + if (useRegForImm) { - useRegForImm = true; + // The reserved register is not stored in idReg3() since that field overlaps with iiaLclVar. + // It is instead implicit when idSetIsLclVar() is set, with this encoding format. 
+ regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + fmt = IF_LS_3A; } } - if (useRegForImm) - { - // The reserved register is not stored in idReg3() since that field overlaps with iiaLclVar. - // It is instead implicit when idSetIsLclVar() is set, with this encoding format. - regNumber rsvdReg = codeGen->rsGetRsvdReg(); - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); - scalarfmt = IF_LS_3A; - } - - // Set the format based on the immediate encoding - if (!isScalable) - { - fmt = scalarfmt; - } assert(fmt != IF_NONE); // Try to optimize a store with an alternative instruction. From 71bcb481bfca762cfbb43f39b01609b9cc89251a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 19 Mar 2024 16:09:02 +0000 Subject: [PATCH 20/26] Move str_mask/ldr_mask --- src/coreclr/jit/instr.h | 4 ---- src/coreclr/jit/instrsarm64sve.h | 5 +++++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 491287a2c51186..a94998f596e93a 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -66,10 +66,6 @@ enum instruction : uint32_t INS_lea, // Not a real instruction. It is used for load the address of stack locals - // TODO-SVE: Removable once REG_V0 and REG_P0 are distinct - INS_sve_str_mask, // Not a real instruction. It is used to load masks from the stack - INS_sve_ldr_mask, // Not a real instruction. 
It is used to store masks to the stack - #elif defined(TARGET_LOONGARCH64) #define INST(id, nm, ldst, e1, msk, fmt) INS_##id, #include "instrs.h" diff --git a/src/coreclr/jit/instrsarm64sve.h b/src/coreclr/jit/instrsarm64sve.h index f7209c6df98a58..fb469c0bfdc101 100644 --- a/src/coreclr/jit/instrsarm64sve.h +++ b/src/coreclr/jit/instrsarm64sve.h @@ -2840,6 +2840,11 @@ INST1(ldnt1sw, "ldnt1sw", 0, IF_SV INST1(st1q, "st1q", 0, IF_SVE_IY_4A, 0xE4202000 ) // ST1Q {.Q }, , [.D{, }] SVE_IY_4A 11100100001mmmmm 001gggnnnnnttttt E420 2000 + +// TODO-SVE: Removable once REG_V0 and REG_P0 are distinct +INST1(str_mask, "str_mask", 0, IF_SN_0A, BAD_CODE) +INST1(ldr_mask, "ldr_mask", 0, IF_SN_0A, BAD_CODE) + // clang-format on /*****************************************************************************/ From 24cd68b6f9c462450f14a098c0152e8891fcc53b Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 19 Mar 2024 16:13:12 +0000 Subject: [PATCH 21/26] Fix formatting --- src/coreclr/jit/emitarm64.cpp | 204 +++++++++++++++++----------------- 1 file changed, 102 insertions(+), 102 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 6d669e33df660d..953a01bcfb548b 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -17311,22 +17311,22 @@ void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) */ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) { - emitAttr size = EA_SIZE(attr); - insFormat fmt = IF_NONE; - unsigned scale = 0; - bool isLdrStr = false; - bool isSimple = true; + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; + unsigned scale = 0; + bool isLdrStr = false; + bool isSimple = true; bool useRegForImm = false; assert(offs >= 0); /* Figure out the variable's frame position */ - bool FPbased; - int base = emitComp->lvaFrameAddress(varx, &FPbased); - int disp = base + offs; - ssize_t imm; + bool FPbased; + int base = 
emitComp->lvaFrameAddress(varx, &FPbased); + int disp = base + offs; + ssize_t imm; - regNumber reg2 = encodingSPtoZR(FPbased ? REG_FPBASE : REG_SPBASE); + regNumber reg2 = encodingSPtoZR(FPbased ? REG_FPBASE : REG_SPBASE); // TODO-ARM64-CQ: use unscaled loads? /* Figure out the encoding format of the instruction */ @@ -17358,7 +17358,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va case INS_lea: assert(size == EA_8BYTE); isSimple = false; - scale = 0; + scale = 0; if (disp >= 0) { @@ -17384,59 +17384,59 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va break; case INS_sve_ldr: - { - assert(isPredicateRegister(reg1)); - isSimple = false; - size = EA_SCALABLE; - attr = size; - fmt = IF_SVE_IE_2A; - imm = disp; + { + assert(isPredicateRegister(reg1)); + isSimple = false; + size = EA_SCALABLE; + attr = size; + fmt = IF_SVE_IE_2A; + imm = disp; - // TODO-SVE: Don't assume 128bit vectors - scale = NaturalScale_helper(EA_16BYTE); - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + // TODO-SVE: Don't assume 128bit vectors + scale = NaturalScale_helper(EA_16BYTE); + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) - { - imm >>= scale; // The immediate is scaled by the size of the ld/st - } - else - { - useRegForImm = true; - regNumber rsvdReg = codeGen->rsGetRsvdReg(); - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); - } + if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st } - break; + else + { + useRegForImm = true; + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + } + } + break; // TODO-SVE: Fold into INS_sve_ldr once REG_V0 and REG_P0 are distinct case INS_sve_ldr_mask: - { - 
assert(isPredicateRegister(reg1)); - isSimple = false; - size = EA_SCALABLE; - attr = size; - fmt = IF_SVE_ID_2A; - ins = INS_sve_ldr; - imm = disp; + { + assert(isPredicateRegister(reg1)); + isSimple = false; + size = EA_SCALABLE; + attr = size; + fmt = IF_SVE_ID_2A; + ins = INS_sve_ldr; + imm = disp; - // TODO-SVE: Don't assume 128bit vectors - // Predicate size is vector length / 8 - scale = NaturalScale_helper(EA_2BYTE); - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + // TODO-SVE: Don't assume 128bit vectors + // Predicate size is vector length / 8 + scale = NaturalScale_helper(EA_2BYTE); + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) - { - imm >>= scale; // The immediate is scaled by the size of the ld/st - } - else - { - useRegForImm = true; - regNumber rsvdReg = codeGen->rsGetRsvdReg(); - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); - } + if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st } - break; + else + { + useRegForImm = true; + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + } + } + break; default: NYI("emitIns_R_S"); // FP locals? @@ -17626,13 +17626,13 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va bool useRegForImm = false; /* Figure out the variable's frame position */ - bool FPbased; - int base = emitComp->lvaFrameAddress(varx, &FPbased); - int disp = base + offs; - ssize_t imm = disp; + bool FPbased; + int base = emitComp->lvaFrameAddress(varx, &FPbased); + int disp = base + offs; + ssize_t imm = disp; // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead? - regNumber reg2 = encodingSPtoZR(FPbased ? 
REG_FPBASE : REG_SPBASE); + regNumber reg2 = encodingSPtoZR(FPbased ? REG_FPBASE : REG_SPBASE); // TODO-ARM64-CQ: use unscaled loads? /* Figure out the encoding format of the instruction */ @@ -17665,57 +17665,57 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va break; case INS_sve_str: - { - assert(isVectorRegister(reg1)); - isSimple = false; - size = EA_SCALABLE; - attr = size; - fmt = IF_SVE_JH_2A; + { + assert(isVectorRegister(reg1)); + isSimple = false; + size = EA_SCALABLE; + attr = size; + fmt = IF_SVE_JH_2A; - // TODO-SVE: Don't assume 128bit vectors - scale = NaturalScale_helper(EA_16BYTE); - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + // TODO-SVE: Don't assume 128bit vectors + scale = NaturalScale_helper(EA_16BYTE); + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) - { - imm >>= scale; // The immediate is scaled by the size of the ld/st - } - else - { - useRegForImm = true; - regNumber rsvdReg = codeGen->rsGetRsvdReg(); - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); - } + if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st } - break; + else + { + useRegForImm = true; + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + } + } + break; // TODO-SVE: Fold into INS_sve_str once REG_V0 and REG_P0 are distinct case INS_sve_str_mask: - { - assert(isPredicateRegister(reg1)); - isSimple = false; - size = EA_SCALABLE; - attr = size; - fmt = IF_SVE_JG_2A; - ins = INS_sve_str; + { + assert(isPredicateRegister(reg1)); + isSimple = false; + size = EA_SCALABLE; + attr = size; + fmt = IF_SVE_JG_2A; + ins = INS_sve_str; - // TODO-SVE: Don't assume 128bit vectors - // Predicate size is vector length / 8 - scale = 
NaturalScale_helper(EA_2BYTE); - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + // TODO-SVE: Don't assume 128bit vectors + // Predicate size is vector length / 8 + scale = NaturalScale_helper(EA_2BYTE); + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) - { - imm >>= scale; // The immediate is scaled by the size of the ld/st - } - else - { - useRegForImm = true; - regNumber rsvdReg = codeGen->rsGetRsvdReg(); - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); - } + if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st } - break; + else + { + useRegForImm = true; + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + } + } + break; default: NYI("emitIns_S_R"); // FP locals? From 5b995ae20670823f56cd7be66c3b3da55de24def Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 19 Mar 2024 16:40:39 +0000 Subject: [PATCH 22/26] Set imm --- src/coreclr/jit/emitarm64.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 953a01bcfb548b..67c1a8dc2531ab 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -17324,7 +17324,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va bool FPbased; int base = emitComp->lvaFrameAddress(varx, &FPbased); int disp = base + offs; - ssize_t imm; + ssize_t imm = disp; regNumber reg2 = encodingSPtoZR(FPbased ? 
REG_FPBASE : REG_SPBASE); @@ -17363,7 +17363,6 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va if (disp >= 0) { ins = INS_add; - imm = disp; } else { @@ -17390,7 +17389,6 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va size = EA_SCALABLE; attr = size; fmt = IF_SVE_IE_2A; - imm = disp; // TODO-SVE: Don't assume 128bit vectors scale = NaturalScale_helper(EA_16BYTE); @@ -17418,7 +17416,6 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va attr = size; fmt = IF_SVE_ID_2A; ins = INS_sve_ldr; - imm = disp; // TODO-SVE: Don't assume 128bit vectors // Predicate size is vector length / 8 @@ -17449,7 +17446,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va if (isSimple) { ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - imm = disp; + if (imm == 0) { fmt = IF_LS_2A; From 3a82d5dd75fdca4ab6641f99c2e4e39d0530a4ca Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 19 Mar 2024 17:04:30 +0000 Subject: [PATCH 23/26] fix assert --- src/coreclr/jit/emitarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 67c1a8dc2531ab..f4c8def2520342 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -17720,7 +17720,7 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va } // end switch (ins) - assert(scale <= (isVectorStore || !isSimple) ? 4 : 3); + assert(scale <= ((isVectorStore || !isSimple) ? 
4 : 3)); if (isSimple) { From 8baee3866d2d5a8423389372a0c9b475d6a7a30f Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 20 Mar 2024 09:16:21 +0000 Subject: [PATCH 24/26] Fix assert (2) --- src/coreclr/jit/emitarm64.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index f4c8def2520342..d851cbba953664 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -17720,7 +17720,15 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va } // end switch (ins) - assert(scale <= ((isVectorStore || !isSimple) ? 4 : 3)); + if (isVectorStore || !isSimple) + { + assert(scale <= 4); + } + else + if (isSimple) + { + assert(scale <= 3); + } if (isSimple) { From b22755af10b4d647aed9b99a2c2cc24456f0a701 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 20 Mar 2024 09:50:10 +0000 Subject: [PATCH 25/26] Fix assert (3) --- src/coreclr/jit/emitarm64.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index d851cbba953664..9298d0a8dde5fc 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -17725,7 +17725,6 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va assert(scale <= 4); } else - if (isSimple) { assert(scale <= 3); } From bd8db6ed341f005288909746b29aa2e0eca10554 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 20 Mar 2024 14:06:30 +0000 Subject: [PATCH 26/26] nop