@@ -21492,19 +21492,63 @@ GenTree* Compiler::gtNewSimdBinOpNode(
2149221492 {
2149321493 assert((simdSize == 16) || (simdSize == 32) || (simdSize == 64));
2149421494
21495- if (simdSize == 64)
21495+ bool isV512Supported = false;
21496+ if (compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512DQ_VL))
2149621497 {
21497- assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ));
21498- intrinsic = NI_AVX512DQ_MultiplyLow;
21499- }
21500- else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
21501- {
21502- intrinsic = NI_AVX10v1_MultiplyLow;
21498+ if (simdSize == 64)
21499+ {
21500+ assert(isV512Supported);
21501+ intrinsic = NI_AVX512DQ_MultiplyLow;
21502+ }
21503+ else
21504+ {
21505+ intrinsic = !isV512Supported ? NI_AVX10v1_MultiplyLow : NI_AVX512DQ_VL_MultiplyLow;
21506+ }
2150321507 }
2150421508 else
2150521509 {
21506- assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ_VL));
21507- intrinsic = NI_AVX512DQ_VL_MultiplyLow;
21510+ assert(((simdSize == 16) && compOpportunisticallyDependsOn(InstructionSet_SSE41)) ||
21511+ ((simdSize == 32) && compOpportunisticallyDependsOn(InstructionSet_AVX2)));
21512+
21513+ // Make op1 and op2 multi-use:
21514+ GenTree* op1Dup = fgMakeMultiUse(&op1);
21515+ GenTree* op2Dup = fgMakeMultiUse(&op2);
21516+
21517+ const bool is256 = simdSize == 32;
21518+
21519+ // Vector256<ulong> tmp0 = Avx2.Multiply(left, right);
21520+ GenTreeHWIntrinsic* tmp0 =
21521+ gtNewSimdHWIntrinsicNode(type, op1, op2, is256 ? NI_AVX2_Multiply : NI_SSE2_Multiply,
21522+ CORINFO_TYPE_ULONG, simdSize);
21523+
21524+ // Vector256<uint> tmp1 = Avx2.Shuffle(right.AsUInt32(), ZWXY);
21525+ GenTree* shuffleMask = gtNewIconNode(SHUFFLE_ZWXY, TYP_INT);
21526+ GenTreeHWIntrinsic* tmp1 = gtNewSimdHWIntrinsicNode(type, op2Dup, shuffleMask,
21527+ is256 ? NI_AVX2_Shuffle : NI_SSE2_Shuffle,
21528+ CORINFO_TYPE_UINT, simdSize);
21529+
21530+ // Vector256<uint> tmp2 = Avx2.MultiplyLow(left.AsUInt32(), tmp1);
21531+ GenTreeHWIntrinsic* tmp2 =
21532+ gtNewSimdHWIntrinsicNode(type, op1Dup, tmp1,
21533+ is256 ? NI_AVX2_MultiplyLow : NI_SSE41_MultiplyLow,
21534+ CORINFO_TYPE_UINT, simdSize);
21535+
21536+ // Vector256<int> tmp3 = Avx2.HorizontalAdd(tmp2.AsInt32(), Vector256<int>.Zero);
21537+ GenTreeHWIntrinsic* tmp3 =
21538+ gtNewSimdHWIntrinsicNode(type, tmp2, gtNewZeroConNode(type),
21539+ is256 ? NI_AVX2_HorizontalAdd : NI_SSSE3_HorizontalAdd,
21540+ CORINFO_TYPE_UINT, simdSize);
21541+
21542+ // Vector256<int> tmp4 = Avx2.Shuffle(tmp3, YWXW);
21543+ shuffleMask = gtNewIconNode(SHUFFLE_YWXW, TYP_INT);
21544+ GenTreeHWIntrinsic* tmp4 =
21545+ gtNewSimdHWIntrinsicNode(type, tmp3, shuffleMask, is256 ? NI_AVX2_Shuffle : NI_SSE2_Shuffle,
21546+ CORINFO_TYPE_UINT, simdSize);
21547+
21548+ // result = tmp0 + tmp4;
21549+ op1 = tmp0;
21550+ op2 = tmp4;
21551+ intrinsic = simdSize == 32 ? NI_AVX2_Add : NI_SSE2_Add;
2150821552 }
2150921553
2151021554 break;
0 commit comments