Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
7835f72
Arm64 SVE: Fix conditionalselect with constant arguments
a74nh Jun 18, 2025
18340c8
Make masked EvaluateBinaryInPlace() Arm64 only
a74nh Jun 23, 2025
0d60a5e
Check significantBit in EvaluateSimdVectorToPattern()
a74nh Jun 23, 2025
0a36025
fix set checks in EvaluateSimdVectorToPattern
a74nh Jun 23, 2025
59107d0
Use masks in EvalHWIntrinsicFunTernary() for SVE conditionalselect
a74nh Jun 24, 2025
b923b28
Check all of a vector lane when converting to mask
a74nh Jun 24, 2025
c01bc22
Add testing for EvalHWIntrinsicFunTernary changes
a74nh Jun 24, 2025
f9c6dd6
whitespace
a74nh Jun 24, 2025
802ae0d
Revert "Check all of a vector lane when converting to mask"
a74nh Jun 24, 2025
3c3cb8f
rename significantBit to leastSignificantBit
a74nh Jun 24, 2025
c96e38c
Use LSB of vector when converting from vector to mask
a74nh Jun 25, 2025
9d2cebd
Add LowerCnsMask
a74nh Jun 27, 2025
70d601d
Add testcase
a74nh Jun 27, 2025
5428e1d
Remove EvaluateSimdMaskToPattern
a74nh Jun 27, 2025
a2d7aea
Revert "Use LSB of vector when converting from vector to mask"
a74nh Jun 27, 2025
c65fd38
formatting
a74nh Jun 27, 2025
f513c84
fix assert check
a74nh Jun 27, 2025
3bf4d1e
GenTree for gtNewSimdCvtVectorToMaskNode()
a74nh Jun 30, 2025
cd27a7c
Split NI_Sve_ConditionalSelect into its own case
a74nh Jun 30, 2025
7856b87
Remove mask version of EvaluateBinaryInPlace
a74nh Jun 30, 2025
84d0408
remove assert
a74nh Jun 30, 2025
ed633f3
Check all bits in EvaluateSimdCvtVectorToMask
a74nh Jul 1, 2025
a53e4d1
Add ConstantVectors test
a74nh Jul 1, 2025
860ff75
merge main
a74nh Jul 2, 2025
cd52ec1
No need for DOTNET_EnableHWIntrinsic in csproj
a74nh Jul 2, 2025
748d297
Use IsMaskZero
a74nh Jul 2, 2025
090523a
Remove EvaluateBinarySimdAndMask
a74nh Jul 2, 2025
e7034bd
In lowering, default the mask type to byte
a74nh Jul 2, 2025
f14fc8e
In lowering, convert mask using byte basetype
a74nh Jul 2, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 42 additions & 4 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18608,6 +18608,30 @@ void GenTreeVecCon::EvaluateBinaryInPlace(genTreeOps oper, bool scalar, var_type
}
}

#if defined(TARGET_ARM64)
//------------------------------------------------------------------------
// GenTreeVecCon::EvaluateBinaryInPlace: Evaluates this constant using the given operation, when the other
// operand is a constant mask
//
// Arguments:
//    oper     - the operation to use in the evaluation
//    scalar   - true if this is a scalar operation; otherwise, false
//    baseType - the base type of the constant being checked
//    other    - the mask constant to use in the evaluation
//
// Notes:
//    The mask constant is first converted to a vector constant with EvaluateSimdCvtMaskToVector
//    and the operation is then evaluated vector-against-vector. The value of this node is
//    overwritten with the result. Only TYP_SIMD16 constants are handled (asserted).
//
void GenTreeVecCon::EvaluateBinaryInPlace(genTreeOps oper, bool scalar, var_types baseType, GenTreeMskCon* other)
{
    assert(gtType == TYP_SIMD16);

    // Turn the mask operand into its vector form so the regular SIMD evaluator can be used.
    simd16_t maskAsVector;
    EvaluateSimdCvtMaskToVector<simd16_t>(baseType, &maskAsVector, other->gtSimdMaskVal);

    // Evaluate into a temporary, then publish it as the new value of this constant.
    simd16_t combined = {};
    EvaluateBinarySimd<simd16_t>(oper, scalar, baseType, &combined, gtSimd16Val, maskAsVector);

    gtSimd16Val = combined;
}
#endif // TARGET_ARM64

//------------------------------------------------------------------------
// GenTreeVecCon::EvaluateBroadcastInPlace: Evaluates this constant using a broadcast
//
Expand Down Expand Up @@ -32838,12 +32862,26 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)

if (op2->IsCnsVec() && op3->IsCnsVec())
{
// op2 = op2 & op1
op2->AsVecCon()->EvaluateBinaryInPlace(GT_AND, false, simdBaseType, op1->AsVecCon());
if (op1->IsCnsVec())
{
// op2 = op2 & op1
op2->AsVecCon()->EvaluateBinaryInPlace(GT_AND, false, simdBaseType, op1->AsVecCon());

// op3 = op3 & ~op1
op3->AsVecCon()->EvaluateBinaryInPlace(GT_AND_NOT, false, simdBaseType, op1->AsVecCon());
}
#if defined(TARGET_ARM64)
else if (op1->IsCnsMsk())
{
assert(ni == NI_Sve_ConditionalSelect);

// op3 = op3 & ~op1
op3->AsVecCon()->EvaluateBinaryInPlace(GT_AND_NOT, false, simdBaseType, op1->AsVecCon());
// op2 = op2 & op1
op2->AsVecCon()->EvaluateBinaryInPlace(GT_AND, false, simdBaseType, op1->AsMskCon());

// op3 = op3 & ~op1
op3->AsVecCon()->EvaluateBinaryInPlace(GT_AND_NOT, false, simdBaseType, op1->AsMskCon());
}
#endif
// op2 = op2 | op3
op2->AsVecCon()->EvaluateBinaryInPlace(GT_OR, false, simdBaseType, op3->AsVecCon());

Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -6851,6 +6851,9 @@ struct GenTreeVecCon : public GenTree

void EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types baseType);
void EvaluateBinaryInPlace(genTreeOps oper, bool scalar, var_types baseType, GenTreeVecCon* other);
#if defined(TARGET_ARM64)
void EvaluateBinaryInPlace(genTreeOps oper, bool scalar, var_types baseType, GenTreeMskCon* other);
#endif

template <typename TBase>
void EvaluateBroadcastInPlace(TBase scalar)
Expand Down
123 changes: 108 additions & 15 deletions src/coreclr/jit/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -1598,35 +1598,33 @@ void EvaluateSimdCvtVectorToMask(simdmask_t* result, TSimd arg0)
uint32_t count = sizeof(TSimd) / sizeof(TBase);
uint64_t mask = 0;

TBase significantBit = 1;
#if defined(TARGET_XARCH)
significantBit = static_cast<TBase>(1) << ((sizeof(TBase) * 8) - 1);
TBase significantBit = static_cast<TBase>(1) << ((sizeof(TBase) * 8) - 1);
#endif

for (uint32_t i = 0; i < count; i++)
{
TBase input0;
memcpy(&input0, &arg0.u8[i * sizeof(TBase)], sizeof(TBase));

#if defined(TARGET_XARCH)
// For xarch we have count sequential bits to write depending on whether the
// corresponding input element has its most significant bit set
if ((input0 & significantBit) != 0)
{
#if defined(TARGET_XARCH)
// For xarch we have count sequential bits to write
// depending on if the corresponding the input element
// has its most significant bit set

mask |= static_cast<uint64_t>(1) << i;
}
#elif defined(TARGET_ARM64)
// For Arm64 we have count total bits to write, but
// they are sizeof(TBase) bits apart. We set
// depending on if the corresponding input element
// has its least significant bit set

// For Arm64 we have count total bits to write, but they are sizeof(TBase)
// bits apart. We set depending on if the corresponding input element has
// any bit set (this matches the use of cmpne in outputted assembly).
if (input0 != 0)
{
mask |= static_cast<uint64_t>(1) << (i * sizeof(TBase));
}
#else
unreached();
unreached();
#endif
}
}

memcpy(&result->u8[0], &mask, sizeof(uint64_t));
Expand Down Expand Up @@ -1906,7 +1904,7 @@ SveMaskPattern EvaluateSimdMaskToPattern(simdmask_t arg0)
memcpy(&mask, &arg0.u8[0], sizeof(uint64_t));
uint32_t finalOne = count;

// A mask pattern starts with zero of more 1s and then the rest of the mask is filled with 0s.
// A mask pattern starts with zero or more 1s and then the rest of the mask is filled with 0s.

// Find an unbroken sequence of 1s.
for (uint32_t i = 0; i < count; i++)
Expand Down Expand Up @@ -1993,6 +1991,101 @@ SveMaskPattern EvaluateSimdMaskToPattern(var_types baseType, simdmask_t arg0)
}
}
}

//------------------------------------------------------------------------
// EvaluateSimdVectorToPattern: Determines which SveMaskPattern, if any, describes the
// active elements of a constant vector.
//
// Type parameters:
//    TSimd - the type of the vector constant; its bytes are read through the u8 member
//    TBase - the element type used to split the vector into lanes
//
// Arguments:
//    arg0 - the vector constant to inspect
//
// Return Value:
//    SveMaskPatternAll when every element is non-zero, one of SveMaskPatternVectorCount1 to
//    SveMaskPatternVectorCount8 when exactly that many leading elements are non-zero and all
//    the remaining elements are zero, otherwise SveMaskPatternNone.
//
// Notes:
//    An element counts as active when any of its bits is set (the original comment relates
//    this to the use of cmpne in the generated assembly).
//
template <typename TSimd, typename TBase>
SveMaskPattern EvaluateSimdVectorToPattern(TSimd arg0)
{
    const uint32_t laneCount = sizeof(TSimd) / sizeof(TBase);

    // Reads one element out of the vector and reports whether any of its bits is set.
    auto laneIsActive = [&arg0](uint32_t lane) -> bool {
        TBase element;
        memcpy(&element, &arg0.u8[lane * sizeof(TBase)], sizeof(TBase));
        return element != 0;
    };

    // Count the unbroken run of active elements at the start of the vector.
    uint32_t leadingActive = 0;
    while ((leadingActive < laneCount) && laneIsActive(leadingActive))
    {
        leadingActive++;
    }

    // Everything after that run must be inactive, otherwise this is not a valid pattern.
    for (uint32_t lane = leadingActive; lane < laneCount; lane++)
    {
        if (laneIsActive(lane))
        {
            return SveMaskPatternNone;
        }
    }

    if (leadingActive == laneCount)
    {
        return SveMaskPatternAll;
    }

    if ((leadingActive >= SveMaskPatternVectorCount1) && (leadingActive <= SveMaskPatternVectorCount8))
    {
        return static_cast<SveMaskPattern>(leadingActive);
    }

    // TODO: Add other patterns as required. These probably won't be seen until we get
    // to wider vector lengths.
    return SveMaskPatternNone;
}

//------------------------------------------------------------------------
// EvaluateSimdVectorToPattern: Selects the element width that matches baseType and forwards
// to the typed EvaluateSimdVectorToPattern<TSimd, TBase> overload.
//
// Arguments:
//    baseType - the base type of the vector elements
//    arg0     - the vector constant to inspect
//
// Return Value:
//    The SveMaskPattern reported by the typed overload.
//
// Notes:
//    Signedness and floating-point-ness are ignored; only the element size matters, so every
//    base type is mapped onto the unsigned integer of the same width. Any other base type
//    hits unreached().
//
template <typename TSimd>
SveMaskPattern EvaluateSimdVectorToPattern(var_types baseType, TSimd arg0)
{
    switch (baseType)
    {
        // 1-byte elements
        case TYP_BYTE:
        case TYP_UBYTE:
            return EvaluateSimdVectorToPattern<TSimd, uint8_t>(arg0);

        // 2-byte elements
        case TYP_SHORT:
        case TYP_USHORT:
            return EvaluateSimdVectorToPattern<TSimd, uint16_t>(arg0);

        // 4-byte elements
        case TYP_INT:
        case TYP_UINT:
        case TYP_FLOAT:
            return EvaluateSimdVectorToPattern<TSimd, uint32_t>(arg0);

        // 8-byte elements
        case TYP_LONG:
        case TYP_ULONG:
        case TYP_DOUBLE:
            return EvaluateSimdVectorToPattern<TSimd, uint64_t>(arg0);

        default:
            unreached();
    }
}
#endif // TARGET_ARM64

#endif // FEATURE_MASKED_HW_INTRINSICS
Expand Down
36 changes: 36 additions & 0 deletions src/coreclr/jit/valuenum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7868,6 +7868,28 @@ ValueNum EvaluateSimdCvtVectorToMask(ValueNumStore* vns, var_types simdType, var
return vns->VNForSimdMaskCon(result);
}

#if defined(TARGET_ARM64)
//------------------------------------------------------------------------
// EvaluateBinarySimdAndMask: Evaluates a binary operation between a constant vector and a
// constant mask, both given as value numbers.
//
// Arguments:
//    vns        - the value number store
//    oper       - the operation to use in the evaluation
//    scalar     - true if this is a scalar operation; otherwise, false
//    simdType   - the SIMD type of the operation; must be TYP_SIMD16
//    baseType   - the base type of the vector elements
//    arg0VN     - value number of the constant vector operand
//    arg1VNMask - value number of the constant mask operand
//
// Return Value:
//    The value number of the resulting 16-byte SIMD constant.
//
// Notes:
//    The mask is converted to a vector constant first, so the operation itself is evaluated
//    vector-against-vector.
//
ValueNum EvaluateBinarySimdAndMask(ValueNumStore* vns,
                                   genTreeOps     oper,
                                   bool           scalar,
                                   var_types      simdType,
                                   var_types      baseType,
                                   ValueNum       arg0VN,
                                   ValueNum       arg1VNMask)
{
    assert(simdType == TYP_SIMD16);

    simd16_t vectorOperand = GetConstantSimd16(vns, baseType, arg0VN);

    // Convert the mask to its vector form and fetch that constant.
    ValueNum maskAsVectorVN = EvaluateSimdCvtMaskToVector(vns, simdType, baseType, arg1VNMask);
    simd16_t maskOperand    = GetConstantSimd16(vns, baseType, maskAsVectorVN);

    simd16_t folded = {};
    EvaluateBinarySimd<simd16_t>(oper, scalar, baseType, &folded, vectorOperand, maskOperand);

    return vns->VNForSimd16Con(folded);
}
#endif // TARGET_ARM64

ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(GenTreeHWIntrinsic* tree,
VNFunc func,
ValueNum arg0VN,
Expand Down Expand Up @@ -9145,6 +9167,20 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunTernary(
{
// (y & x) | (z & ~x)

#if defined(TARGET_ARM64)
if (ni == NI_Sve_ConditionalSelect)
{
assert(TypeOfVN(arg0VN) == TYP_MASK);

ValueNum trueVN =
EvaluateBinarySimdAndMask(this, GT_AND, false, type, baseType, arg1VN, arg0VN);
ValueNum falseVN =
EvaluateBinarySimdAndMask(this, GT_AND_NOT, false, type, baseType, arg2VN, arg0VN);

return EvaluateBinarySimd(this, GT_OR, false, type, baseType, trueVN, falseVN);
}
#endif // TARGET_ARM64

ValueNum trueVN = EvaluateBinarySimd(this, GT_AND, false, type, baseType, arg1VN, arg0VN);
ValueNum falseVN = EvaluateBinarySimd(this, GT_AND_NOT, false, type, baseType, arg2VN, arg0VN);

Expand Down
113 changes: 113 additions & 0 deletions src/tests/JIT/opt/SVE/ConditionalSelectConstants.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Numerics;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using Xunit;

// Test for Sve.ConditionalSelect when its operands are constants (constant masks and/or
// constant vectors). Each helper below is NoInlining and passes only constant arguments to
// Sve.ConditionalSelect; the entry point compares a value derived from each result against
// a hard-coded expectation.
public class ConditionalSelectConstants
{
// Test entry point. Returns 100 when every check passed and 101 when any check failed.
// All checks are skipped (and 100 is returned) when Sve.IsSupported is false.
[MethodImpl(MethodImplOptions.NoInlining)]
[Fact]
public static int TestConditionalSelectConstants()
{
bool fail = false;

if (Sve.IsSupported)
{
// Each vector result is reduced with Sve.AddAcross and element 0 of the reduction is checked.
// NOTE(review): the expected sums below look like they assume a 128-bit vector length
// (e.g. 15 == 3 + 3 * 4 for four int lanes) - confirm for wider SVE implementations.
var r1 = Sve.AddAcross(ConditionalSelect1CC());
Console.WriteLine(r1[0]);
if (r1[0] != 15)
{
fail = true;
}

// NOTE(review): -3 is consistent with one zero lane plus three lanes of -1 (all bits set) - confirm.
var r2 = Sve.AddAcross(ConditionalSelect1FT());
Console.WriteLine(r2[0]);
if (r2[0] != -3)
{
fail = true;
}

// NOTE(review): 4080 == 16 * 255, consistent with sixteen byte lanes that are all 0xFF - confirm.
var r3 = Sve.AddAcross(ConditionalSelect16TF());
Console.WriteLine(r3[0]);
if (r3[0] != 4080)
{
fail = true;
}

// NOTE(review): 16 is consistent with two lanes of 9 plus two lanes of -1 - confirm.
var r4 = Sve.AddAcross(ConditionalSelect2CT());
Console.WriteLine(r4[0]);
if (r4[0] != 16)
{
fail = true;
}

// This helper returns a scalar directly; 5 is the default value it passes to
// Sve.ConditionalExtractLastActiveElement.
var r5 = ConditionalSelectConsts();
Console.WriteLine(r5);
if (r5 != 5)
{
fail = true;
}
}

if (fail)
{
return 101;
}
return 100;
}

// Mask: VectorCount1 pattern. True operand: constant vector of 3. False operand: constant vector of 4.
[MethodImpl(MethodImplOptions.NoInlining)]
static Vector<int> ConditionalSelect1CC()
{
return Sve.ConditionalSelect(
Sve.CreateTrueMaskInt32(SveMaskPattern.VectorCount1),
Vector.Create<int>(3),
Vector.Create<int>(4)
);
}

// Mask: VectorCount1 pattern. True operand: false mask. False operand: true mask.
[MethodImpl(MethodImplOptions.NoInlining)]
static Vector<int> ConditionalSelect1FT()
{
return Sve.ConditionalSelect(
Sve.CreateTrueMaskInt32(SveMaskPattern.VectorCount1),
Sve.CreateFalseMaskInt32(),
Sve.CreateTrueMaskInt32()
);
}

// Byte variant. Mask: VectorCount16 pattern. True operand: true mask. False operand: false mask.
[MethodImpl(MethodImplOptions.NoInlining)]
static Vector<byte> ConditionalSelect16TF()
{
return Sve.ConditionalSelect(
Sve.CreateTrueMaskByte(SveMaskPattern.VectorCount16),
Sve.CreateTrueMaskByte(),
Sve.CreateFalseMaskByte()
);
}

// Mask: VectorCount2 pattern. True operand: constant vector of 9. False operand: true mask.
[MethodImpl(MethodImplOptions.NoInlining)]
static Vector<int> ConditionalSelect2CT()
{
return Sve.ConditionalSelect(
Sve.CreateTrueMaskInt32(SveMaskPattern.VectorCount2),
Vector.Create<int>(9),
Sve.CreateTrueMaskInt32()
);
}

// All three ConditionalSelect operands are constant vectors here (the mask operand is a
// vector built with Vector128.CreateScalar rather than a CreateTrueMask* call). The selected
// vector is then passed to Sve.ConditionalExtractLastActiveElement together with a mask built
// from a scalar 0 and a default value of 5.
[MethodImpl(MethodImplOptions.NoInlining)]
static sbyte ConditionalSelectConsts()
{
var vec = Sve.ConditionalSelect(Vector128.CreateScalar((sbyte)49).AsVector(),
Vector128.CreateScalar((sbyte)0).AsVector(),
Vector.Create<sbyte>(107));
return Sve.ConditionalExtractLastActiveElement(Vector128.CreateScalar((sbyte)0).AsVector(), 5, vec);
}
}
Loading
Loading