From 4c0093fcd3ea3e2ea6a760c0bc97976f57505e49 Mon Sep 17 00:00:00 2001 From: Radek Doulik Date: Wed, 3 May 2023 14:21:56 +0200 Subject: [PATCH 1/2] [wasm] PackedSimd, add floating point methods --- .../Wasm/PackedSimd.PlatformNotSupported.cs | 51 ++++++ .../Runtime/Intrinsics/Wasm/PackedSimd.cs | 171 ++++++++++++++++++ .../ref/System.Runtime.Intrinsics.cs | 30 +++ src/mono/mono/mini/llvm-intrinsics.h | 18 +- src/mono/mono/mini/mini-llvm.c | 4 +- src/mono/mono/mini/mini-ops.h | 4 +- src/mono/mono/mini/simd-arm64.h | 4 +- src/mono/mono/mini/simd-intrinsics.c | 85 ++++++--- src/mono/mono/mini/simd-methods.h | 3 + 9 files changed, 329 insertions(+), 41 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Wasm/PackedSimd.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Wasm/PackedSimd.PlatformNotSupported.cs index bd1192b7f1af50..d2103ae038c811 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Wasm/PackedSimd.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Wasm/PackedSimd.PlatformNotSupported.cs @@ -392,6 +392,57 @@ public abstract class PackedSimd public static Vector128 CompareGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } public static Vector128 CompareGreaterThanOrEqual(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + // Floating-point sign bit operations + + public static Vector128 Negate(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 Negate(Vector128 value) { throw new PlatformNotSupportedException(); } + + public static Vector128 Abs(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 Abs(Vector128 value) { throw new PlatformNotSupportedException(); } + + // Floating-point min and max + + public static Vector128 Min(Vector128 left, 
Vector128 right) { throw new PlatformNotSupportedException(); } + public static Vector128 Min(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + public static Vector128 Max(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + public static Vector128 Max(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + public static Vector128 PseudoMin(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + public static Vector128 PseudoMin(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + public static Vector128 PseudoMax(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + public static Vector128 PseudoMax(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // Floating-point arithmetic + + public static Vector128 Add(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + public static Vector128 Add(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + public static Vector128 Subtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + public static Vector128 Subtract(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + public static Vector128 Divide(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + public static Vector128 Divide(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + public static Vector128 Multiply(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + public static Vector128 Multiply(Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + public static Vector128 Sqrt(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 Sqrt(Vector128 value) { throw new 
PlatformNotSupportedException(); } + + public static Vector128 Ceiling(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 Ceiling(Vector128 value) { throw new PlatformNotSupportedException(); } + + public static Vector128 Floor(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 Floor(Vector128 value) { throw new PlatformNotSupportedException(); } + + public static Vector128 Truncate(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 Truncate(Vector128 value) { throw new PlatformNotSupportedException(); } + + public static Vector128 RoundToNearest(Vector128 value) { throw new PlatformNotSupportedException(); } + public static Vector128 RoundToNearest(Vector128 value) { throw new PlatformNotSupportedException(); } + // Conversions internal static Vector128 ConvertNarrowingSignedSaturate(Vector128 lower, Vector128 upper) { throw new PlatformNotSupportedException(); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Wasm/PackedSimd.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Wasm/PackedSimd.cs index 2a9ac47545ad28..680b8af89492bc 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Wasm/PackedSimd.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Wasm/PackedSimd.cs @@ -1699,6 +1699,177 @@ public abstract class PackedSimd [Intrinsic] public static Vector128 CompareGreaterThanOrEqual(Vector128 left, Vector128 right) => CompareGreaterThanOrEqual(left, right); + // Floating-point sign bit operations + + /// + /// f32x4.neg + /// + [Intrinsic] + public static Vector128 Negate(Vector128 value) => Negate(value); + /// + /// f64x2.neg + /// + [Intrinsic] + public static Vector128 Negate(Vector128 value) => Negate(value); + + /// + /// f32x4.abs + /// + [Intrinsic] + public static Vector128 Abs(Vector128 value) => Abs(value); + /// + /// f64x2.abs + /// 
+ [Intrinsic] + public static Vector128 Abs(Vector128 value) => Abs(value); + + // Floating-point min and max + + /// + /// f32x4.min + /// + [Intrinsic] + public static Vector128 Min(Vector128 left, Vector128 right) => Min(left, right); + /// + /// f64x2.min + /// + [Intrinsic] + public static Vector128 Min(Vector128 left, Vector128 right) => Min(left, right); + + /// + /// f32x4.max + /// + [Intrinsic] + public static Vector128 Max(Vector128 left, Vector128 right) => Max(left, right); + /// + /// f64x2.max + /// + [Intrinsic] + public static Vector128 Max(Vector128 left, Vector128 right) => Max(left, right); + + /// + /// f32x4.pmin + /// + [Intrinsic] + public static Vector128 PseudoMin(Vector128 left, Vector128 right) => PseudoMin(left, right); + /// + /// f64x2.pmin + /// + [Intrinsic] + public static Vector128 PseudoMin(Vector128 left, Vector128 right) => PseudoMin(left, right); + + /// + /// f32x4.pmax + /// + [Intrinsic] + public static Vector128 PseudoMax(Vector128 left, Vector128 right) => PseudoMax(left, right); + /// + /// f64x2.pmax + /// + [Intrinsic] + public static Vector128 PseudoMax(Vector128 left, Vector128 right) => PseudoMax(left, right); + + // Floating-point arithmetic + + /// + /// f32x4.add + /// + [Intrinsic] + public static Vector128 Add(Vector128 left, Vector128 right) => Add(left, right); + /// + /// f64x2.add + /// + [Intrinsic] + public static Vector128 Add(Vector128 left, Vector128 right) => Add(left, right); + + /// + /// f32x4.sub + /// + [Intrinsic] + public static Vector128 Subtract(Vector128 left, Vector128 right) => Subtract(left, right); + /// + /// f64x2.sub + /// + [Intrinsic] + public static Vector128 Subtract(Vector128 left, Vector128 right) => Subtract(left, right); + + /// + /// f32x4.div + /// + [Intrinsic] + public static Vector128 Divide(Vector128 left, Vector128 right) => Divide(left, right); + /// + /// f64x2.div + /// + [Intrinsic] + public static Vector128 Divide(Vector128 left, Vector128 right) => Divide(left, 
right); + + /// + /// f32x4.mul + /// + [Intrinsic] + public static Vector128 Multiply(Vector128 left, Vector128 right) => Multiply(left, right); + /// + /// f64x2.mul + /// + [Intrinsic] + public static Vector128 Multiply(Vector128 left, Vector128 right) => Multiply(left, right); + + /// + /// f32x4.sqrt + /// + [Intrinsic] + public static Vector128 Sqrt(Vector128 value) => Sqrt(value); + /// + /// f64x2.sqrt + /// + [Intrinsic] + public static Vector128 Sqrt(Vector128 value) => Sqrt(value); + + /// + /// f32x4.ceil + /// + [Intrinsic] + public static Vector128 Ceiling(Vector128 value) => Ceiling(value); + /// + /// f64x2.ceil + /// + [Intrinsic] + public static Vector128 Ceiling(Vector128 value) => Ceiling(value); + + /// + /// f32x4.floor + /// + [Intrinsic] + public static Vector128 Floor(Vector128 value) => Floor(value); + /// + /// f64x2.floor + /// + [Intrinsic] + public static Vector128 Floor(Vector128 value) => Floor(value); + + /// + /// f32x4.trunc + /// + [Intrinsic] + public static Vector128 Truncate(Vector128 value) => Truncate(value); + /// + /// f64x2.trunc + /// + [Intrinsic] + public static Vector128 Truncate(Vector128 value) => Truncate(value); + + /// + /// f32x4.nearest + /// + [Intrinsic] + public static Vector128 RoundToNearest(Vector128 value) => RoundToNearest(value); + /// + /// f64x2.nearest + /// + [Intrinsic] + public static Vector128 RoundToNearest(Vector128 value) => RoundToNearest(value); + // Conversions /// diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index d8c842b17ba835..d48000bba4b348 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -6319,5 +6319,35 @@ public abstract partial class PackedSimd public static Vector128 CompareGreaterThanOrEqual(Vector128 left, Vector128 right) { throw null; } public 
static Vector128 CompareGreaterThanOrEqual(Vector128 left, Vector128 right) { throw null; } public static Vector128 CompareGreaterThanOrEqual(Vector128 left, Vector128 right) { throw null; } + public static Vector128 Negate(Vector128 value) { throw null; } + public static Vector128 Negate(Vector128 value) { throw null; } + public static Vector128 Abs(Vector128 value) { throw null; } + public static Vector128 Abs(Vector128 value) { throw null; } + public static Vector128 Min(Vector128 left, Vector128 right) { throw null; } + public static Vector128 Min(Vector128 left, Vector128 right) { throw null; } + public static Vector128 Max(Vector128 left, Vector128 right) { throw null; } + public static Vector128 Max(Vector128 left, Vector128 right) { throw null; } + public static Vector128 PseudoMin(Vector128 left, Vector128 right) { throw null; } + public static Vector128 PseudoMin(Vector128 left, Vector128 right) { throw null; } + public static Vector128 PseudoMax(Vector128 left, Vector128 right) { throw null; } + public static Vector128 PseudoMax(Vector128 left, Vector128 right) { throw null; } + public static Vector128 Add(Vector128 left, Vector128 right) { throw null; } + public static Vector128 Add(Vector128 left, Vector128 right) { throw null; } + public static Vector128 Subtract(Vector128 left, Vector128 right) { throw null; } + public static Vector128 Subtract(Vector128 left, Vector128 right) { throw null; } + public static Vector128 Divide(Vector128 left, Vector128 right) { throw null; } + public static Vector128 Divide(Vector128 left, Vector128 right) { throw null; } + public static Vector128 Multiply(Vector128 left, Vector128 right) { throw null; } + public static Vector128 Multiply(Vector128 left, Vector128 right) { throw null; } + public static Vector128 Sqrt(Vector128 value) { throw null; } + public static Vector128 Sqrt(Vector128 value) { throw null; } + public static Vector128 Ceiling(Vector128 value) { throw null; } + public static Vector128 
Ceiling(Vector128 value) { throw null; } + public static Vector128 Floor(Vector128 value) { throw null; } + public static Vector128 Floor(Vector128 value) { throw null; } + public static Vector128 Truncate(Vector128 value) { throw null; } + public static Vector128 Truncate(Vector128 value) { throw null; } + public static Vector128 RoundToNearest(Vector128 value) { throw null; } + public static Vector128 RoundToNearest(Vector128 value) { throw null; } } } diff --git a/src/mono/mono/mini/llvm-intrinsics.h b/src/mono/mono/mini/llvm-intrinsics.h index 10932f78bc4ccf..953879caeb0421 100644 --- a/src/mono/mono/mini/llvm-intrinsics.h +++ b/src/mono/mono/mini/llvm-intrinsics.h @@ -100,6 +100,14 @@ INTRINS(PEXT_I64, x86_bmi_pext_64, X86) INTRINS(PDEP_I32, x86_bmi_pdep_32, X86) INTRINS(PDEP_I64, x86_bmi_pdep_64, X86) +INTRINS_OVR(SIMD_SQRT_R8, sqrt, Generic, sse_r8_t) +INTRINS_OVR(SIMD_SQRT_R4, sqrt, Generic, sse_r4_t) +INTRINS_OVR_TAG(SIMD_FLOOR, floor, Generic, Scalar | V64 | V128 | R4 | R8) +INTRINS_OVR_TAG(SIMD_CEIL, ceil, Generic, Scalar | V64 | V128 | R4 | R8) +INTRINS_OVR_TAG(SIMD_TRUNC, trunc, Generic, Scalar | V64 | V128 | R4 | R8) +INTRINS_OVR_TAG(SIMD_ROUND, round, Generic, Scalar | V64 | V128 | R4 | R8) +INTRINS_OVR_TAG(SIMD_NEAREST, nearbyint, Generic, V64 | V128 | R4 | R8) + #if LLVM_API_VERSION >= 1400 INTRINS_OVR_TAG(ROUNDEVEN, roundeven, Generic, Scalar | V64 | V128 | R4 | R8) #endif @@ -124,8 +132,6 @@ INTRINS(SSE_PSRL_Q, x86_sse2_psrl_q, X86) INTRINS(SSE_PSLL_W, x86_sse2_psll_w, X86) INTRINS(SSE_PSLL_D, x86_sse2_psll_d, X86) INTRINS(SSE_PSLL_Q, x86_sse2_psll_q, X86) -INTRINS_OVR(SSE_SQRT_PD, sqrt, Generic, sse_r8_t) -INTRINS_OVR(SSE_SQRT_PS, sqrt, Generic, sse_r4_t) INTRINS_OVR(SSE_SQRT_SD, sqrt, Generic, LLVMDoubleType ()) INTRINS_OVR(SSE_SQRT_SS, sqrt, Generic, LLVMFloatType ()) INTRINS(SSE_RCP_PS, x86_sse_rcp_ps, X86) @@ -283,6 +289,10 @@ INTRINS_OVR_2_ARG(WASM_NARROW_SIGNED_V16, wasm_narrow_signed, Wasm, sse_i1_t, ss 
INTRINS_OVR_2_ARG(WASM_NARROW_SIGNED_V8, wasm_narrow_signed, Wasm, sse_i2_t, sse_i4_t) INTRINS_OVR_2_ARG(WASM_NARROW_UNSIGNED_V16, wasm_narrow_unsigned, Wasm, sse_i1_t, sse_i2_t) INTRINS_OVR_2_ARG(WASM_NARROW_UNSIGNED_V8, wasm_narrow_unsigned, Wasm, sse_i2_t, sse_i4_t) +INTRINS_OVR_TAG(WASM_PMAX, wasm_pmax, Wasm, R4 | R8) +INTRINS_OVR_TAG(WASM_PMIN, wasm_pmin, Wasm, R4 | R8) +INTRINS_OVR(WASM_PMAX_V4, fabs, Generic, sse_r4_t) +INTRINS_OVR(WASM_PMAX_V2, fabs, Generic, sse_r8_t) INTRINS(WASM_Q15MULR_SAT_SIGNED, wasm_q15mulr_sat_signed, Wasm) INTRINS(WASM_SHUFFLE, wasm_shuffle, Wasm) INTRINS_OVR(WASM_SUB_SAT_SIGNED_V16, wasm_sub_sat_signed, Wasm, sse_i1_t) @@ -436,13 +446,9 @@ INTRINS_OVR_TAG(AARCH64_ADV_SIMD_FRECPS, aarch64_neon_frecps, Arm64, Scalar | V6 INTRINS_OVR_TAG(AARCH64_ADV_SIMD_RBIT, aarch64_neon_rbit, Arm64, V64 | V128 | I1) #endif -INTRINS_OVR_TAG(AARCH64_ADV_SIMD_FRINTA, round, Generic, Scalar | V64 | V128 | R4 | R8) #if LLVM_API_VERSION < 1400 INTRINS_OVR_TAG(AARCH64_ADV_SIMD_FRINTN, aarch64_neon_frintn, Arm64, Scalar | V64 | V128 | R4 | R8) #endif -INTRINS_OVR_TAG(AARCH64_ADV_SIMD_FRINTM, floor, Generic, Scalar | V64 | V128 | R4 | R8) -INTRINS_OVR_TAG(AARCH64_ADV_SIMD_FRINTP, ceil, Generic, Scalar | V64 | V128 | R4 | R8) -INTRINS_OVR_TAG(AARCH64_ADV_SIMD_FRINTZ, trunc, Generic, Scalar | V64 | V128 | R4 | R8) INTRINS_OVR_TAG(AARCH64_ADV_SIMD_SUQADD, aarch64_neon_suqadd, Arm64, Scalar | V64 | V128 | I1 | I2 | I4 | I8) INTRINS_OVR_TAG(AARCH64_ADV_SIMD_USQADD, aarch64_neon_usqadd, Arm64, Scalar | V64 | V128 | I1 | I2 | I4 | I8) diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index e2e9e2dd78d540..e6685c42810e09 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -11546,8 +11546,6 @@ MONO_RESTORE_WARNING values [ins->dreg] = result; break; } -#endif -#if defined(TARGET_ARM64) || defined(TARGET_AMD64) case OP_NEGATION: case OP_NEGATION_SCALAR: { gboolean scalar = ins->opcode == OP_NEGATION_SCALAR; 
@@ -11565,6 +11563,8 @@ MONO_RESTORE_WARNING values [ins->dreg] = result; break; } +#endif +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) case OP_ONES_COMPLEMENT: { LLVMTypeRef ret_t = LLVMTypeOf (lhs); LLVMValueRef result = bitcast_to_integral (ctx, lhs); diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h index 00f382b4148d94..918a655ccbdd97 100644 --- a/src/mono/mono/mini/mini-ops.h +++ b/src/mono/mono/mini/mini-ops.h @@ -1803,8 +1803,6 @@ MINI_OP(OP_WASM_ONESCOMPLEMENT, "wasm_onescomplement", XREG, XREG, NONE) #endif #if defined(TARGET_ARM64) || defined(TARGET_AMD64) -MINI_OP(OP_NEGATION, "negate", XREG, XREG, NONE) -MINI_OP(OP_NEGATION_SCALAR, "negate_scalar", XREG, XREG, NONE) MINI_OP(OP_ONES_COMPLEMENT, "ones_complement", XREG, XREG, NONE) MINI_OP(OP_CVT_FP_UI, "convert_fp_to_ui", XREG, XREG, NONE) @@ -1819,6 +1817,8 @@ MINI_OP(OP_CVT_SI_FP_SCALAR, "convert_si_to_fp_scalar", XREG, XREG, NONE) #endif // TARGET_ARM64 || TARGET_AMD64 #if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) +MINI_OP(OP_NEGATION, "negate", XREG, XREG, NONE) +MINI_OP(OP_NEGATION_SCALAR, "negate_scalar", XREG, XREG, NONE) MINI_OP3(OP_BSL, "bitwise_select", XREG, XREG, XREG, XREG) #endif // TARGET_ARM64 || TARGET_AMD64 || TARGET_WASM diff --git a/src/mono/mono/mini/simd-arm64.h b/src/mono/mono/mini/simd-arm64.h index 9bd2c31cb7dd79..c7d60f4f5f97f1 100644 --- a/src/mono/mono/mini/simd-arm64.h +++ b/src/mono/mono/mini/simd-arm64.h @@ -78,8 +78,8 @@ SIMD_OP (128, OP_XBINOP_FORCEINT, XBINOP_FORCEINT_XOR, WDSS, arm_neo SIMD_OP (128, OP_ARM64_XADDV, INTRINS_AARCH64_ADV_SIMD_UADDV, WTDS, arm_neon_addv, arm_neon_addv, arm_neon_addv, _SKIP, _UNDEF, _UNDEF) SIMD_OP (128, OP_ARM64_XADDV, INTRINS_AARCH64_ADV_SIMD_SADDV, WTDS, arm_neon_addv, arm_neon_addv, arm_neon_addv, _SKIP, _UNDEF, _UNDEF) SIMD_OP (128, OP_ARM64_XADDV, INTRINS_AARCH64_ADV_SIMD_FADDV, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, _SKIP, _SKIP) -SIMD_OP (128, OP_XOP_OVR_X_X, 
INTRINS_AARCH64_ADV_SIMD_FRINTP, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_frintp, arm_neon_frintp) -SIMD_OP (128, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FRINTM, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_frintm, arm_neon_frintm) +SIMD_OP (128, OP_XOP_OVR_X_X, INTRINS_SIMD_CEIL, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_frintp, arm_neon_frintp) +SIMD_OP (128, OP_XOP_OVR_X_X, INTRINS_SIMD_FLOOR, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_frintm, arm_neon_frintm) SIMD_OP (128, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FSQRT, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fsqrt, arm_neon_fsqrt) SIMD_OP (128, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_ABS, WTDS, arm_neon_abs, arm_neon_abs, arm_neon_abs, arm_neon_abs, _UNDEF, _UNDEF) SIMD_OP (128, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FABS, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fabs, arm_neon_fabs) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 819a9f6b2b0058..732b57041eac6a 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -428,13 +428,19 @@ emit_simd_ins_for_unary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSignat } return emit_simd_ins_for_sig (cfg, klass, op, -1, arg_type, fsig, args); #elif defined(TARGET_WASM) + int op = -1; switch (id) { + case SN_Negate: + op = OP_NEGATION; + break; case SN_OnesComplement: - return emit_simd_ins_for_sig (cfg, klass, OP_WASM_ONESCOMPLEMENT, -1, arg_type, fsig, args); + op = OP_WASM_ONESCOMPLEMENT; + break; default: return NULL; } + return emit_simd_ins_for_sig (cfg, klass, op, -1, arg_type, fsig, args); #else return NULL; #endif @@ -1452,7 +1458,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi if (!type_enum_is_float (arg0_type)) return NULL; #ifdef TARGET_ARM64 - int ceil_or_floor = id == SN_Ceiling ? INTRINS_AARCH64_ADV_SIMD_FRINTP : INTRINS_AARCH64_ADV_SIMD_FRINTM; + int ceil_or_floor = id == SN_Ceiling ? 
INTRINS_SIMD_CEIL : INTRINS_SIMD_FLOOR; return emit_simd_ins_for_sig (cfg, klass, OP_XOP_OVR_X_X, ceil_or_floor, arg0_type, fsig, args); #elif defined(TARGET_AMD64) if (!is_SIMD_feature_supported (cfg, MONO_CPU_X86_SSE41)) @@ -2026,8 +2032,8 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return NULL; #ifdef TARGET_ARM64 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FSQRT, arg0_type, fsig, args); -#elif defined(TARGET_AMD64) - int instc0 = arg0_type == MONO_TYPE_R4 ? INTRINS_SSE_SQRT_PS : INTRINS_SSE_SQRT_PD; +#elif defined(TARGET_AMD64) || defined(TARGET_WASM) + int instc0 = arg0_type == MONO_TYPE_R4 ? INTRINS_SIMD_SQRT_R4 : INTRINS_SIMD_SQRT_R8; return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X, instc0, arg0_type, fsig, args); #else @@ -2643,9 +2649,9 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f case SN_SquareRoot: { #ifdef TARGET_ARM64 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FSQRT, MONO_TYPE_R4, fsig, args); -#elif defined(TARGET_AMD64) +#elif defined(TARGET_AMD64) || defined(TARGET_WASM) ins = emit_simd_ins (cfg, klass, OP_XOP_X_X, args [0]->dreg, -1); - ins->inst_c0 = (IntrinsicId)INTRINS_SSE_SQRT_PS; + ins->inst_c0 = (IntrinsicId)INTRINS_SIMD_SQRT_R4; return ins; #else return NULL; @@ -3204,8 +3210,8 @@ static SimdIntrinsic advsimd_methods [] = { {SN_And, OP_XBINOP_FORCEINT, XBINOP_FORCEINT_AND}, {SN_BitwiseClear, OP_ARM64_BIC}, {SN_BitwiseSelect, OP_BSL}, - {SN_Ceiling, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FRINTP}, - {SN_CeilingScalar, OP_XOP_OVR_SCALAR_X_X, INTRINS_AARCH64_ADV_SIMD_FRINTP}, + {SN_Ceiling, OP_XOP_OVR_X_X, INTRINS_SIMD_CEIL}, + {SN_CeilingScalar, OP_XOP_OVR_SCALAR_X_X, INTRINS_SIMD_CEIL}, {SN_CompareEqual, OP_XCOMPARE, CMP_EQ, OP_XCOMPARE, CMP_EQ, OP_XCOMPARE_FP, CMP_EQ}, {SN_CompareEqualScalar, OP_XCOMPARE_SCALAR, CMP_EQ, OP_XCOMPARE_SCALAR, CMP_EQ, OP_XCOMPARE_FP_SCALAR, CMP_EQ}, 
{SN_CompareGreaterThan, OP_XCOMPARE, CMP_GT, OP_XCOMPARE, CMP_GT_UN, OP_XCOMPARE_FP, CMP_GT}, @@ -3284,8 +3290,8 @@ static SimdIntrinsic advsimd_methods [] = { {SN_ExtractNarrowingUpper, OP_ARM64_XTN2}, {SN_ExtractVector128, OP_ARM64_EXT}, {SN_ExtractVector64, OP_ARM64_EXT}, - {SN_Floor, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FRINTM}, - {SN_FloorScalar, OP_XOP_OVR_SCALAR_X_X, INTRINS_AARCH64_ADV_SIMD_FRINTM}, + {SN_Floor, OP_XOP_OVR_X_X, INTRINS_SIMD_FLOOR}, + {SN_FloorScalar, OP_XOP_OVR_SCALAR_X_X, INTRINS_SIMD_FLOOR}, {SN_FusedAddHalving, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_SHADD, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_UHADD}, {SN_FusedAddRoundedHalving, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_SRHADD, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_URHADD}, {SN_FusedMultiplyAdd, OP_ARM64_FMADD}, @@ -3427,8 +3433,8 @@ static SimdIntrinsic advsimd_methods [] = { #else {SN_ReverseElementBits, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_RBIT}, #endif - {SN_RoundAwayFromZero, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FRINTA}, - {SN_RoundAwayFromZeroScalar, OP_XOP_OVR_SCALAR_X_X, INTRINS_AARCH64_ADV_SIMD_FRINTA}, + {SN_RoundAwayFromZero, OP_XOP_OVR_X_X, INTRINS_SIMD_ROUND}, + {SN_RoundAwayFromZeroScalar, OP_XOP_OVR_SCALAR_X_X, INTRINS_SIMD_ROUND}, #if LLVM_API_VERSION >= 1400 {SN_RoundToNearest, OP_XOP_OVR_X_X, INTRINS_ROUNDEVEN}, {SN_RoundToNearestScalar, OP_XOP_OVR_SCALAR_X_X, INTRINS_ROUNDEVEN}, @@ -3436,12 +3442,12 @@ static SimdIntrinsic advsimd_methods [] = { {SN_RoundToNearest, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FRINTN}, {SN_RoundToNearestScalar, OP_XOP_OVR_SCALAR_X_X, INTRINS_AARCH64_ADV_SIMD_FRINTN}, #endif - {SN_RoundToNegativeInfinity, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FRINTM}, - {SN_RoundToNegativeInfinityScalar, OP_XOP_OVR_SCALAR_X_X, INTRINS_AARCH64_ADV_SIMD_FRINTM}, - {SN_RoundToPositiveInfinity, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FRINTP}, - {SN_RoundToPositiveInfinityScalar, OP_XOP_OVR_SCALAR_X_X, 
INTRINS_AARCH64_ADV_SIMD_FRINTP}, - {SN_RoundToZero, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FRINTZ}, - {SN_RoundToZeroScalar, OP_XOP_OVR_SCALAR_X_X, INTRINS_AARCH64_ADV_SIMD_FRINTZ}, + {SN_RoundToNegativeInfinity, OP_XOP_OVR_X_X, INTRINS_SIMD_FLOOR}, + {SN_RoundToNegativeInfinityScalar, OP_XOP_OVR_SCALAR_X_X, INTRINS_SIMD_FLOOR}, + {SN_RoundToPositiveInfinity, OP_XOP_OVR_X_X, INTRINS_SIMD_CEIL}, + {SN_RoundToPositiveInfinityScalar, OP_XOP_OVR_SCALAR_X_X, INTRINS_SIMD_CEIL}, + {SN_RoundToZero, OP_XOP_OVR_X_X, INTRINS_SIMD_TRUNC}, + {SN_RoundToZeroScalar, OP_XOP_OVR_SCALAR_X_X, INTRINS_SIMD_TRUNC}, {SN_ShiftArithmetic, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_SSHL}, {SN_ShiftArithmeticRounded, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_SRSHL}, {SN_ShiftArithmeticRoundedSaturate, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_SQRSHL}, @@ -4042,7 +4048,7 @@ static SimdIntrinsic sse_methods [] = { {SN_ReciprocalSqrt, OP_XOP_X_X, INTRINS_SSE_RSQRT_PS}, {SN_ReciprocalSqrtScalar}, {SN_Shuffle}, - {SN_Sqrt, OP_XOP_X_X, INTRINS_SSE_SQRT_PS}, + {SN_Sqrt, OP_XOP_X_X, INTRINS_SIMD_SQRT_R4}, {SN_SqrtScalar}, {SN_Store, OP_SSE_STORE, 1 /* alignment */}, {SN_StoreAligned, OP_SSE_STORE, 16 /* alignment */}, @@ -4152,7 +4158,7 @@ static SimdIntrinsic sse2_methods [] = { {SN_Shuffle}, {SN_ShuffleHigh}, {SN_ShuffleLow}, - {SN_Sqrt, OP_XOP_X_X, INTRINS_SSE_SQRT_PD}, + {SN_Sqrt, OP_XOP_X_X, INTRINS_SIMD_SQRT_R8}, {SN_SqrtScalar}, {SN_Store, OP_SSE_STORE, 1 /* alignment */}, {SN_StoreAligned, OP_SSE_STORE, 16 /* alignment */}, @@ -5043,7 +5049,7 @@ static SimdIntrinsic wasmbase_methods [] = { }; static SimdIntrinsic packedsimd_methods [] = { - {SN_Abs, OP_VECTOR_IABS}, + {SN_Abs}, {SN_Add}, {SN_AddPairwiseWidening}, {SN_AddSaturate}, @@ -5054,6 +5060,7 @@ static SimdIntrinsic packedsimd_methods [] = { {SN_AverageRounded}, {SN_Bitmask, OP_WASM_SIMD_BITMASK}, {SN_BitwiseSelect, OP_BSL}, + {SN_Ceiling, OP_XOP_OVR_X_X, INTRINS_SIMD_CEIL}, {SN_CompareEqual, OP_XCOMPARE, CMP_EQ, OP_XCOMPARE, 
CMP_EQ, OP_XCOMPARE_FP, CMP_EQ}, {SN_CompareGreaterThan, OP_XCOMPARE, CMP_GT, OP_XCOMPARE, CMP_GT_UN, OP_XCOMPARE_FP, CMP_GT}, {SN_CompareGreaterThanOrEqual, OP_XCOMPARE, CMP_GE, OP_XCOMPARE, CMP_GE_UN, OP_XCOMPARE_FP, CMP_GE}, @@ -5062,10 +5069,12 @@ static SimdIntrinsic packedsimd_methods [] = { {SN_CompareNotEqual, OP_XCOMPARE, CMP_NE, OP_XCOMPARE, CMP_NE, OP_XCOMPARE_FP, CMP_NE}, {SN_ConvertNarrowingSignedSaturate}, {SN_ConvertNarrowingUnsignedSaturate}, + {SN_Divide}, {SN_Dot, OP_XOP_X_X_X, INTRINS_WASM_DOT}, {SN_ExtractLane}, - {SN_Max, OP_XBINOP, OP_IMIN, OP_XBINOP, OP_IMIN_UN}, - {SN_Min, OP_XBINOP, OP_IMAX, OP_XBINOP, OP_IMAX_UN}, + {SN_Floor, OP_XOP_OVR_X_X, INTRINS_SIMD_FLOOR}, + {SN_Max, OP_XBINOP, OP_IMAX, OP_XBINOP, OP_IMAX_UN, OP_XBINOP, OP_FMAX}, + {SN_Min, OP_XBINOP, OP_IMIN, OP_XBINOP, OP_IMIN_UN, OP_XBINOP, OP_FMIN}, {SN_Multiply}, {SN_MultiplyRoundedSaturateQ15, OP_XOP_X_X_X, INTRINS_WASM_Q15MULR_SAT_SIGNED}, {SN_MultiplyWideningLower, OP_WASM_EXTMUL_LOWER, 0, OP_WASM_EXTMUL_LOWER_U}, @@ -5074,15 +5083,20 @@ static SimdIntrinsic packedsimd_methods [] = { {SN_Not, OP_WASM_ONESCOMPLEMENT}, {SN_Or, OP_XBINOP_FORCEINT, XBINOP_FORCEINT_OR}, {SN_PopCount, OP_XOP_OVR_X_X, INTRINS_SIMD_POPCNT}, + {SN_PseudoMax, OP_XOP_OVR_X_X, INTRINS_WASM_PMAX}, + {SN_PseudoMin, OP_XOP_OVR_X_X, INTRINS_WASM_PMIN}, {SN_ReplaceLane}, + {SN_RoundToNearest, OP_XOP_OVR_X_X, INTRINS_SIMD_NEAREST}, {SN_ShiftLeft, OP_SIMD_SHL}, {SN_ShiftRightArithmetic, OP_SIMD_SSHR}, {SN_ShiftRightLogical, OP_SIMD_USHR}, {SN_Shuffle, OP_WASM_SIMD_SHUFFLE}, {SN_Splat}, + {SN_Sqrt}, {SN_Subtract}, {SN_SubtractSaturate}, {SN_Swizzle, OP_WASM_SIMD_SWIZZLE}, + {SN_Truncate, OP_XOP_OVR_X_X, INTRINS_SIMD_TRUNC}, {SN_Xor, OP_XBINOP_FORCEINT, XBINOP_FORCEINT_XOR}, {SN_get_IsSupported}, }; @@ -5151,15 +5165,26 @@ emit_wasm_supported_intrinsics ( } if (feature == MONO_CPU_WASM_SIMD) { - if (id != SN_Splat && !is_element_type_primitive (fsig->params [0]) || - id == SN_Splat && 
!MONO_TYPE_IS_VECTOR_PRIMITIVE(fsig->params [0])) + if ((id != SN_Splat && !is_element_type_primitive (fsig->params [0])) || + (id == SN_Splat && !MONO_TYPE_IS_VECTOR_PRIMITIVE(fsig->params [0]))) return NULL; uint16_t op = info->default_op; uint16_t c0 = info->default_instc0; switch (id) { + case SN_Abs: { + if (type_enum_is_float(arg0_type)) { + op = OP_XOP_X_X; + c0 = arg0_type == MONO_TYPE_R8 ? INTRINS_WASM_FABS_V2 : INTRINS_WASM_FABS_V4; + } else { + op = OP_VECTOR_IABS; + } + // continue with default emit + break; + } case SN_Add: + case SN_Divide: case SN_Subtract: case SN_Multiply: return emit_simd_ins_for_binary_op (cfg, klass, fsig, args, arg0_type, id); @@ -5297,10 +5322,6 @@ emit_wasm_supported_intrinsics ( return NULL; } - case SN_CompareEqual: - return emit_simd_ins_for_sig (cfg, klass, type_enum_is_float (arg0_type) ? OP_XCOMPARE_FP : OP_XCOMPARE, CMP_EQ, arg0_type, fsig, args); - case SN_CompareNotEqual: - return emit_simd_ins_for_sig (cfg, klass, type_enum_is_float (arg0_type) ? OP_XCOMPARE_FP : OP_XCOMPARE, CMP_NE, arg0_type, fsig, args); case SN_ConvertNarrowingSignedSaturate: { op = OP_XOP_X_X_X; @@ -5353,6 +5374,12 @@ emit_wasm_supported_intrinsics ( g_assert (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype)); return emit_simd_ins (cfg, klass, type_to_expand_op (etype->type), args [0]->dreg, -1); } + case SN_Sqrt: { + op = OP_XOP_X_X; + c0 = arg0_type == MONO_TYPE_R4 ? 
INTRINS_SIMD_SQRT_R4 : INTRINS_SIMD_SQRT_R8; + // continue with default emit + break; + } case SN_SubtractSaturate: { op = OP_XOP_X_X_X; diff --git a/src/mono/mono/mini/simd-methods.h b/src/mono/mono/mini/simd-methods.h index 20db4e837c5f58..6ad755ba155a9f 100644 --- a/src/mono/mono/mini/simd-methods.h +++ b/src/mono/mono/mini/simd-methods.h @@ -643,7 +643,10 @@ METHOD(ConvertNarrowingSignedSaturate) METHOD(ConvertNarrowingUnsignedSaturate) METHOD(ExtractLane) METHOD(MultiplyRoundedSaturateQ15) +METHOD(PseudoMax) +METHOD(PseudoMin) METHOD(ReplaceLane) METHOD(ShiftLeft) METHOD(Splat) METHOD(Swizzle) +METHOD(Truncate) From 3e41845b148672ff3b480e22637b1db5e89a59c1 Mon Sep 17 00:00:00 2001 From: Radek Doulik Date: Thu, 4 May 2023 17:14:14 +0200 Subject: [PATCH 2/2] Fix pmax/pmin --- src/mono/mono/mini/llvm-intrinsics.h | 4 +- src/mono/mono/mini/mini-llvm.c | 100 +++++++++++++-------------- src/mono/mono/mini/simd-intrinsics.c | 4 +- 3 files changed, 54 insertions(+), 54 deletions(-) diff --git a/src/mono/mono/mini/llvm-intrinsics.h b/src/mono/mono/mini/llvm-intrinsics.h index 953879caeb0421..da408fbe78d97f 100644 --- a/src/mono/mono/mini/llvm-intrinsics.h +++ b/src/mono/mono/mini/llvm-intrinsics.h @@ -289,8 +289,8 @@ INTRINS_OVR_2_ARG(WASM_NARROW_SIGNED_V16, wasm_narrow_signed, Wasm, sse_i1_t, ss INTRINS_OVR_2_ARG(WASM_NARROW_SIGNED_V8, wasm_narrow_signed, Wasm, sse_i2_t, sse_i4_t) INTRINS_OVR_2_ARG(WASM_NARROW_UNSIGNED_V16, wasm_narrow_unsigned, Wasm, sse_i1_t, sse_i2_t) INTRINS_OVR_2_ARG(WASM_NARROW_UNSIGNED_V8, wasm_narrow_unsigned, Wasm, sse_i2_t, sse_i4_t) -INTRINS_OVR_TAG(WASM_PMAX, wasm_pmax, Wasm, R4 | R8) -INTRINS_OVR_TAG(WASM_PMIN, wasm_pmin, Wasm, R4 | R8) +INTRINS_OVR_TAG(WASM_PMAX, wasm_pmax, Wasm, V128 | R4 | R8) +INTRINS_OVR_TAG(WASM_PMIN, wasm_pmin, Wasm, V128 | R4 | R8) INTRINS_OVR(WASM_PMAX_V4, fabs, Generic, sse_r4_t) INTRINS_OVR(WASM_PMAX_V2, fabs, Generic, sse_r8_t) INTRINS(WASM_Q15MULR_SAT_SIGNED, wasm_q15mulr_sat_signed, Wasm) diff --git 
a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index e6685c42810e09..aada76de49ff6f 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -7782,13 +7782,62 @@ MONO_RESTORE_WARNING values [ins->dreg] = result; break; } +#endif case OP_XOP_OVR_X_X: { IntrinsicId iid = (IntrinsicId) ins->inst_c0; llvm_ovr_tag_t ovr_tag = ovr_tag_from_mono_vector_class (ins->klass); values [ins->dreg] = call_overloaded_intrins (ctx, iid, ovr_tag, &lhs, ""); break; } -#endif + case OP_XOP_OVR_X_X_X: { + IntrinsicId iid = (IntrinsicId) ins->inst_c0; + llvm_ovr_tag_t ovr_tag = ovr_tag_from_mono_vector_class (ins->klass); + LLVMValueRef args [] = { lhs, rhs }; + values [ins->dreg] = call_overloaded_intrins (ctx, iid, ovr_tag, args, ""); + break; + } + case OP_XOP_OVR_X_X_X_X: { + IntrinsicId iid = (IntrinsicId) ins->inst_c0; + llvm_ovr_tag_t ovr_tag = ovr_tag_from_mono_vector_class (ins->klass); + LLVMValueRef args [] = { lhs, rhs, arg3 }; + values [ins->dreg] = call_overloaded_intrins (ctx, iid, ovr_tag, args, ""); + break; + } + case OP_XOP_OVR_BYSCALAR_X_X_X: { + IntrinsicId iid = (IntrinsicId) ins->inst_c0; + llvm_ovr_tag_t ovr_tag = ovr_tag_from_mono_vector_class (ins->klass); + LLVMTypeRef t = LLVMTypeOf (lhs); + unsigned int elems = LLVMGetVectorSize (t); + LLVMValueRef arg2 = broadcast_element (ctx, scalar_from_vector (ctx, rhs), elems); + LLVMValueRef args [] = { lhs, arg2 }; + values [ins->dreg] = call_overloaded_intrins (ctx, iid, ovr_tag, args, ""); + break; + } + case OP_XOP_OVR_SCALAR_X_X: + case OP_XOP_OVR_SCALAR_X_X_X: + case OP_XOP_OVR_SCALAR_X_X_X_X: { + int num_args = 0; + IntrinsicId iid = (IntrinsicId) ins->inst_c0; + LLVMTypeRef ret_t = simd_class_to_llvm_type (ctx, ins->klass); + switch (ins->opcode) { + case OP_XOP_OVR_SCALAR_X_X: num_args = 1; break; + case OP_XOP_OVR_SCALAR_X_X_X: num_args = 2; break; + case OP_XOP_OVR_SCALAR_X_X_X_X: num_args = 3; break; + } + /* LLVM 9 NEON intrinsic functions have scalar 
overloads. Unfortunately + * only overloads for 32 and 64-bit integers and floating point types are + * supported. 8 and 16-bit integers are unsupported, and will fail during + * instruction selection. This is worked around by using a vector + * operation and then explicitly clearing the upper bits of the register. + */ + ScalarOpFromVectorOpCtx sctx = scalar_op_from_vector_op (ctx, ret_t, ins); + LLVMValueRef args [3] = { lhs, rhs, arg3 }; + scalar_op_from_vector_op_process_args (&sctx, args, num_args); + LLVMValueRef result = call_overloaded_intrins (ctx, iid, sctx.ovr_tag, args, ""); + result = scalar_op_from_vector_op_process_result (&sctx, result); + values [ins->dreg] = result; + break; + } #if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_WASM) case OP_EXTRACTX_U2: case OP_XEXTRACT_I1: @@ -11473,55 +11522,6 @@ MONO_RESTORE_WARNING values [ins->dreg] = call_overloaded_intrins (ctx, iid, ovr_tag, args, ""); break; } - case OP_XOP_OVR_X_X_X: { - IntrinsicId iid = (IntrinsicId) ins->inst_c0; - llvm_ovr_tag_t ovr_tag = ovr_tag_from_mono_vector_class (ins->klass); - LLVMValueRef args [] = { lhs, rhs }; - values [ins->dreg] = call_overloaded_intrins (ctx, iid, ovr_tag, args, ""); - break; - } - case OP_XOP_OVR_X_X_X_X: { - IntrinsicId iid = (IntrinsicId) ins->inst_c0; - llvm_ovr_tag_t ovr_tag = ovr_tag_from_mono_vector_class (ins->klass); - LLVMValueRef args [] = { lhs, rhs, arg3 }; - values [ins->dreg] = call_overloaded_intrins (ctx, iid, ovr_tag, args, ""); - break; - } - case OP_XOP_OVR_BYSCALAR_X_X_X: { - IntrinsicId iid = (IntrinsicId) ins->inst_c0; - llvm_ovr_tag_t ovr_tag = ovr_tag_from_mono_vector_class (ins->klass); - LLVMTypeRef t = LLVMTypeOf (lhs); - unsigned int elems = LLVMGetVectorSize (t); - LLVMValueRef arg2 = broadcast_element (ctx, scalar_from_vector (ctx, rhs), elems); - LLVMValueRef args [] = { lhs, arg2 }; - values [ins->dreg] = call_overloaded_intrins (ctx, iid, ovr_tag, args, ""); - break; - } - case 
OP_XOP_OVR_SCALAR_X_X: - case OP_XOP_OVR_SCALAR_X_X_X: - case OP_XOP_OVR_SCALAR_X_X_X_X: { - int num_args = 0; - IntrinsicId iid = (IntrinsicId) ins->inst_c0; - LLVMTypeRef ret_t = simd_class_to_llvm_type (ctx, ins->klass); - switch (ins->opcode) { - case OP_XOP_OVR_SCALAR_X_X: num_args = 1; break; - case OP_XOP_OVR_SCALAR_X_X_X: num_args = 2; break; - case OP_XOP_OVR_SCALAR_X_X_X_X: num_args = 3; break; - } - /* LLVM 9 NEON intrinsic functions have scalar overloads. Unfortunately - * only overloads for 32 and 64-bit integers and floating point types are - * supported. 8 and 16-bit integers are unsupported, and will fail during - * instruction selection. This is worked around by using a vector - * operation and then explicitly clearing the upper bits of the register. - */ - ScalarOpFromVectorOpCtx sctx = scalar_op_from_vector_op (ctx, ret_t, ins); - LLVMValueRef args [3] = { lhs, rhs, arg3 }; - scalar_op_from_vector_op_process_args (&sctx, args, num_args); - LLVMValueRef result = call_overloaded_intrins (ctx, iid, sctx.ovr_tag, args, ""); - result = scalar_op_from_vector_op_process_result (&sctx, result); - values [ins->dreg] = result; - break; - } #endif #ifdef TARGET_WASM case OP_WASM_ONESCOMPLEMENT: { diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 732b57041eac6a..f0f0d191947226 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -5083,8 +5083,8 @@ static SimdIntrinsic packedsimd_methods [] = { {SN_Not, OP_WASM_ONESCOMPLEMENT}, {SN_Or, OP_XBINOP_FORCEINT, XBINOP_FORCEINT_OR}, {SN_PopCount, OP_XOP_OVR_X_X, INTRINS_SIMD_POPCNT}, - {SN_PseudoMax, OP_XOP_OVR_X_X, INTRINS_WASM_PMAX}, - {SN_PseudoMin, OP_XOP_OVR_X_X, INTRINS_WASM_PMIN}, + {SN_PseudoMax, OP_XOP_OVR_X_X_X, INTRINS_WASM_PMAX}, + {SN_PseudoMin, OP_XOP_OVR_X_X_X, INTRINS_WASM_PMIN}, {SN_ReplaceLane}, {SN_RoundToNearest, OP_XOP_OVR_X_X, INTRINS_SIMD_NEAREST}, {SN_ShiftLeft, OP_SIMD_SHL},