diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index 7adf4c24e10..3983beb82ec 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -49,6 +49,9 @@ extern "platform-intrinsic" { /// fsqrt pub(crate) fn simd_fsqrt(x: T) -> T; + /// fma + pub(crate) fn simd_fma(x: T, y: T, z: T) -> T; + pub(crate) fn simd_eq(x: T, y: T) -> U; pub(crate) fn simd_ne(x: T, y: T) -> U; pub(crate) fn simd_lt(x: T, y: T) -> U; diff --git a/crates/core_simd/src/iter.rs b/crates/core_simd/src/iter.rs new file mode 100644 index 00000000000..c1c4c645db6 --- /dev/null +++ b/crates/core_simd/src/iter.rs @@ -0,0 +1,52 @@ +macro_rules! impl_traits { + { $type:ident } => { + impl core::iter::Sum for crate::$type + where + Self: crate::LanesAtMost32, + { + fn sum>(iter: I) -> Self { + iter.fold(Default::default(), core::ops::Add::add) + } + } + + impl core::iter::Product for crate::$type + where + Self: crate::LanesAtMost32, + { + fn product>(iter: I) -> Self { + iter.fold(Default::default(), core::ops::Mul::mul) + } + } + + impl<'a, const LANES: usize> core::iter::Sum<&'a Self> for crate::$type + where + Self: crate::LanesAtMost32, + { + fn sum>(iter: I) -> Self { + iter.fold(Default::default(), core::ops::Add::add) + } + } + + impl<'a, const LANES: usize> core::iter::Product<&'a Self> for crate::$type + where + Self: crate::LanesAtMost32, + { + fn product>(iter: I) -> Self { + iter.fold(Default::default(), core::ops::Mul::mul) + } + } + } +} + +impl_traits! { SimdF32 } +impl_traits! { SimdF64 } +impl_traits! { SimdU8 } +impl_traits! { SimdU16 } +impl_traits! { SimdU32 } +impl_traits! { SimdU64 } +impl_traits! { SimdUsize } +impl_traits! { SimdI8 } +impl_traits! { SimdI16 } +impl_traits! { SimdI32 } +impl_traits! { SimdI64 } +impl_traits! { SimdIsize } diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 7fe7d666e8d..5b74d1d574a 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -22,6 +22,7 @@ pub use to_bytes::ToBytes; mod comparisons; mod fmt; mod intrinsics; +mod iter; mod ops; mod round; diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs index c4565a9dd90..4656eb3f379 100644 --- a/crates/core_simd/src/vector/float.rs +++ b/crates/core_simd/src/vector/float.rs @@ -4,7 +4,7 @@ /// `$lanes` of float `$type`, which uses `$bits_ty` as its binary /// representation. Called from `define_float_vector!`. macro_rules! impl_float_vector { - { $name:ident, $type:ty, $bits_ty:ident, $mask_ty:ident, $mask_impl_ty:ident } => { + { $name:ident, $type:ident, $bits_ty:ident, $mask_ty:ident, $mask_impl_ty:ident } => { impl_vector! { $name, $type } impl_float_reductions! { $name, $type } @@ -36,6 +36,18 @@ macro_rules! impl_float_vector { unsafe { crate::intrinsics::simd_fabs(self) } } + /// Fused multiply-add. Computes `(self * a) + b` with only one rounding error, + /// yielding a more accurate result than an unfused multiply-add. + /// + /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target + /// architecture has a dedicated `fma` CPU instruction. However, this is not always + /// true, and will be heavily dependent on designing algorithms with specific target + /// hardware in mind. + #[inline] + pub fn mul_add(self, a: Self, b: Self) -> Self { + unsafe { crate::intrinsics::simd_fma(self, a, b) } + } + /// Produces a vector where every lane has the square root value /// of the equivalently-indexed lane in `self` #[inline] @@ -43,6 +55,25 @@ macro_rules! impl_float_vector { pub fn sqrt(self) -> Self { unsafe { crate::intrinsics::simd_fsqrt(self) } } + + /// Takes the reciprocal (inverse) of each lane, `1/x`. + #[inline] + pub fn recip(self) -> Self { + Self::splat(1.0) / self + } + + /// Converts each lane from radians to degrees. + #[inline] + pub fn to_degrees(self) -> Self { + // to_degrees uses a special constant for better precision, so extract that constant + self * Self::splat($type::to_degrees(1.)) + } + + /// Converts each lane from degrees to radians. + #[inline] + pub fn to_radians(self) -> Self { + self * Self::splat($type::to_radians(1.)) + } } impl $name @@ -97,6 +128,67 @@ macro_rules! impl_float_vector { pub fn is_normal(self) -> crate::$mask_ty { !(self.abs().lanes_eq(Self::splat(0.0)) | self.is_nan() | self.is_subnormal() | self.is_infinite()) } + + /// Replaces each lane with a number that represents its sign. + /// + /// * `1.0` if the number is positive, `+0.0`, or `INFINITY` + /// * `-1.0` if the number is negative, `-0.0`, or `NEG_INFINITY` + /// * `NAN` if the number is `NAN` + #[inline] + pub fn signum(self) -> Self { + self.is_nan().select(Self::splat($type::NAN), Self::splat(1.0).copysign(self)) + } + + /// Returns each lane with the magnitude of `self` and the sign of `sign`. + /// + /// If any lane is a `NAN`, then a `NAN` with the sign of `sign` is returned. + #[inline] + pub fn copysign(self, sign: Self) -> Self { + let sign_bit = sign.to_bits() & Self::splat(-0.).to_bits(); + let magnitude = self.to_bits() & !Self::splat(-0.).to_bits(); + Self::from_bits(sign_bit | magnitude) + } + + /// Returns the minimum of each lane. + /// + /// If one of the values is `NAN`, then the other value is returned. + #[inline] + pub fn min(self, other: Self) -> Self { + // TODO consider using an intrinsic + self.is_nan().select( + other, + self.lanes_ge(other).select(other, self) + ) + } + + /// Returns the maximum of each lane. + /// + /// If one of the values is `NAN`, then the other value is returned. + #[inline] + pub fn max(self, other: Self) -> Self { + // TODO consider using an intrinsic + self.is_nan().select( + other, + self.lanes_le(other).select(other, self) + ) + } + + /// Restrict each lane to a certain interval unless it is NaN. + /// + /// For each lane in `self`, returns the corresponding lane in `max` if the lane is + /// greater than `max`, and the corresponding lane in `min` if the lane is less + /// than `min`. Otherwise returns the lane in `self`. + #[inline] + pub fn clamp(self, min: Self, max: Self) -> Self { + assert!( + min.lanes_le(max).all(), + "each lane in `min` must be less than or equal to the corresponding lane in `max`", + ); + let mut x = self; + x = x.lanes_lt(min).select(min, x); + x = x.lanes_gt(max).select(max, x); + x + } } }; } diff --git a/crates/core_simd/src/vector/int.rs b/crates/core_simd/src/vector/int.rs index a535fad7bc1..dd7b2225dbd 100644 --- a/crates/core_simd/src/vector/int.rs +++ b/crates/core_simd/src/vector/int.rs @@ -33,14 +33,28 @@ macro_rules! impl_integer_vector { crate::$mask_ty: crate::Mask, { /// Returns true for each positive lane and false if it is zero or negative. + #[inline] pub fn is_positive(self) -> crate::$mask_ty { self.lanes_gt(Self::splat(0)) } /// Returns true for each negative lane and false if it is zero or positive. + #[inline] pub fn is_negative(self) -> crate::$mask_ty { self.lanes_lt(Self::splat(0)) } + + /// Returns numbers representing the sign of each lane. + /// * `0` if the number is zero + /// * `1` if the number is positive + /// * `-1` if the number is negative + #[inline] + pub fn signum(self) -> Self { + self.is_positive().select( + Self::splat(1), + self.is_negative().select(Self::splat(-1), Self::splat(0)) + ) + } } } } diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 83c6fec69e8..cb39e737705 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -247,6 +247,15 @@ macro_rules! impl_signed_tests { &|_| true, ); } + + fn signum() { + test_helpers::test_unary_elementwise( + &Vector::::signum, + &Scalar::signum, + &|_| true, + ) + } + } test_helpers::test_lanes_panic! { @@ -426,6 +435,14 @@ macro_rules! impl_float_tests { ) } + fn mul_add() { + test_helpers::test_ternary_elementwise( + &Vector::::mul_add, + &Scalar::mul_add, + &|_, _, _| true, + ) + } + fn sqrt() { test_helpers::test_unary_elementwise( &Vector::::sqrt, @@ -433,6 +450,117 @@ macro_rules! impl_float_tests { &|_| true, ) } + + fn recip() { + test_helpers::test_unary_elementwise( + &Vector::::recip, + &Scalar::recip, + &|_| true, + ) + } + + fn to_degrees() { + test_helpers::test_unary_elementwise( + &Vector::::to_degrees, + &Scalar::to_degrees, + &|_| true, + ) + } + + fn to_radians() { + test_helpers::test_unary_elementwise( + &Vector::::to_radians, + &Scalar::to_radians, + &|_| true, + ) + } + + fn signum() { + test_helpers::test_unary_elementwise( + &Vector::::signum, + &Scalar::signum, + &|_| true, + ) + } + + fn copysign() { + test_helpers::test_binary_elementwise( + &Vector::::copysign, + &Scalar::copysign, + &|_, _| true, + ) + } + + fn min() { + // Regular conditions (both values aren't zero) + test_helpers::test_binary_elementwise( + &Vector::::min, + &Scalar::min, + // Reject the case where both values are zero with different signs + &|a, b| { + for (a, b) in a.iter().zip(b.iter()) { + if *a == 0. && *b == 0. && a.signum() != b.signum() { + return false; + } + } + true + } + ); + + // Special case where both values are zero + let p_zero = Vector::::splat(0.); + let n_zero = Vector::::splat(-0.); + assert!(p_zero.min(n_zero).to_array().iter().all(|x| *x == 0.)); + assert!(n_zero.min(p_zero).to_array().iter().all(|x| *x == 0.)); + } + + fn max() { + // Regular conditions (both values aren't zero) + test_helpers::test_binary_elementwise( + &Vector::::max, + &Scalar::max, + // Reject the case where both values are zero with different signs + &|a, b| { + for (a, b) in a.iter().zip(b.iter()) { + if *a == 0. && *b == 0. && a.signum() != b.signum() { + return false; + } + } + true + } + ); + + // Special case where both values are zero + let p_zero = Vector::::splat(0.); + let n_zero = Vector::::splat(-0.); + assert!(p_zero.max(n_zero).to_array().iter().all(|x| *x == 0.)); + assert!(n_zero.max(p_zero).to_array().iter().all(|x| *x == 0.)); + } + + fn clamp() { + test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| { + for (min, max) in min.iter_mut().zip(max.iter_mut()) { + if max < min { + core::mem::swap(min, max); + } + if min.is_nan() { + *min = Scalar::NEG_INFINITY; + } + if max.is_nan() { + *max = Scalar::INFINITY; + } + } + + let mut result_scalar = [Scalar::default(); LANES]; + for i in 0..LANES { + result_scalar[i] = value[i].clamp(min[i], max[i]); + } + let result_vector = Vector::from_array(value).clamp(min.into(), max.into()).to_array(); + test_helpers::prop_assert_biteq!(result_scalar, result_vector); + Ok(()) + }) + } + fn horizontal_sum() { test_helpers::test_1(&|x| { test_helpers::prop_assert_biteq! ( diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index fffd088f4da..4f2380b8e5b 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -97,6 +97,27 @@ pub fn test_2( + f: &dyn Fn(A, B, C) -> proptest::test_runner::TestCaseResult, +) { + let mut runner = proptest::test_runner::TestRunner::default(); + runner + .run( + &( + A::default_strategy(), + B::default_strategy(), + C::default_strategy(), + ), + |(a, b, c)| f(a, b, c), + ) + .unwrap(); +} + /// Test a unary vector function against a unary scalar function, applied elementwise. #[inline(never)] pub fn test_unary_elementwise( @@ -257,6 +278,47 @@ pub fn test_binary_scalar_lhs_elementwise< }); } +/// Test a ternary vector function against a ternary scalar function, applied elementwise. +#[inline(never)] +pub fn test_ternary_elementwise< + Scalar1, + Scalar2, + Scalar3, + ScalarResult, + Vector1, + Vector2, + Vector3, + VectorResult, + const LANES: usize, +>( + fv: &dyn Fn(Vector1, Vector2, Vector3) -> VectorResult, + fs: &dyn Fn(Scalar1, Scalar2, Scalar3) -> ScalarResult, + check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES], [Scalar3; LANES]) -> bool, +) where + Scalar1: Copy + Default + core::fmt::Debug + DefaultStrategy, + Scalar2: Copy + Default + core::fmt::Debug + DefaultStrategy, + Scalar3: Copy + Default + core::fmt::Debug + DefaultStrategy, + ScalarResult: Copy + Default + biteq::BitEq + core::fmt::Debug + DefaultStrategy, + Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy, + Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy, + Vector3: Into<[Scalar3; LANES]> + From<[Scalar3; LANES]> + Copy, + VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy, +{ + test_3(&|x: [Scalar1; LANES], y: [Scalar2; LANES], z: [Scalar3; LANES]| { + proptest::prop_assume!(check(x, y, z)); + let result_1: [ScalarResult; LANES] = fv(x.into(), y.into(), z.into()).into(); + let result_2: [ScalarResult; LANES] = { + let mut result = [ScalarResult::default(); LANES]; + for ((i1, (i2, i3)), o) in x.iter().zip(y.iter().zip(z.iter())).zip(result.iter_mut()) { + *o = fs(*i1, *i2, *i3); + } + result + }; + crate::prop_assert_biteq!(result_1, result_2); + Ok(()) + }); +} + /// Expand a const-generic test into separate tests for each possible lane count. #[macro_export] macro_rules! test_lanes {