Skip to content

Commit 8a410f0

Browse files
authored
Migrate _mm_add_ss to __m128 (rust-lang#265)
This commit starts the migration towards Intel's types one intrinsic at a time, starting with `_mm_add_ss`. This is mostly just to get a feel for what the tests will start to look like.
1 parent 488c28a commit 8a410f0

File tree

3 files changed

+44
-17
lines changed

3 files changed

+44
-17
lines changed

coresimd/src/x86/i586/sse.rs

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use core::ptr;
66
use simd_llvm::simd_shuffle4;
77
use v128::*;
88
use v64::*;
9+
use x86::__m128;
910

1011
#[cfg(test)]
1112
use stdsimd_test::assert_instr;
@@ -15,7 +16,7 @@ use stdsimd_test::assert_instr;
1516
#[inline(always)]
1617
#[target_feature = "+sse"]
1718
#[cfg_attr(test, assert_instr(addss))]
18-
pub unsafe fn _mm_add_ss(a: f32x4, b: f32x4) -> f32x4 {
19+
pub unsafe fn _mm_add_ss(a: __m128, b: __m128) -> __m128 {
1920
addss(a, b)
2021
}
2122

@@ -720,13 +721,13 @@ pub unsafe fn _mm_set_ps1(a: f32) -> f32x4 {
720721
/// Alternatively:
721722
///
722723
/// ```text
723-
/// assert_eq!(f32x4::new(a, b, c, d), _mm_set_ps(d, c, b, a));
724+
/// let v = _mm_set_ps(d, c, b, a);
724725
/// ```
725726
#[inline(always)]
726727
#[target_feature = "+sse"]
727728
#[cfg_attr(test, assert_instr(unpcklps))]
728-
pub unsafe fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> f32x4 {
729-
f32x4::new(d, c, b, a)
729+
pub unsafe fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
730+
__m128(d, c, b, a)
730731
}
731732

732733
/// Construct a `f32x4` from four floating point values lowest to highest.
@@ -1600,7 +1601,7 @@ pub unsafe fn _MM_TRANSPOSE4_PS(
16001601
#[allow(improper_ctypes)]
16011602
extern "C" {
16021603
#[link_name = "llvm.x86.sse.add.ss"]
1603-
fn addss(a: f32x4, b: f32x4) -> f32x4;
1604+
fn addss(a: __m128, b: __m128) -> __m128;
16041605
#[link_name = "llvm.x86.sse.sub.ss"]
16051606
fn subss(a: f32x4, b: f32x4) -> f32x4;
16061607
#[link_name = "llvm.x86.sse.mul.ss"]
@@ -1697,11 +1698,26 @@ pub unsafe fn _mm_stream_pi(mem_addr: *mut __m64, a: __m64) {
16971698

16981699
#[cfg(test)]
16991700
mod tests {
1700-
use v128::*;
17011701
use x86::i586::sse;
1702+
use super::*;
17021703
use stdsimd_test::simd_test;
17031704
use test::black_box; // Used to inhibit constant-folding.
17041705

1706+
#[target_feature = "+sse"]
1707+
unsafe fn assert_eq_m128(a: __m128, b: __m128) {
1708+
use std::mem;
1709+
let r = _mm_cmpeq_ps(mem::transmute(a), mem::transmute(b));
1710+
if _mm_movemask_ps(r) != 0b1111 {
1711+
panic!("{:?} != {:?}", a, b);
1712+
}
1713+
}
1714+
1715+
#[target_feature = "+sse"]
1716+
unsafe fn get_m128(a: __m128, idx: usize) -> f32 {
1717+
union A { a: __m128, b: [f32; 4] };
1718+
mem::transmute::<__m128, A>(a).b[idx]
1719+
}
1720+
17051721
#[simd_test = "sse"]
17061722
unsafe fn _mm_add_ps() {
17071723
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
@@ -1711,11 +1727,11 @@ mod tests {
17111727
}
17121728

17131729
#[simd_test = "sse"]
1714-
unsafe fn _mm_add_ss() {
1715-
let a = f32x4::new(-1.0, 5.0, 0.0, -10.0);
1716-
let b = f32x4::new(-100.0, 20.0, 0.0, -5.0);
1717-
let r = sse::_mm_add_ss(a, b);
1718-
assert_eq!(r, f32x4::new(-101.0, 5.0, 0.0, -10.0));
1730+
unsafe fn test_mm_add_ss() {
1731+
let a = _mm_set_ps(-1.0, 5.0, 0.0, -10.0);
1732+
let b = _mm_set_ps(-100.0, 20.0, 0.0, -5.0);
1733+
let r = _mm_add_ss(a, b);
1734+
assert_eq_m128(r, _mm_set_ps(-1.0, 5.0, 0.0, -15.0));
17191735
}
17201736

17211737
#[simd_test = "sse"]
@@ -2220,7 +2236,8 @@ mod tests {
22202236
assert_eq!(rd, ed);
22212237
}
22222238

2223-
unsafe fn _mm_cmpeq_ps() {
2239+
#[simd_test = "sse"]
2240+
unsafe fn test_mm_cmpeq_ps() {
22242241
use std::mem::transmute;
22252242
use std::f32::NAN;
22262243

@@ -2234,6 +2251,7 @@ mod tests {
22342251
assert_eq!(r, e);
22352252
}
22362253

2254+
#[simd_test = "sse"]
22372255
unsafe fn _mm_cmplt_ps() {
22382256
use std::mem::transmute;
22392257
use std::f32::NAN;
@@ -2876,7 +2894,7 @@ mod tests {
28762894
}
28772895

28782896
#[simd_test = "sse"]
2879-
pub unsafe fn _mm_cvtss_f32() {
2897+
pub unsafe fn test_mm_cvtss_f32() {
28802898
let a = f32x4::new(312.0134, 5.0, 6.0, 7.0);
28812899
assert_eq!(sse::_mm_cvtss_f32(a), 312.0134);
28822900
}
@@ -2896,14 +2914,17 @@ mod tests {
28962914
}
28972915

28982916
#[simd_test = "sse"]
2899-
unsafe fn _mm_set_ps() {
2900-
let r = sse::_mm_set_ps(
2917+
unsafe fn test_mm_set_ps() {
2918+
let r = _mm_set_ps(
29012919
black_box(1.0),
29022920
black_box(2.0),
29032921
black_box(3.0),
29042922
black_box(4.0),
29052923
);
2906-
assert_eq!(r, f32x4::new(4.0, 3.0, 2.0, 1.0));
2924+
assert_eq!(get_m128(r, 0), 4.0);
2925+
assert_eq!(get_m128(r, 1), 3.0);
2926+
assert_eq!(get_m128(r, 2), 2.0);
2927+
assert_eq!(get_m128(r, 3), 1.0);
29072928
}
29082929

29092930
#[simd_test = "sse"]
@@ -3196,7 +3217,7 @@ mod tests {
31963217
}
31973218

31983219
#[simd_test = "sse"]
3199-
unsafe fn _mm_movemask_ps() {
3220+
unsafe fn test_mm_movemask_ps() {
32003221
let r = sse::_mm_movemask_ps(f32x4::new(-1.0, 5.0, -5.0, 0.0));
32013222
assert_eq!(r, 0b0101);
32023223

coresimd/src/x86/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
#[macro_use]
44
mod macros;
55

6+
#[repr(simd)]
7+
#[derive(Clone, Copy, Debug)]
8+
#[allow(non_camel_case_types)]
9+
pub struct __m128(f32, f32, f32, f32);
10+
611
mod i386;
712
pub use self::i386::*;
813

stdsimd-verify/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
9797
fn to_type(t: &syn::Type) -> Tokens {
9898
match *t {
9999
syn::Type::Path(ref p) => match extract_path_ident(&p.path).as_ref() {
100+
"__m128" => my_quote! { &F32x4 },
100101
"__m128i" => my_quote! { &I8x16 },
101102
"__m256i" => my_quote! { &I8x32 },
102103
"__m64" => my_quote! { &I8x8 },

0 commit comments

Comments
 (0)