@@ -6,6 +6,7 @@ use core::ptr;
66use simd_llvm:: simd_shuffle4;
77use v128:: * ;
88use v64:: * ;
9+ use x86:: __m128;
910
1011#[ cfg( test) ]
1112use stdsimd_test:: assert_instr;
@@ -15,7 +16,7 @@ use stdsimd_test::assert_instr;
1516#[ inline( always) ]
1617#[ target_feature = "+sse" ]
1718#[ cfg_attr( test, assert_instr( addss) ) ]
18- pub unsafe fn _mm_add_ss ( a : f32x4 , b : f32x4 ) -> f32x4 {
19+ pub unsafe fn _mm_add_ss ( a : __m128 , b : __m128 ) -> __m128 {
1920 addss ( a, b)
2021}
2122
@@ -720,13 +721,13 @@ pub unsafe fn _mm_set_ps1(a: f32) -> f32x4 {
720721/// Alternatively:
721722///
722723/// ```text
723- /// assert_eq!(f32x4::new(a, b, c, d), _mm_set_ps(d, c, b, a) );
724+ /// let v = _mm_set_ps(d, c, b, a);
724725/// ```
725726#[ inline( always) ]
726727#[ target_feature = "+sse" ]
727728#[ cfg_attr( test, assert_instr( unpcklps) ) ]
728- pub unsafe fn _mm_set_ps ( a : f32 , b : f32 , c : f32 , d : f32 ) -> f32x4 {
729- f32x4 :: new ( d, c, b, a)
729+ pub unsafe fn _mm_set_ps ( a : f32 , b : f32 , c : f32 , d : f32 ) -> __m128 {
730+ __m128 ( d, c, b, a)
730731}
731732
732733/// Construct a `f32x4` from four floating point values lowest to highest.
@@ -1600,7 +1601,7 @@ pub unsafe fn _MM_TRANSPOSE4_PS(
16001601#[ allow( improper_ctypes) ]
16011602extern "C" {
16021603 #[ link_name = "llvm.x86.sse.add.ss" ]
1603- fn addss ( a : f32x4 , b : f32x4 ) -> f32x4 ;
1604+ fn addss ( a : __m128 , b : __m128 ) -> __m128 ;
16041605 #[ link_name = "llvm.x86.sse.sub.ss" ]
16051606 fn subss ( a : f32x4 , b : f32x4 ) -> f32x4 ;
16061607 #[ link_name = "llvm.x86.sse.mul.ss" ]
@@ -1697,11 +1698,26 @@ pub unsafe fn _mm_stream_pi(mem_addr: *mut __m64, a: __m64) {
16971698
16981699#[ cfg( test) ]
16991700mod tests {
1700- use v128:: * ;
17011701 use x86:: i586:: sse;
1702+ use super :: * ;
17021703 use stdsimd_test:: simd_test;
17031704 use test:: black_box; // Used to inhibit constant-folding.
17041705
1706+ #[ target_feature = "+sse" ]
1707+ unsafe fn assert_eq_m128 ( a : __m128 , b : __m128 ) {
1708+ use std:: mem;
1709+ let r = _mm_cmpeq_ps ( mem:: transmute ( a) , mem:: transmute ( b) ) ;
1710+ if _mm_movemask_ps ( r) != 0b1111 {
1711+ panic ! ( "{:?} != {:?}" , a, b) ;
1712+ }
1713+ }
1714+
1715+ #[ target_feature = "+sse" ]
1716+ unsafe fn get_m128 ( a : __m128 , idx : usize ) -> f32 {
1717+ union A { a : __m128 , b : [ f32 ; 4 ] } ;
1718+ mem:: transmute :: < __m128 , A > ( a) . b [ idx]
1719+ }
1720+
17051721 #[ simd_test = "sse" ]
17061722 unsafe fn _mm_add_ps ( ) {
17071723 let a = f32x4:: new ( -1.0 , 5.0 , 0.0 , -10.0 ) ;
@@ -1711,11 +1727,11 @@ mod tests {
17111727 }
17121728
17131729 #[ simd_test = "sse" ]
1714- unsafe fn _mm_add_ss ( ) {
1715- let a = f32x4 :: new ( -1.0 , 5.0 , 0.0 , -10.0 ) ;
1716- let b = f32x4 :: new ( -100.0 , 20.0 , 0.0 , -5.0 ) ;
1717- let r = sse :: _mm_add_ss ( a, b) ;
1718- assert_eq ! ( r, f32x4 :: new ( - 101 .0, 5.0 , 0.0 , -10 .0) ) ;
1730+ unsafe fn test_mm_add_ss ( ) {
1731+ let a = _mm_set_ps ( -1.0 , 5.0 , 0.0 , -10.0 ) ;
1732+ let b = _mm_set_ps ( -100.0 , 20.0 , 0.0 , -5.0 ) ;
1733+ let r = _mm_add_ss ( a, b) ;
1734+ assert_eq_m128 ( r, _mm_set_ps ( - 1 .0, 5.0 , 0.0 , -15 .0) ) ;
17191735 }
17201736
17211737 #[ simd_test = "sse" ]
@@ -2220,7 +2236,8 @@ mod tests {
22202236 assert_eq ! ( rd, ed) ;
22212237 }
22222238
2223- unsafe fn _mm_cmpeq_ps ( ) {
2239+ #[ simd_test = "sse" ]
2240+ unsafe fn test_mm_cmpeq_ps ( ) {
22242241 use std:: mem:: transmute;
22252242 use std:: f32:: NAN ;
22262243
@@ -2234,6 +2251,7 @@ mod tests {
22342251 assert_eq ! ( r, e) ;
22352252 }
22362253
2254+ #[ simd_test = "sse" ]
22372255 unsafe fn _mm_cmplt_ps ( ) {
22382256 use std:: mem:: transmute;
22392257 use std:: f32:: NAN ;
@@ -2876,7 +2894,7 @@ mod tests {
28762894 }
28772895
28782896 #[ simd_test = "sse" ]
2879- pub unsafe fn _mm_cvtss_f32 ( ) {
2897+ pub unsafe fn test_mm_cvtss_f32 ( ) {
28802898 let a = f32x4:: new ( 312.0134 , 5.0 , 6.0 , 7.0 ) ;
28812899 assert_eq ! ( sse:: _mm_cvtss_f32( a) , 312.0134 ) ;
28822900 }
@@ -2896,14 +2914,17 @@ mod tests {
28962914 }
28972915
28982916 #[ simd_test = "sse" ]
2899- unsafe fn _mm_set_ps ( ) {
2900- let r = sse :: _mm_set_ps (
2917+ unsafe fn test_mm_set_ps ( ) {
2918+ let r = _mm_set_ps (
29012919 black_box ( 1.0 ) ,
29022920 black_box ( 2.0 ) ,
29032921 black_box ( 3.0 ) ,
29042922 black_box ( 4.0 ) ,
29052923 ) ;
2906- assert_eq ! ( r, f32x4:: new( 4.0 , 3.0 , 2.0 , 1.0 ) ) ;
2924+ assert_eq ! ( get_m128( r, 0 ) , 4.0 ) ;
2925+ assert_eq ! ( get_m128( r, 1 ) , 3.0 ) ;
2926+ assert_eq ! ( get_m128( r, 2 ) , 2.0 ) ;
2927+ assert_eq ! ( get_m128( r, 3 ) , 1.0 ) ;
29072928 }
29082929
29092930 #[ simd_test = "sse" ]
@@ -3196,7 +3217,7 @@ mod tests {
31963217 }
31973218
31983219 #[ simd_test = "sse" ]
3199- unsafe fn _mm_movemask_ps ( ) {
3220+ unsafe fn test_mm_movemask_ps ( ) {
32003221 let r = sse:: _mm_movemask_ps ( f32x4:: new ( -1.0 , 5.0 , -5.0 , 0.0 ) ) ;
32013222 assert_eq ! ( r, 0b0101 ) ;
32023223
0 commit comments