@@ -12,6 +12,16 @@ use stdsimd_test::assert_instr;
 
 #[allow(improper_ctypes)]
 extern "C" {
+    #[link_name = "llvm.x86.sse.cvtpi2ps"]
+    fn cvtpi2ps(a: f32x4, b: __m64) -> f32x4;
+    #[link_name = "llvm.x86.mmx.pextr.w"]
+    fn pextrw(a: __m64, imm8: i32) -> i32;
+    #[link_name = "llvm.x86.mmx.pinsr.w"]
+    fn pinsrw(a: __m64, d: i32, imm8: i32) -> __m64;
+    #[link_name = "llvm.x86.mmx.pmovmskb"]
+    fn pmovmskb(a: __m64) -> i32;
+    #[link_name = "llvm.x86.sse.pshuf.w"]
+    fn pshufw(a: __m64, imm8: i8) -> __m64;
     #[link_name = "llvm.x86.mmx.pmaxs.w"]
     fn pmaxsw(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.pmaxu.b"]
@@ -98,6 +108,64 @@ pub unsafe fn _m_pminub(a: u8x8, b: u8x8) -> u8x8 {
     _mm_min_pu8(a, b)
 }
 
+/// Converts two elements of a 64-bit vector of [2 x i32] into two
+/// floating point values and writes them to the lower 64 bits of the
+/// destination. The remaining higher order elements of the destination are
+/// copied from the corresponding elements in the first operand.
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvtpi2ps))]
+pub unsafe fn _mm_cvt_pi2ps(a: f32x4, b: i32x2) -> f32x4 {
+    cvtpi2ps(a, mem::transmute(b))
+}
+
+/// Extracts a 16-bit element from a 64-bit vector of [4 x i16] and
+/// returns it, as specified by the immediate integer operand.
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
+pub unsafe fn _mm_extract_pi16(a: i16x4, imm2: i32) -> i16 {
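+    // `constify_imm2!` expands to a match over the possible values of
+    // `imm2` and invokes `call!` with a literal in each arm, so the
+    // intrinsic receives a constant immediate.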
+    macro_rules! call {
+        ($imm2:expr) => { pextrw(mem::transmute(a), $imm2) as i16 }
+    }
+    constify_imm2!(imm2, call)
+}
+
+/// Copies data from the 64-bit vector of [4 x i16] to the destination,
+/// and inserts the lower 16 bits of an integer operand at the 16-bit offset
+/// specified by the immediate operand `imm2`.
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
+pub unsafe fn _mm_insert_pi16(a: i16x4, d: i32, imm2: i32) -> i16x4 {
+    macro_rules! call {
+        ($imm2:expr) => { mem::transmute(pinsrw(mem::transmute(a), d, $imm2)) }
+    }
+    constify_imm2!(imm2, call)
+}
+
+/// Takes the most significant bit from each 8-bit element in a 64-bit
+/// integer vector to create an 8-bit mask value. Zero-extends the value to
+/// a 32-bit integer and writes it to the destination.
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(pmovmskb))]
+pub unsafe fn _mm_movemask_pi8(a: i16x4) -> i32 {
+    pmovmskb(mem::transmute(a))
+}
+
+/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
+/// destination, as specified by the immediate value operand.
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
+pub unsafe fn _mm_shuffle_pi16(a: i16x4, imm8: i8) -> i16x4 {
+    macro_rules! call {
+        ($imm8:expr) => { mem::transmute(pshufw(mem::transmute(a), $imm8)) }
+    }
+    constify_imm8!(imm8, call)
+}
+
 /// Convert the two lower packed single-precision (32-bit) floating-point
 /// elements in `a` to packed 32-bit integers with truncation.
 #[inline(always)]
@@ -205,6 +273,50 @@ mod tests {
         assert_eq!(r, sse::_m_pminub(a, b));
     }
 
+    #[simd_test = "sse"]
+    unsafe fn _mm_cvt_pi2ps() {
+        let a = f32x4::new(0., 0., 3., 4.);
+        let b = i32x2::new(1, 2);
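+        // The two i32 lanes of `b` are converted to f32 and placed in the
+        // two low lanes of the result; the two high lanes are copied from `a`.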
+        let expected = f32x4::new(1., 2., 3., 4.);
+        let r = sse::_mm_cvt_pi2ps(a, b);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "sse"]
+    unsafe fn _mm_extract_pi16() {
+        let a = i16x4::new(1, 2, 3, 4);
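+        // The immediate selects which of the four 16-bit lanes is returned.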
+        let r = sse::_mm_extract_pi16(a, 0);
+        assert_eq!(r, 1);
+        let r = sse::_mm_extract_pi16(a, 1);
+        assert_eq!(r, 2);
+    }
+
+    #[simd_test = "sse"]
+    unsafe fn _mm_insert_pi16() {
+        let a = i16x4::new(1, 2, 3, 4);
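+        // The immediate selects which 16-bit lane of `a` is replaced by the
+        // low 16 bits of the integer operand (0 here).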
+        let r = sse::_mm_insert_pi16(a, 0, 0b0);
+        let expected = i16x4::new(0, 2, 3, 4);
+        assert_eq!(r, expected);
+        let r = sse::_mm_insert_pi16(a, 0, 0b10);
+        let expected = i16x4::new(1, 2, 0, 4);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "sse"]
+    unsafe fn _mm_movemask_pi8() {
+        let a = i16x4::new(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000);
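+        // Viewed as 8 bytes, only bytes 0 and 4 (the low bytes holding
+        // 0b1000_0000) have their most significant bit set, so the mask
+        // is 0b10001.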
+        let r = sse::_mm_movemask_pi8(a);
+        assert_eq!(r, 0b10001);
+    }
+
+    #[simd_test = "sse"]
+    unsafe fn _mm_shuffle_pi16() {
+        let a = i16x4::new(1, 2, 3, 4);
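+        // Each 2-bit field of the immediate, from least to most significant,
+        // selects the source lane for the corresponding destination lane:
+        // 0b00_01_01_11 gives [a[3], a[1], a[1], a[0]] = [4, 2, 2, 1].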
+        let r = sse::_mm_shuffle_pi16(a, 0b00_01_01_11);
+        let expected = i16x4::new(4, 2, 2, 1);
+        assert_eq!(r, expected);
+    }
+
     #[simd_test = "sse"]
     unsafe fn _mm_cvtps_pi32() {
         let a = f32x4::new(1.0, 2.0, 3.0, 4.0);