Skip to content

Commit 31ac537

Browse files
committed
Auto merge of rust-lang#3093 - eduardosm:llvm.x86.sse2.pmadd.wd, r=RalfJung
Implement the `llvm.x86.sse2.pmadd.wd` intrinsic
2 parents 7674e3f + 0c0c088 commit 31ac537

File tree

2 files changed

+54
-0
lines changed

2 files changed

+54
-0
lines changed

src/tools/miri/src/shims/x86/sse2.rs

+36
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,42 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
8282
this.write_immediate(*res, &dest)?;
8383
}
8484
}
85+
// Used to implement the _mm_madd_epi16 function.
86+
// Multiplies packed signed 16-bit integers in `left` and `right`, producing
87+
// intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
88+
// intermediate 32-bit integers, and packs the results in `dest`.
89+
"pmadd.wd" => {
90+
let [left, right] =
91+
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
92+
93+
let (left, left_len) = this.operand_to_simd(left)?;
94+
let (right, right_len) = this.operand_to_simd(right)?;
95+
let (dest, dest_len) = this.place_to_simd(dest)?;
96+
97+
assert_eq!(left_len, right_len);
98+
assert_eq!(dest_len.checked_mul(2).unwrap(), left_len);
99+
100+
for i in 0..dest_len {
101+
let j1 = i.checked_mul(2).unwrap();
102+
let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_i16()?;
103+
let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i16()?;
104+
105+
let j2 = j1.checked_add(1).unwrap();
106+
let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_i16()?;
107+
let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i16()?;
108+
109+
let dest = this.project_index(&dest, i)?;
110+
111+
// Multiplications are i16*i16->i32, which will not overflow.
112+
let mul1 = i32::from(left1).checked_mul(right1.into()).unwrap();
113+
let mul2 = i32::from(left2).checked_mul(right2.into()).unwrap();
114+
// However, this addition can overflow in the most extreme case
115+
// (-0x8000)*(-0x8000)+(-0x8000)*(-0x8000) = 0x80000000
116+
let res = mul1.wrapping_add(mul2);
117+
118+
this.write_scalar(Scalar::from_i32(res), &dest)?;
119+
}
120+
}
85121
// Used to implement the _mm_mulhi_epi16 and _mm_mulhi_epu16 functions.
86122
"pmulh.w" | "pmulhu.w" => {
87123
let [left, right] =

src/tools/miri/tests/pass/intrinsics-x86-sse2.rs

+18
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,24 @@ mod tests {
7070
}
7171
test_mm_avg_epu16();
7272

73+
#[target_feature(enable = "sse2")]
74+
unsafe fn test_mm_madd_epi16() {
75+
let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
76+
let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
77+
let r = _mm_madd_epi16(a, b);
78+
let e = _mm_setr_epi32(29, 81, 149, 233);
79+
assert_eq_m128i(r, e);
80+
81+
let a =
82+
_mm_setr_epi16(i16::MAX, i16::MAX, i16::MIN, i16::MIN, i16::MIN, i16::MAX, 0, 0);
83+
let b =
84+
_mm_setr_epi16(i16::MAX, i16::MAX, i16::MIN, i16::MIN, i16::MAX, i16::MIN, 0, 0);
85+
let r = _mm_madd_epi16(a, b);
86+
let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
87+
assert_eq_m128i(r, e);
88+
}
89+
test_mm_madd_epi16();
90+
7391
#[target_feature(enable = "sse2")]
7492
unsafe fn test_mm_mulhi_epi16() {
7593
let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));

0 commit comments

Comments
 (0)