Skip to content

Commit

Permalink
Add unsigned saturating add/sub intrinsics for aarch64
Browse files Browse the repository at this point in the history
  • Loading branch information
afonso360 committed Nov 4, 2023
1 parent 88c2e78 commit 70a6abf
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 2 deletions.
21 changes: 21 additions & 0 deletions example/neon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,24 @@ unsafe fn test_vpadd_u8() {
assert_eq!(r, e);
}

#[cfg(target_arch = "aarch64")]
unsafe fn test_vqsub_u8() {
let a = u8x8::from([1, 2, 3, 4, 5, 6, 7, 0xff]);
let b = u8x8::from([30, 1, 1, 1, 34, 0xff, 36, 37]);
let r: u8x8 = transmute(vqsub_u8(transmute(a), transmute(b)));
let e = u8x8::from([0, 1, 2, 3, 0, 0, 0, 218]);
assert_eq!(r, e);
}

#[cfg(target_arch = "aarch64")]
unsafe fn test_vqadd_u8() {
let a = u8x8::from([1, 2, 3, 4, 5, 6, 7, 0xff]);
let b = u8x8::from([30, 1, 1, 1, 34, 0xff, 36, 37]);
let r: u8x8 = transmute(vqadd_u8(transmute(a), transmute(b)));
let e = u8x8::from([31, 3, 4, 5, 39, 0xff, 43, 0xff]);
assert_eq!(r, e);
}

#[cfg(target_arch = "aarch64")]
fn main() {
unsafe {
Expand All @@ -204,6 +222,9 @@ fn main() {
test_vpadd_u16();
test_vpadd_u32();
test_vpadd_u8();

test_vqsub_u8();
test_vqadd_u8();
}
}

Expand Down
8 changes: 6 additions & 2 deletions src/intrinsics/llvm_aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,19 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
});
}

_ if intrinsic.starts_with("llvm.aarch64.neon.sqadd.v") => {
_ if intrinsic.starts_with("llvm.aarch64.neon.sqadd.v")
|| intrinsic.starts_with("llvm.aarch64.neon.uqadd.v") =>
{
intrinsic_args!(fx, args => (x, y); intrinsic);

simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| {
crate::num::codegen_saturating_int_binop(fx, BinOp::Add, x_lane, y_lane)
});
}

_ if intrinsic.starts_with("llvm.aarch64.neon.sqsub.v") => {
_ if intrinsic.starts_with("llvm.aarch64.neon.sqsub.v")
|| intrinsic.starts_with("llvm.aarch64.neon.uqsub.v") =>
{
intrinsic_args!(fx, args => (x, y); intrinsic);

simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| {
Expand Down

0 comments on commit 70a6abf

Please sign in to comment.