Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
// to be a big chunk of work to implement them all there!
("simd", _) if target.contains("aarch64") => return true,

("simd", "simd_bit_shift") => return true, // FIXME Unsupported feature: proposed SIMD operator I8x16Shl
("simd", "simd_conversions") => return true, // FIXME Unsupported feature: proposed SIMD operator I16x8NarrowI32x4S
("simd", "simd_f32x4") => return true, // FIXME expected V128(F32x4([CanonicalNan, CanonicalNan, Value(Float32 { bits: 0 }), Value(Float32 { bits: 0 })])), got V128(18428729675200069632)
("simd", "simd_f64x2") => return true, // FIXME expected V128(F64x2([Value(Float64 { bits: 9221120237041090560 }), Value(Float64 { bits: 0 })])), got V128(0)
Expand Down
24 changes: 22 additions & 2 deletions cranelift/codegen/meta/src/isa/x86/legalize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -493,15 +493,16 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
);
}

// SIMD shift right (arithmetic)
for ty in &[I16, I32, I64] {
// SIMD shift right (arithmetic, i16x8 and i32x4)
for ty in &[I16, I32] {
let sshr = sshr.bind(vector(*ty, sse_vector_size));
let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
narrow.legalize(
def!(a = sshr(x, y)),
vec![def!(b = bitcast_i64x2(y)), def!(a = x86_psra(x, b))],
);
}
// SIMD shift right (arithmetic, i8x16)
{
let sshr = sshr.bind(vector(I8, sse_vector_size));
let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
Expand All @@ -526,6 +527,25 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
],
);
}
// SIMD shift right (arithmetic, i64x2)
//
// Registers, on the `narrow` legalization group, an expansion of the vector `sshr` bound to
// I64 lanes into two scalar I64 `sshr` operations, one per lane. This block does not use
// `x86_psra`.
// NOTE(review): presumably there is no packed 64-bit arithmetic shift to lower to on this
// target, hence the per-lane scalar expansion -- confirm.
{
// `sshr_vector` is matched in the source pattern; the two scalar bindings (both
// `sshr.bind(I64)`) are used in the expansion, one for each lane.
let sshr_vector = sshr.bind(vector(I64, sse_vector_size));
let sshr_scalar_lane0 = sshr.bind(I64);
let sshr_scalar_lane1 = sshr.bind(I64);
narrow.legalize(
def!(z = sshr_vector(x, y)),
vec![
// Use scalar operations to shift the first lane.
// The shifted lane is inserted into the original vector `x`, producing `c`.
def!(a = extractlane(x, uimm8_zero)),
def!(b = sshr_scalar_lane0(a, y)),
def!(c = insertlane(x, uimm8_zero, b)),
// Do the same for the second lane.
// The lane is read from the original `x` but inserted into `c`, so the final `z` holds
// both shifted lanes. The same shift amount `y` is applied to both lanes.
def!(d = extractlane(x, uimm8_one)),
def!(e = sshr_scalar_lane1(d, y)),
def!(z = insertlane(c, uimm8_one, e)),
],
);
}

// SIMD select
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
Expand Down
14 changes: 14 additions & 0 deletions cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,20 @@ block0:
return v2
}

; Legalization test for `sshr` on an i64x2 vector with an i32 shift amount. The expected
; expansion handles the lanes one at a time, extracting the lane, applying the scalar `sshr`,
; and inserting the shifted value back into the vector.
function %sshr_i64x2() -> i64x2 {
block0:
v0 = iconst.i32 1
v1 = vconst.i64x2 [1 2]
v2 = sshr v1, v0
; Lane 0 is extracted from v1, shifted by v0, and inserted back into v1 (giving v5).
; check: v3 = x86_pextr v1, 0
; nextln: v4 = sshr v3, v0
; nextln: v5 = x86_pinsr v1, 0, v4
; Lane 1 is extracted from v1, shifted by v0, and inserted into v5; the result takes the
; place of the original v2.
; nextln: v6 = x86_pextr v1, 1
; nextln: v7 = sshr v6, v0
; nextln: v2 = x86_pinsr v5, 1, v7
return v2
}

function %bitselect_i16x8() -> i16x8 {
block0:
v0 = vconst.i16x8 [0 0 0 0 0 0 0 0]
Expand Down
10 changes: 10 additions & 0 deletions cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,16 @@ block0:
}
; run

; Arithmetic right shift of an i64x2 vector (v0) by an i32 shift amount (v1). The
; expectations that follow cover a zero shift, negative lanes, and a shift by 63.
function %sshr_i64x2(i64x2, i32) -> i64x2 {
block0(v0:i64x2, v1:i32):
v2 = sshr v0, v1
return v2
}
; A zero shift amount leaves both lanes unchanged.
; run: %sshr_i64x2([1 -1], 0) == [1 -1]
; run: %sshr_i64x2([1 -1], 1) == [0 -1] ; note the -1 shift result
; run: %sshr_i64x2([2 -2], 1) == [1 -1]
; Shifting by 63 leaves only the sign: all ones for the negative lane, zero for the
; positive lane.
; run: %sshr_i64x2([0x80000000_00000000 0x7FFFFFFF_FFFFFFFF], 63) == [0xFFFFFFFF_FFFFFFFF 0]

function %bitselect_i8x16() -> b1 {
block0:
v0 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 255] ; the selector vector
Expand Down
9 changes: 4 additions & 5 deletions cranelift/wasm/src/code_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1402,7 +1402,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
Operator::I8x16Shl | Operator::I16x8Shl | Operator::I32x4Shl | Operator::I64x2Shl => {
let (a, b) = state.pop2();
let bitcast_a = optionally_bitcast_vector(a, type_of(op), builder);
let bitwidth = i64::from(builder.func.dfg.value_type(a).bits());
let bitwidth = i64::from(type_of(op).lane_bits());
// The spec expects to shift with `b mod lanewidth`; so, e.g., for 16 bit lane-width
// we do `b AND 15`; this means fewer instructions than `iconst + urem`.
let b_mod_bitwidth = builder.ins().band_imm(b, bitwidth - 1);
Expand All @@ -1411,16 +1411,16 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
Operator::I8x16ShrU | Operator::I16x8ShrU | Operator::I32x4ShrU | Operator::I64x2ShrU => {
let (a, b) = state.pop2();
let bitcast_a = optionally_bitcast_vector(a, type_of(op), builder);
let bitwidth = i64::from(builder.func.dfg.value_type(a).bits());
let bitwidth = i64::from(type_of(op).lane_bits());
// The spec expects to shift with `b mod lanewidth`; so, e.g., for 16 bit lane-width
// we do `b AND 15`; this means fewer instructions than `iconst + urem`.
let b_mod_bitwidth = builder.ins().band_imm(b, bitwidth - 1);
state.push1(builder.ins().ushr(bitcast_a, b_mod_bitwidth))
}
Operator::I8x16ShrS | Operator::I16x8ShrS | Operator::I32x4ShrS => {
Operator::I8x16ShrS | Operator::I16x8ShrS | Operator::I32x4ShrS | Operator::I64x2ShrS => {
let (a, b) = state.pop2();
let bitcast_a = optionally_bitcast_vector(a, type_of(op), builder);
let bitwidth = i64::from(builder.func.dfg.value_type(a).bits());
let bitwidth = i64::from(type_of(op).lane_bits());
// The spec expects to shift with `b mod lanewidth`; so, e.g., for 16 bit lane-width
// we do `b AND 15`; this means fewer instructions than `iconst + urem`.
let b_mod_bitwidth = builder.ins().band_imm(b, bitwidth - 1);
Expand Down Expand Up @@ -1544,7 +1544,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
}
Operator::I8x16Mul
| Operator::I64x2Mul
| Operator::I64x2ShrS
| Operator::I32x4TruncSatF32x4S
| Operator::I32x4TruncSatF32x4U
| Operator::I64x2TruncSatF64x2S
Expand Down