Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 40 additions & 32 deletions crates/core_arch/src/x86/avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2585,44 +2585,52 @@ pub unsafe fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_imm8!(IMM8);
const fn mask(shift: i32, i: u32) -> u32 {
let shift = shift as u32 & 0xff;
if shift > 15 || i % 16 < shift {
0
} else {
32 + (i - shift)
}
}
let a = a.as_i8x32();
let zero = _mm256_setzero_si256().as_i8x32();
let r: i8x32 = simd_shuffle32!(
zero,
a,
<const IMM8: i32> [
32 - (IMM8 as u32 & 0xff),
33 - (IMM8 as u32 & 0xff),
34 - (IMM8 as u32 & 0xff),
35 - (IMM8 as u32 & 0xff),
36 - (IMM8 as u32 & 0xff),
37 - (IMM8 as u32 & 0xff),
38 - (IMM8 as u32 & 0xff),
39 - (IMM8 as u32 & 0xff),
40 - (IMM8 as u32 & 0xff),
41 - (IMM8 as u32 & 0xff),
42 - (IMM8 as u32 & 0xff),
43 - (IMM8 as u32 & 0xff),
44 - (IMM8 as u32 & 0xff),
45 - (IMM8 as u32 & 0xff),
46 - (IMM8 as u32 & 0xff),
47 - (IMM8 as u32 & 0xff),
48 - (IMM8 as u32 & 0xff) - 16,
49 - (IMM8 as u32 & 0xff) - 16,
50 - (IMM8 as u32 & 0xff) - 16,
51 - (IMM8 as u32 & 0xff) - 16,
52 - (IMM8 as u32 & 0xff) - 16,
53 - (IMM8 as u32 & 0xff) - 16,
54 - (IMM8 as u32 & 0xff) - 16,
55 - (IMM8 as u32 & 0xff) - 16,
56 - (IMM8 as u32 & 0xff) - 16,
57 - (IMM8 as u32 & 0xff) - 16,
58 - (IMM8 as u32 & 0xff) - 16,
59 - (IMM8 as u32 & 0xff) - 16,
60 - (IMM8 as u32 & 0xff) - 16,
61 - (IMM8 as u32 & 0xff) - 16,
62 - (IMM8 as u32 & 0xff) - 16,
63 - (IMM8 as u32 & 0xff) - 16,
mask(IMM8, 0),
mask(IMM8, 1),
mask(IMM8, 2),
mask(IMM8, 3),
mask(IMM8, 4),
mask(IMM8, 5),
mask(IMM8, 6),
mask(IMM8, 7),
mask(IMM8, 8),
mask(IMM8, 9),
mask(IMM8, 10),
mask(IMM8, 11),
mask(IMM8, 12),
mask(IMM8, 13),
mask(IMM8, 14),
mask(IMM8, 15),
mask(IMM8, 16),
mask(IMM8, 17),
mask(IMM8, 18),
mask(IMM8, 19),
mask(IMM8, 20),
mask(IMM8, 21),
mask(IMM8, 22),
mask(IMM8, 23),
mask(IMM8, 24),
mask(IMM8, 25),
mask(IMM8, 26),
mask(IMM8, 27),
mask(IMM8, 28),
mask(IMM8, 29),
mask(IMM8, 30),
mask(IMM8, 31),
],
);
transmute(r)
Expand Down
136 changes: 72 additions & 64 deletions crates/core_arch/src/x86/avx512bw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8873,76 +8873,84 @@ pub unsafe fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
static_assert_imm8!(IMM8);
const fn mask(shift: i32, i: u32) -> u32 {
let shift = shift as u32 & 0xff;
if shift > 15 || i % 16 < shift {
0
} else {
64 + (i - shift)
}
}
let a = a.as_i8x64();
let zero = _mm512_setzero_si512().as_i8x64();
let r: i8x64 = simd_shuffle64!(
zero,
a,
<const IMM8: i32> [
64 - (IMM8 as u32 & 0xff),
65 - (IMM8 as u32 & 0xff),
66 - (IMM8 as u32 & 0xff),
67 - (IMM8 as u32 & 0xff),
68 - (IMM8 as u32 & 0xff),
69 - (IMM8 as u32 & 0xff),
70 - (IMM8 as u32 & 0xff),
71 - (IMM8 as u32 & 0xff),
72 - (IMM8 as u32 & 0xff),
73 - (IMM8 as u32 & 0xff),
74 - (IMM8 as u32 & 0xff),
75 - (IMM8 as u32 & 0xff),
76 - (IMM8 as u32 & 0xff),
77 - (IMM8 as u32 & 0xff),
78 - (IMM8 as u32 & 0xff),
79 - (IMM8 as u32 & 0xff),
80 - (IMM8 as u32 & 0xff) - 16,
81 - (IMM8 as u32 & 0xff) - 16,
82 - (IMM8 as u32 & 0xff) - 16,
83 - (IMM8 as u32 & 0xff) - 16,
84 - (IMM8 as u32 & 0xff) - 16,
85 - (IMM8 as u32 & 0xff) - 16,
86 - (IMM8 as u32 & 0xff) - 16,
87 - (IMM8 as u32 & 0xff) - 16,
88 - (IMM8 as u32 & 0xff) - 16,
89 - (IMM8 as u32 & 0xff) - 16,
90 - (IMM8 as u32 & 0xff) - 16,
91 - (IMM8 as u32 & 0xff) - 16,
92 - (IMM8 as u32 & 0xff) - 16,
93 - (IMM8 as u32 & 0xff) - 16,
94 - (IMM8 as u32 & 0xff) - 16,
95 - (IMM8 as u32 & 0xff) - 16,
96 - (IMM8 as u32 & 0xff) - 32,
97 - (IMM8 as u32 & 0xff) - 32,
98 - (IMM8 as u32 & 0xff) - 32,
99 - (IMM8 as u32 & 0xff) - 32,
100 - (IMM8 as u32 & 0xff) - 32,
101 - (IMM8 as u32 & 0xff) - 32,
102 - (IMM8 as u32 & 0xff) - 32,
103 - (IMM8 as u32 & 0xff) - 32,
104 - (IMM8 as u32 & 0xff) - 32,
105 - (IMM8 as u32 & 0xff) - 32,
106 - (IMM8 as u32 & 0xff) - 32,
107 - (IMM8 as u32 & 0xff) - 32,
108 - (IMM8 as u32 & 0xff) - 32,
109 - (IMM8 as u32 & 0xff) - 32,
110 - (IMM8 as u32 & 0xff) - 32,
111 - (IMM8 as u32 & 0xff) - 32,
112 - (IMM8 as u32 & 0xff) - 48,
113 - (IMM8 as u32 & 0xff) - 48,
114 - (IMM8 as u32 & 0xff) - 48,
115 - (IMM8 as u32 & 0xff) - 48,
116 - (IMM8 as u32 & 0xff) - 48,
117 - (IMM8 as u32 & 0xff) - 48,
118 - (IMM8 as u32 & 0xff) - 48,
119 - (IMM8 as u32 & 0xff) - 48,
120 - (IMM8 as u32 & 0xff) - 48,
121 - (IMM8 as u32 & 0xff) - 48,
122 - (IMM8 as u32 & 0xff) - 48,
123 - (IMM8 as u32 & 0xff) - 48,
124 - (IMM8 as u32 & 0xff) - 48,
125 - (IMM8 as u32 & 0xff) - 48,
126 - (IMM8 as u32 & 0xff) - 48,
127 - (IMM8 as u32 & 0xff) - 48,
mask(IMM8, 0),
mask(IMM8, 1),
mask(IMM8, 2),
mask(IMM8, 3),
mask(IMM8, 4),
mask(IMM8, 5),
mask(IMM8, 6),
mask(IMM8, 7),
mask(IMM8, 8),
mask(IMM8, 9),
mask(IMM8, 10),
mask(IMM8, 11),
mask(IMM8, 12),
mask(IMM8, 13),
mask(IMM8, 14),
mask(IMM8, 15),
mask(IMM8, 16),
mask(IMM8, 17),
mask(IMM8, 18),
mask(IMM8, 19),
mask(IMM8, 20),
mask(IMM8, 21),
mask(IMM8, 22),
mask(IMM8, 23),
mask(IMM8, 24),
mask(IMM8, 25),
mask(IMM8, 26),
mask(IMM8, 27),
mask(IMM8, 28),
mask(IMM8, 29),
mask(IMM8, 30),
mask(IMM8, 31),
mask(IMM8, 32),
mask(IMM8, 33),
mask(IMM8, 34),
mask(IMM8, 35),
mask(IMM8, 36),
mask(IMM8, 37),
mask(IMM8, 38),
mask(IMM8, 39),
mask(IMM8, 40),
mask(IMM8, 41),
mask(IMM8, 42),
mask(IMM8, 43),
mask(IMM8, 44),
mask(IMM8, 45),
mask(IMM8, 46),
mask(IMM8, 47),
mask(IMM8, 48),
mask(IMM8, 49),
mask(IMM8, 50),
mask(IMM8, 51),
mask(IMM8, 52),
mask(IMM8, 53),
mask(IMM8, 54),
mask(IMM8, 55),
mask(IMM8, 56),
mask(IMM8, 57),
mask(IMM8, 58),
mask(IMM8, 59),
mask(IMM8, 60),
mask(IMM8, 61),
mask(IMM8, 62),
mask(IMM8, 63),
],
);
transmute(r)
Expand Down
5 changes: 3 additions & 2 deletions crates/core_arch/src/x86/sse2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -425,10 +425,11 @@ pub unsafe fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
#[target_feature(enable = "sse2")]
unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
const fn mask(shift: i32, i: u32) -> u32 {
if (shift as u32) > 15 {
let shift = shift as u32 & 0xff;
if shift > 15 {
i
} else {
16 - (shift as u32) + i
16 - shift + i
}
}
let zero = _mm_set1_epi8(0).as_i8x16();
Expand Down