Skip to content

Commit 50f1c8f

Browse files
committed
Drop swizzle API
Swizzles are not the right approach. Instead, we want powerful, general permutation capabilities, and to leave it up to the compiler to optimize them into swizzle instructions. Refs: gh-26 Signed-off-by: Matthias Kretz <[email protected]>
1 parent 8c7d43c commit 50f1c8f

File tree

7 files changed

+0
-184
lines changed

7 files changed

+0
-184
lines changed

Diff for: avx/vector.h

-16
Original file line numberDiff line numberDiff line change
@@ -215,22 +215,6 @@ template <typename T> class Vector<T, VectorAbi::Avx>
215215
Vc_INTRINSIC_L void setQnan() Vc_INTRINSIC_R;
216216
Vc_INTRINSIC_L void setQnan(MaskArg k) Vc_INTRINSIC_R;
217217

218-
///////////////////////////////////////////////////////////////////////////////////////////
219-
// swizzles
220-
Vc_INTRINSIC_L Vc_PURE_L const Vector &abcd() const Vc_INTRINSIC_R Vc_PURE_R;
221-
Vc_INTRINSIC_L Vc_PURE_L const Vector cdab() const Vc_INTRINSIC_R Vc_PURE_R;
222-
Vc_INTRINSIC_L Vc_PURE_L const Vector badc() const Vc_INTRINSIC_R Vc_PURE_R;
223-
Vc_INTRINSIC_L Vc_PURE_L const Vector aaaa() const Vc_INTRINSIC_R Vc_PURE_R;
224-
Vc_INTRINSIC_L Vc_PURE_L const Vector bbbb() const Vc_INTRINSIC_R Vc_PURE_R;
225-
Vc_INTRINSIC_L Vc_PURE_L const Vector cccc() const Vc_INTRINSIC_R Vc_PURE_R;
226-
Vc_INTRINSIC_L Vc_PURE_L const Vector dddd() const Vc_INTRINSIC_R Vc_PURE_R;
227-
Vc_INTRINSIC_L Vc_PURE_L const Vector bcad() const Vc_INTRINSIC_R Vc_PURE_R;
228-
Vc_INTRINSIC_L Vc_PURE_L const Vector bcda() const Vc_INTRINSIC_R Vc_PURE_R;
229-
Vc_INTRINSIC_L Vc_PURE_L const Vector dabc() const Vc_INTRINSIC_R Vc_PURE_R;
230-
Vc_INTRINSIC_L Vc_PURE_L const Vector acbd() const Vc_INTRINSIC_R Vc_PURE_R;
231-
Vc_INTRINSIC_L Vc_PURE_L const Vector dbca() const Vc_INTRINSIC_R Vc_PURE_R;
232-
Vc_INTRINSIC_L Vc_PURE_L const Vector dcba() const Vc_INTRINSIC_R Vc_PURE_R;
233-
234218
#include "../common/gatherinterface.h"
235219
#include "../common/scatterinterface.h"
236220

Diff for: avx/vector.tcc

-48
Original file line numberDiff line numberDiff line change
@@ -131,54 +131,6 @@ Vc_INTRINSIC void Vector<T, VectorAbi::Avx>::store(U *mem, Mask mask, Flags flag
131131
}
132132

133133
///////////////////////////////////////////////////////////////////////////////////////////
134-
///////////////////////////////////////////////////////////////////////////////////////////
135-
// swizzles {{{1
136-
template<typename T> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE &Vector<T, VectorAbi::Avx>::abcd() const { return *this; }
137-
template<typename T> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::cdab() const { return Mem::permute<X2, X3, X0, X1>(data()); }
138-
template<typename T> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::badc() const { return Mem::permute<X1, X0, X3, X2>(data()); }
139-
template<typename T> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::aaaa() const { return Mem::permute<X0, X0, X0, X0>(data()); }
140-
template<typename T> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::bbbb() const { return Mem::permute<X1, X1, X1, X1>(data()); }
141-
template<typename T> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::cccc() const { return Mem::permute<X2, X2, X2, X2>(data()); }
142-
template<typename T> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::dddd() const { return Mem::permute<X3, X3, X3, X3>(data()); }
143-
template<typename T> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::bcad() const { return Mem::permute<X1, X2, X0, X3>(data()); }
144-
template<typename T> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::bcda() const { return Mem::permute<X1, X2, X3, X0>(data()); }
145-
template<typename T> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::dabc() const { return Mem::permute<X3, X0, X1, X2>(data()); }
146-
template<typename T> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::acbd() const { return Mem::permute<X0, X2, X1, X3>(data()); }
147-
template<typename T> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::dbca() const { return Mem::permute<X3, X1, X2, X0>(data()); }
148-
template<typename T> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::dcba() const { return Mem::permute<X3, X2, X1, X0>(data()); }
149-
150-
template<> Vc_INTRINSIC const AVX2::double_v Vc_PURE AVX2::double_v::cdab() const { return Mem::shuffle128<X1, X0>(data(), data()); }
151-
template<> Vc_INTRINSIC const AVX2::double_v Vc_PURE AVX2::double_v::badc() const { return Mem::permute<X1, X0, X3, X2>(data()); }
152-
template<> Vc_INTRINSIC const AVX2::double_v Vc_PURE AVX2::double_v::aaaa() const { const double &tmp = d.m(0); return _mm256_broadcast_sd(&tmp); }
153-
template<> Vc_INTRINSIC const AVX2::double_v Vc_PURE AVX2::double_v::bbbb() const { const double &tmp = d.m(1); return _mm256_broadcast_sd(&tmp); }
154-
template<> Vc_INTRINSIC const AVX2::double_v Vc_PURE AVX2::double_v::cccc() const { const double &tmp = d.m(2); return _mm256_broadcast_sd(&tmp); }
155-
template<> Vc_INTRINSIC const AVX2::double_v Vc_PURE AVX2::double_v::dddd() const { const double &tmp = d.m(3); return _mm256_broadcast_sd(&tmp); }
156-
template<> Vc_INTRINSIC const AVX2::double_v Vc_PURE AVX2::double_v::bcad() const { return Mem::shuffle<X1, Y0, X2, Y3>(Mem::shuffle128<X0, X0>(data(), data()), Mem::shuffle128<X1, X1>(data(), data())); }
157-
template<> Vc_INTRINSIC const AVX2::double_v Vc_PURE AVX2::double_v::bcda() const { return Mem::shuffle<X1, Y0, X3, Y2>(data(), Mem::shuffle128<X1, X0>(data(), data())); }
158-
template<> Vc_INTRINSIC const AVX2::double_v Vc_PURE AVX2::double_v::dabc() const { return Mem::shuffle<X1, Y0, X3, Y2>(Mem::shuffle128<X1, X0>(data(), data()), data()); }
159-
template<> Vc_INTRINSIC const AVX2::double_v Vc_PURE AVX2::double_v::acbd() const { return Mem::shuffle<X0, Y0, X3, Y3>(Mem::shuffle128<X0, X0>(data(), data()), Mem::shuffle128<X1, X1>(data(), data())); }
160-
template<> Vc_INTRINSIC const AVX2::double_v Vc_PURE AVX2::double_v::dbca() const { return Mem::shuffle<X1, Y1, X2, Y2>(Mem::shuffle128<X1, X1>(data(), data()), Mem::shuffle128<X0, X0>(data(), data())); }
161-
template<> Vc_INTRINSIC const AVX2::double_v Vc_PURE AVX2::double_v::dcba() const { return cdab().badc(); }
162-
163-
#define VC_SWIZZLES_16BIT_IMPL(T) \
164-
template<> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::cdab() const { return Mem::permute<X2, X3, X0, X1, X6, X7, X4, X5>(data()); } \
165-
template<> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::badc() const { return Mem::permute<X1, X0, X3, X2, X5, X4, X7, X6>(data()); } \
166-
template<> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::aaaa() const { return Mem::permute<X0, X0, X0, X0, X4, X4, X4, X4>(data()); } \
167-
template<> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::bbbb() const { return Mem::permute<X1, X1, X1, X1, X5, X5, X5, X5>(data()); } \
168-
template<> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::cccc() const { return Mem::permute<X2, X2, X2, X2, X6, X6, X6, X6>(data()); } \
169-
template<> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::dddd() const { return Mem::permute<X3, X3, X3, X3, X7, X7, X7, X7>(data()); } \
170-
template<> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::bcad() const { return Mem::permute<X1, X2, X0, X3, X5, X6, X4, X7>(data()); } \
171-
template<> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::bcda() const { return Mem::permute<X1, X2, X3, X0, X5, X6, X7, X4>(data()); } \
172-
template<> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::dabc() const { return Mem::permute<X3, X0, X1, X2, X7, X4, X5, X6>(data()); } \
173-
template<> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::acbd() const { return Mem::permute<X0, X2, X1, X3, X4, X6, X5, X7>(data()); } \
174-
template<> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::dbca() const { return Mem::permute<X3, X1, X2, X0, X7, X5, X6, X4>(data()); } \
175-
template<> Vc_INTRINSIC const AVX2::Vector<T> Vc_PURE Vector<T, VectorAbi::Avx>::dcba() const { return Mem::permute<X3, X2, X1, X0, X7, X6, X5, X4>(data()); }
176-
#ifdef VC_IMPL_AVX2
177-
VC_SWIZZLES_16BIT_IMPL(short)
178-
VC_SWIZZLES_16BIT_IMPL(unsigned short)
179-
#endif
180-
#undef VC_SWIZZLES_16BIT_IMPL
181-
182134
///////////////////////////////////////////////////////////////////////////////////////////
183135
// division {{{1
184136
template<typename T> inline AVX2::Vector<T> &Vector<T, VectorAbi::Avx>::operator/=(EntryType x)

Diff for: mic/vector.h

-16
Original file line numberDiff line numberDiff line change
@@ -182,22 +182,6 @@ class Vector<T, VectorAbi::Mic> : public MIC::StoreMixin<MIC::Vector<T>, T>
182182
///////////////////////////////////////////////////////////////////////////////////////////
183183
// stores in StoreMixin
184184

185-
///////////////////////////////////////////////////////////////////////////////////////////
186-
// swizzles
187-
Vc_INTRINSIC_L Vc_CONST_L const Vector<T> &abcd() const Vc_INTRINSIC_R Vc_CONST_R;
188-
Vc_INTRINSIC_L Vc_CONST_L Vector<T> cdab() const Vc_INTRINSIC_R Vc_CONST_R;
189-
Vc_INTRINSIC_L Vc_CONST_L Vector<T> badc() const Vc_INTRINSIC_R Vc_CONST_R;
190-
Vc_INTRINSIC_L Vc_CONST_L Vector<T> aaaa() const Vc_INTRINSIC_R Vc_CONST_R;
191-
Vc_INTRINSIC_L Vc_CONST_L Vector<T> bbbb() const Vc_INTRINSIC_R Vc_CONST_R;
192-
Vc_INTRINSIC_L Vc_CONST_L Vector<T> cccc() const Vc_INTRINSIC_R Vc_CONST_R;
193-
Vc_INTRINSIC_L Vc_CONST_L Vector<T> dddd() const Vc_INTRINSIC_R Vc_CONST_R;
194-
Vc_INTRINSIC_L Vc_CONST_L Vector<T> bcad() const Vc_INTRINSIC_R Vc_CONST_R;
195-
Vc_INTRINSIC_L Vc_CONST_L Vector<T> bcda() const Vc_INTRINSIC_R Vc_CONST_R;
196-
Vc_INTRINSIC_L Vc_CONST_L Vector<T> dabc() const Vc_INTRINSIC_R Vc_CONST_R;
197-
Vc_INTRINSIC_L Vc_CONST_L Vector<T> acbd() const Vc_INTRINSIC_R Vc_CONST_R;
198-
Vc_INTRINSIC_L Vc_CONST_L Vector<T> dbca() const Vc_INTRINSIC_R Vc_CONST_R;
199-
Vc_INTRINSIC_L Vc_CONST_L Vector<T> dcba() const Vc_INTRINSIC_R Vc_CONST_R;
200-
201185
#include "../common/gatherinterface.h"
202186
#include "../common/scatterinterface.h"
203187

Diff for: mic/vector.tcc

-40
Original file line numberDiff line numberDiff line change
@@ -299,46 +299,6 @@ template<typename Parent, typename T> Vc_INTRINSIC void StoreMixin<Parent, T>::s
299299
}
300300
} // namespace MIC
301301

302-
// swizzles {{{1
303-
template<typename T> Vc_INTRINSIC Vc_CONST const Vector<T, VectorAbi::Mic> &Vector<T, VectorAbi::Mic>::abcd() const { return *this; }
304-
template<typename T> Vc_INTRINSIC Vc_CONST Vector<T, VectorAbi::Mic> Vector<T, VectorAbi::Mic>::cdab() const { return MicIntrinsics::swizzle(d.v(), _MM_SWIZ_REG_BADC); }
305-
template<typename T> Vc_INTRINSIC Vc_CONST Vector<T, VectorAbi::Mic> Vector<T, VectorAbi::Mic>::badc() const { return MicIntrinsics::swizzle(d.v(), _MM_SWIZ_REG_CDAB); }
306-
template<typename T> Vc_INTRINSIC Vc_CONST Vector<T, VectorAbi::Mic> Vector<T, VectorAbi::Mic>::aaaa() const { return MicIntrinsics::swizzle(d.v(), _MM_SWIZ_REG_AAAA); }
307-
template<typename T> Vc_INTRINSIC Vc_CONST Vector<T, VectorAbi::Mic> Vector<T, VectorAbi::Mic>::bbbb() const { return MicIntrinsics::swizzle(d.v(), _MM_SWIZ_REG_BBBB); }
308-
template<typename T> Vc_INTRINSIC Vc_CONST Vector<T, VectorAbi::Mic> Vector<T, VectorAbi::Mic>::cccc() const { return MicIntrinsics::swizzle(d.v(), _MM_SWIZ_REG_CCCC); }
309-
template<typename T> Vc_INTRINSIC Vc_CONST Vector<T, VectorAbi::Mic> Vector<T, VectorAbi::Mic>::dddd() const { return MicIntrinsics::swizzle(d.v(), _MM_SWIZ_REG_DDDD); }
310-
template<typename T> Vc_INTRINSIC Vc_CONST Vector<T, VectorAbi::Mic> Vector<T, VectorAbi::Mic>::bcad() const { return MicIntrinsics::swizzle(d.v(), _MM_SWIZ_REG_DACB); }
311-
template<typename T> Vc_INTRINSIC Vc_CONST Vector<T, VectorAbi::Mic> Vector<T, VectorAbi::Mic>::bcda() const { return MicIntrinsics::shuffle(d.v(), _MM_PERM_ADCB); }
312-
template<typename T> Vc_INTRINSIC Vc_CONST Vector<T, VectorAbi::Mic> Vector<T, VectorAbi::Mic>::dabc() const { return MicIntrinsics::shuffle(d.v(), _MM_PERM_CBAD); }
313-
template<typename T> Vc_INTRINSIC Vc_CONST Vector<T, VectorAbi::Mic> Vector<T, VectorAbi::Mic>::acbd() const { return MicIntrinsics::shuffle(d.v(), _MM_PERM_DBCA); }
314-
template<typename T> Vc_INTRINSIC Vc_CONST Vector<T, VectorAbi::Mic> Vector<T, VectorAbi::Mic>::dbca() const { return MicIntrinsics::shuffle(d.v(), _MM_PERM_ACBD); }
315-
template<typename T> Vc_INTRINSIC Vc_CONST Vector<T, VectorAbi::Mic> Vector<T, VectorAbi::Mic>::dcba() const { return MicIntrinsics::shuffle(d.v(), _MM_PERM_ABCD); }
316-
317-
template<> Vc_INTRINSIC Vc_CONST MIC::double_v MIC::double_v::bcda() const {
318-
//ADCB
319-
auto &&tmp = _mm512_swizzle_pd(d.v(), _MM_SWIZ_REG_DACB);
320-
return _mm512_mask_swizzle_pd(tmp, 0xcc, tmp, _MM_SWIZ_REG_CDAB);
321-
}
322-
template<> Vc_INTRINSIC Vc_CONST MIC::double_v MIC::double_v::dabc() const {
323-
//CBAD
324-
auto &&tmp = _mm512_mask_swizzle_pd(d.v(), 0xaa, d.v(), _MM_SWIZ_REG_BADC); // BCDA
325-
return _mm512_swizzle_pd(tmp, _MM_SWIZ_REG_CDAB);
326-
}
327-
template<> Vc_INTRINSIC Vc_CONST MIC::double_v MIC::double_v::acbd() const {
328-
//DBCA
329-
auto &&tmp = _mm512_swizzle_pd(d.v(), _MM_SWIZ_REG_BADC); // BXXC
330-
return _mm512_mask_swizzle_pd(d.v(), 0x66, tmp, _MM_SWIZ_REG_CDAB); // XBCX
331-
}
332-
template<> Vc_INTRINSIC Vc_CONST MIC::double_v MIC::double_v::dbca() const {
333-
//ACBD
334-
auto &&tmp = _mm512_swizzle_pd(d.v(), _MM_SWIZ_REG_BADC); // XADX
335-
return _mm512_mask_swizzle_pd(d.v(), 0x99, tmp, _MM_SWIZ_REG_CDAB); // AXXD
336-
}
337-
template<> Vc_INTRINSIC Vc_CONST MIC::double_v MIC::double_v::dcba() const {
338-
//ABCD
339-
return _mm512_swizzle_pd(_mm512_swizzle_pd(d.v(), _MM_SWIZ_REG_CDAB), _MM_SWIZ_REG_BADC);
340-
}
341-
///////////////////////////////////////////////////////////////////////////////////////////
342302
// negation {{{1
343303
template<typename T> Vc_ALWAYS_INLINE Vc_PURE Vector<T, VectorAbi::Mic> Vector<T, VectorAbi::Mic>::operator-() const
344304
{

Diff for: scalar/vector.h

-16
Original file line numberDiff line numberDiff line change
@@ -129,22 +129,6 @@ template <typename T> class Vector<T, VectorAbi::Scalar>
129129
Vc_INTRINSIC_L void setQnan() Vc_INTRINSIC_R;
130130
Vc_INTRINSIC_L void setQnan(Mask m) Vc_INTRINSIC_R;
131131

132-
///////////////////////////////////////////////////////////////////////////////////////////
133-
// swizzles
134-
Vc_INTRINSIC const Vector &abcd() const { return *this; }
135-
Vc_INTRINSIC const Vector cdab() const { return *this; }
136-
Vc_INTRINSIC const Vector badc() const { return *this; }
137-
Vc_INTRINSIC const Vector aaaa() const { return *this; }
138-
Vc_INTRINSIC const Vector bbbb() const { return *this; }
139-
Vc_INTRINSIC const Vector cccc() const { return *this; }
140-
Vc_INTRINSIC const Vector dddd() const { return *this; }
141-
Vc_INTRINSIC const Vector bcad() const { return *this; }
142-
Vc_INTRINSIC const Vector bcda() const { return *this; }
143-
Vc_INTRINSIC const Vector dabc() const { return *this; }
144-
Vc_INTRINSIC const Vector acbd() const { return *this; }
145-
Vc_INTRINSIC const Vector dbca() const { return *this; }
146-
Vc_INTRINSIC const Vector dcba() const { return *this; }
147-
148132
#include "../common/gatherinterface.h"
149133
#include "../common/scatterinterface.h"
150134

Diff for: sse/vector.h

-16
Original file line numberDiff line numberDiff line change
@@ -149,22 +149,6 @@ template <typename T> class Vector<T, VectorAbi::Sse>
149149
Vc_INTRINSIC_L void setQnan() Vc_INTRINSIC_R;
150150
Vc_INTRINSIC_L void setQnan(const Mask &k) Vc_INTRINSIC_R;
151151

152-
///////////////////////////////////////////////////////////////////////////////////////////
153-
// swizzles
154-
Vc_INTRINSIC_L Vc_PURE_L const Vector &abcd() const Vc_INTRINSIC_R Vc_PURE_R;
155-
Vc_INTRINSIC_L Vc_PURE_L const Vector cdab() const Vc_INTRINSIC_R Vc_PURE_R;
156-
Vc_INTRINSIC_L Vc_PURE_L const Vector badc() const Vc_INTRINSIC_R Vc_PURE_R;
157-
Vc_INTRINSIC_L Vc_PURE_L const Vector aaaa() const Vc_INTRINSIC_R Vc_PURE_R;
158-
Vc_INTRINSIC_L Vc_PURE_L const Vector bbbb() const Vc_INTRINSIC_R Vc_PURE_R;
159-
Vc_INTRINSIC_L Vc_PURE_L const Vector cccc() const Vc_INTRINSIC_R Vc_PURE_R;
160-
Vc_INTRINSIC_L Vc_PURE_L const Vector dddd() const Vc_INTRINSIC_R Vc_PURE_R;
161-
Vc_INTRINSIC_L Vc_PURE_L const Vector bcad() const Vc_INTRINSIC_R Vc_PURE_R;
162-
Vc_INTRINSIC_L Vc_PURE_L const Vector bcda() const Vc_INTRINSIC_R Vc_PURE_R;
163-
Vc_INTRINSIC_L Vc_PURE_L const Vector dabc() const Vc_INTRINSIC_R Vc_PURE_R;
164-
Vc_INTRINSIC_L Vc_PURE_L const Vector acbd() const Vc_INTRINSIC_R Vc_PURE_R;
165-
Vc_INTRINSIC_L Vc_PURE_L const Vector dbca() const Vc_INTRINSIC_R Vc_PURE_R;
166-
Vc_INTRINSIC_L Vc_PURE_L const Vector dcba() const Vc_INTRINSIC_R Vc_PURE_R;
167-
168152
#include "../common/gatherinterface.h"
169153
#include "../common/scatterinterface.h"
170154

0 commit comments

Comments
 (0)