@@ -194,24 +194,111 @@ void RendererSIMD::OverlayMix() noexcept
194
194
}
195
195
196
196
if constexpr (MIX) // Mixing.
197
- ApplyICFMixSIMD<PLANE_ORDER>();
197
+ ApplyICFMixSIMDShift<PLANE_ORDER>();
198
+ // ApplyICFMixSIMDCast<PLANE_ORDER>();
198
199
else // Overlay.
199
200
ApplyICFOverlaySIMD<PLANE_ORDER>();
200
201
}
201
202
202
203
using PixelSIMDSigned = stdx::native_simd<int32_t >;
203
- // using PixelSIMDSignedMask = stdx::native_simd_mask<int32_t>;
204
+ using SIMDU8 = stdx::native_simd<uint8_t >;
205
+ using SIMDS16 = stdx::native_simd<int16_t >;
206
+ using FixedS16 = stdx::fixed_size_simd<int16_t , SIMDS16::size() * sizeof (SIMDS16::value_type)>;
204
207
205
208
static const PixelSIMDSigned SIXTEEN{16 };
206
- static const PixelSIMDSigned ZERO{0 };
207
- static const PixelSIMDSigned MAX{255 };
209
+ static const FixedS16 SIXTEENN{16 };
210
+ static const PixelSIMDSigned U8_MIN{0 };
211
+ static const FixedS16 U8_MINN{0 };
212
+ static const PixelSIMDSigned U8_MAX{255 };
213
+ static const FixedS16 U8_MAXX{255 };
208
214
static const PixelSIMDSigned ALPHA_MASK{-16777216 }; // 0xFF'00'00'00
215
+ static const PixelSIMD ALPHA_MASKK{0xFF'00'00'00 }; // 0xFF'00'00'00
209
216
210
- /* * \brief Applies ICF and mixes using SIMD.
211
- * \param icfs The front ICF in low 16 bits and the back ICF in high 16 bits
217
+ /* * \brief Applies ICF and mixes using SIMD (algorithm that casts the registers to access RGB components) .
218
+ * \tparam PLANE_ORDER true when plane B in front of plane A, false for A in front of B.
212
219
*/
213
220
template <bool PLANE_ORDER>
214
- void RendererSIMD::ApplyICFMixSIMD () noexcept
221
+ void RendererSIMD::ApplyICFMixSIMDCast () noexcept
222
+ {
223
+ Pixel* screen = m_screenARGB.GetLinePointer (m_lineNumber);
224
+ const Pixel* planeFront;
225
+ const Pixel* planeBack;
226
+ const uint8_t * icfFront;
227
+ const uint8_t * icfBack;
228
+ if constexpr (PLANE_ORDER)
229
+ {
230
+ planeFront = m_planeLine[B].data ();
231
+ planeBack = m_planeLine[A].data ();
232
+ icfFront = m_icfLine[B].data ();
233
+ icfBack = m_icfLine[A].data ();
234
+ }
235
+ else
236
+ {
237
+ planeFront = m_planeLine[A].data ();
238
+ planeBack = m_planeLine[B].data ();
239
+ icfFront = m_icfLine[A].data ();
240
+ icfBack = m_icfLine[B].data ();
241
+ }
242
+
243
+ for (uint16_t i = 0 ; i < m_plane[A].m_width ;
244
+ i += SIMD_SIZE, planeFront += SIMD_SIZE, planeBack += SIMD_SIZE, icfFront += SIMD_SIZE, icfBack += SIMD_SIZE, screen += SIMD_SIZE) // TODO: width[B].
245
+ {
246
+ PixelSIMD icfF{icfFront, stdx::element_aligned};
247
+ PixelSIMD icfB{icfBack, stdx::element_aligned};
248
+
249
+ PixelSIMD planeF{planeFront, stdx::element_aligned};
250
+ PixelSIMD planeB{planeBack, stdx::element_aligned};
251
+
252
+ // transparent areas of an image simply give no contribution to the final display
253
+ // - that is they are equivalent to black areas..
254
+ const PixelSIMD::mask_type maskF = (planeF & ALPHA_MASKK) == 0 ;
255
+ const PixelSIMD::mask_type maskB = (planeB & ALPHA_MASKK) == 0 ;
256
+ stdx::where (maskF, planeF) = 0x00'10'10'10 ;
257
+ stdx::where (maskB, planeB) = 0x00'10'10'10 ;
258
+ stdx::where (maskF, icfF) = 63 ;
259
+ stdx::where (maskB, icfB) = 63 ;
260
+
261
+ // extend ICF to whole register.
262
+ icfF |= (icfF << 16 ) | (icfF << 8 );
263
+ icfB |= (icfB << 16 ) | (icfB << 8 );
264
+
265
+ SIMDU8 rgbF8 = std::bit_cast<SIMDU8>(planeF);
266
+ SIMDU8 rgbB8 = std::bit_cast<SIMDU8>(planeB);
267
+ SIMDU8 icfF8 = std::bit_cast<SIMDU8>(icfF);
268
+ SIMDU8 icfB8 = std::bit_cast<SIMDU8>(icfB);
269
+
270
+ FixedS16 rgbF16 = stdx::static_simd_cast<int16_t >(rgbF8);
271
+ FixedS16 rgbB16 = stdx::static_simd_cast<int16_t >(rgbB8);
272
+ FixedS16 icfF16 = stdx::static_simd_cast<int16_t >(icfF8);
273
+ FixedS16 icfB16 = stdx::static_simd_cast<int16_t >(icfB8);
274
+
275
+ rgbF16 -= SIXTEENN;
276
+ rgbB16 -= SIXTEENN;
277
+
278
+ rgbF16 *= icfF16;
279
+ rgbB16 *= icfB16;
280
+
281
+ rgbF16 /= 63 ;
282
+ rgbB16 /= 63 ;
283
+
284
+ rgbF16 += SIXTEENN;
285
+ // rgbB16 += SIXTEENN; Don't add 16 to back plane when applying ICF because the below mixing subtracts it.
286
+
287
+ rgbF16 += rgbB16;
288
+
289
+ stdx::clamp (rgbF16, U8_MINN, U8_MAXX);
290
+
291
+ const PixelSIMD result = std::bit_cast<PixelSIMD>(stdx::static_simd_cast<SIMDU8>(rgbF16));
292
+
293
+ result.copy_to (screen, stdx::element_aligned);
294
+ }
295
+ }
296
+
297
+ /* * \brief Applies ICF and mixes using SIMD (algorithm that shifts and masks RGB components).
298
+ * \tparam PLANE_ORDER true when plane B in front of plane A, false for A in front of B.
299
+ */
300
+ template <bool PLANE_ORDER>
301
+ void RendererSIMD::ApplyICFMixSIMDShift () noexcept
215
302
{
216
303
Pixel* screen = m_screenARGB.GetLinePointer (m_lineNumber);
217
304
const Pixel* planeFront;
@@ -242,6 +329,8 @@ void RendererSIMD::ApplyICFMixSIMD() noexcept
242
329
PixelSIMDSigned planeF{planeFront, stdx::element_aligned};
243
330
PixelSIMDSigned planeB{planeBack, stdx::element_aligned};
244
331
332
+ // Transparent areas of an image give no contribution to the final display,
333
+ // i.e. they are equivalent to black areas.
245
334
const PixelSIMDSigned::mask_type maskF = (planeF & ALPHA_MASK) == 0 ;
246
335
const PixelSIMDSigned::mask_type maskB = (planeB & ALPHA_MASK) == 0 ;
247
336
stdx::where (maskF, planeF) = 0x00'10'10'10 ;
@@ -292,23 +381,28 @@ void RendererSIMD::ApplyICFMixSIMD() noexcept
292
381
rfp += SIXTEEN;
293
382
gfp += SIXTEEN;
294
383
bfp += SIXTEEN;
295
-
296
384
// Don't add 16 to back plane when applying ICF because the below mixing subtracts it.
385
+
297
386
rfp += rbp;
298
387
gfp += gbp;
299
388
bfp += bbp;
300
389
301
- stdx::clamp (rfp, ZERO, MAX );
302
- stdx::clamp (gfp, ZERO, MAX );
303
- stdx::clamp (bfp, ZERO, MAX );
390
+ stdx::clamp (rfp, U8_MIN, U8_MAX );
391
+ stdx::clamp (gfp, U8_MIN, U8_MAX );
392
+ stdx::clamp (bfp, U8_MIN, U8_MAX );
304
393
305
394
const PixelSIMDSigned result = (rfp << 16 ) | (gfp << 8 ) | bfp;
306
395
307
396
result.copy_to (screen, stdx::element_aligned);
308
397
}
309
398
}
399
+ // template void RendererSIMD::ApplyICFMixSIMDShift<false>() noexcept;
400
+ // template void RendererSIMD::ApplyICFMixSIMDShift<true>() noexcept;
310
401
311
402
/* * \brief Applies ICF and overlays using SIMD.
403
+ * \tparam PLANE_ORDER true when plane B in front of plane A, false for A in front of B.
404
+ *
405
+ * TODO: implement the cast method here too and benchmark it.
312
406
*/
313
407
template <bool PLANE_ORDER>
314
408
void RendererSIMD::ApplyICFOverlaySIMD () noexcept
0 commit comments