@@ -194,24 +194,113 @@ void RendererSIMD::OverlayMix() noexcept
194
194
}
195
195
196
196
if constexpr (MIX) // Mixing.
197
- ApplyICFMixSIMD<PLANE_ORDER>();
197
+ ApplyICFMixSIMDShift<PLANE_ORDER>();
198
+ // ApplyICFMixSIMDCast<PLANE_ORDER>();
198
199
else // Overlay.
199
200
ApplyICFOverlaySIMD<PLANE_ORDER>();
200
201
}
201
202
202
203
// Vector of signed 32-bit lanes; the shift/mask mixing algorithm loads one Pixel per lane into it.
using PixelSIMDSigned = stdx::native_simd<int32_t >;
203
- // using PixelSIMDSignedMask = stdx::native_simd_mask<int32_t>;
204
// Native-width vector of unsigned bytes; the cast algorithm bit-casts pixel registers to this type.
+ using SIMDU8 = stdx::native_simd<uint8_t >;
205
// Native-width vector of signed 16-bit lanes; used below to derive the lane count of FixedS16.
+ using SIMDS16 = stdx::native_simd<int16_t >;
206
// Signed 16-bit vector with (SIMDS16::size() * sizeof(int16_t)) lanes, i.e. one 16-bit lane per byte
// of a native int16_t register, so the bytes of a SIMDU8 can be widened into it.
+ using FixedS16 = stdx::fixed_size_simd<int16_t , SIMDS16::size() * sizeof (SIMDS16::value_type)>;
204
207
205
208
// Broadcast constants. The doubled-letter variants (SIXTEENN, U8_MINN, U8_MAXX, ALPHA_MASKK) hold the
// same values in the vector types used by ApplyICFMixSIMDCast.
// 16 in every lane; subtracted from / added to the colour components around the ICF scaling.
static const PixelSIMDSigned SIXTEEN{16 };
206
- static const PixelSIMDSigned ZERO{0 };
207
- static const PixelSIMDSigned MAX{255 };
209
+ static const FixedS16 SIXTEENN{16 };
210
// Lower bound passed to stdx::clamp for a colour component.
+ static const PixelSIMDSigned U8_MIN{0 };
211
+ static const FixedS16 U8_MINN{0 };
212
// Upper bound passed to stdx::clamp for a colour component.
+ static const PixelSIMDSigned U8_MAX{255 };
213
+ static const FixedS16 U8_MAXX{255 };
208
214
// Selects the top byte of each pixel; a pixel whose masked value is 0 is treated as transparent.
static const PixelSIMDSigned ALPHA_MASK{-16777216 }; // 0xFF'00'00'00
215
+ static const PixelSIMD ALPHA_MASKK{0xFF'00'00'00 }; // Same mask as ALPHA_MASK, in the unsigned pixel vector type.
209
216
210
- /* * \brief Applies ICF and mixes using SIMD.
211
- * \param icfs The front ICF in low 16 bits and the back ICF in high 16 bits
217
+ /* * \brief Applies ICF and mixes using SIMD (algorithm that casts the registers to access RGB components) .
218
+ * \tparam PLANE_ORDER true when plane B in front of plane A, false for A in front of B.
212
219
*/
213
220
template <bool PLANE_ORDER>
214
- void RendererSIMD::ApplyICFMixSIMD () noexcept
221
+ void RendererSIMD::ApplyICFMixSIMDCast () noexcept
222
+ {
223
+ Pixel* screen = m_screenARGB.GetLinePointer (m_lineNumber);
224
+ const Pixel* planeFront;
225
+ const Pixel* planeBack;
226
+ const uint8_t * icfFront;
227
+ const uint8_t * icfBack;
228
+ if constexpr (PLANE_ORDER)
229
+ {
230
+ planeFront = m_planeLine[B].data ();
231
+ planeBack = m_planeLine[A].data ();
232
+ icfFront = m_icfLine[B].data ();
233
+ icfBack = m_icfLine[A].data ();
234
+ }
235
+ else
236
+ {
237
+ planeFront = m_planeLine[A].data ();
238
+ planeBack = m_planeLine[B].data ();
239
+ icfFront = m_icfLine[A].data ();
240
+ icfBack = m_icfLine[B].data ();
241
+ }
242
+
243
+ for (uint16_t i = 0 ; i < m_plane[A].m_width ;
244
+ i += SIMD_SIZE, planeFront += SIMD_SIZE, planeBack += SIMD_SIZE, icfFront += SIMD_SIZE, icfBack += SIMD_SIZE, screen += SIMD_SIZE) // TODO: width[B].
245
+ {
246
+ PixelSIMD icfF{icfFront, stdx::element_aligned};
247
+ PixelSIMD icfB{icfBack, stdx::element_aligned};
248
+
249
+ PixelSIMD planeF{planeFront, stdx::element_aligned};
250
+ PixelSIMD planeB{planeBack, stdx::element_aligned};
251
+
252
+ // transparent areas of an image simply give no contribution to the final display
253
+ // - that is they are equivalent to black areas..
254
+ const PixelSIMD::mask_type maskF = (planeF & ALPHA_MASKK) == 0 ;
255
+ const PixelSIMD::mask_type maskB = (planeB & ALPHA_MASKK) == 0 ;
256
+ stdx::where (maskF, planeF) = 0x00'10'10'10 ;
257
+ stdx::where (maskB, planeB) = 0x00'10'10'10 ;
258
+ stdx::where (maskF, icfF) = 63 ;
259
+ stdx::where (maskB, icfB) = 63 ;
260
+
261
+ // extend ICF to whole register.
262
+ icfF *= 0x00'01'01'01 ;
263
+ icfB *= 0x00'01'01'01 ;
264
+ // icfF |= (icfF << 16) | (icfF << 8);
265
+ // icfB |= (icfB << 16) | (icfB << 8);
266
+
267
+ SIMDU8 rgbF8 = std::bit_cast<SIMDU8>(planeF);
268
+ SIMDU8 rgbB8 = std::bit_cast<SIMDU8>(planeB);
269
+ SIMDU8 icfF8 = std::bit_cast<SIMDU8>(icfF);
270
+ SIMDU8 icfB8 = std::bit_cast<SIMDU8>(icfB);
271
+
272
+ FixedS16 rgbF16 = stdx::static_simd_cast<int16_t >(rgbF8);
273
+ FixedS16 rgbB16 = stdx::static_simd_cast<int16_t >(rgbB8);
274
+ FixedS16 icfF16 = stdx::static_simd_cast<int16_t >(icfF8);
275
+ FixedS16 icfB16 = stdx::static_simd_cast<int16_t >(icfB8);
276
+
277
+ rgbF16 -= SIXTEENN;
278
+ rgbB16 -= SIXTEENN;
279
+
280
+ rgbF16 *= icfF16;
281
+ rgbB16 *= icfB16;
282
+
283
+ rgbF16 /= 63 ;
284
+ rgbB16 /= 63 ;
285
+
286
+ rgbF16 += SIXTEENN;
287
+ // rgbB16 += SIXTEENN; Don't add 16 to back plane when applying ICF because the below mixing subtracts it.
288
+
289
+ rgbF16 += rgbB16;
290
+
291
+ stdx::clamp (rgbF16, U8_MINN, U8_MAXX);
292
+
293
+ const PixelSIMD result = std::bit_cast<PixelSIMD>(stdx::static_simd_cast<SIMDU8>(rgbF16));
294
+
295
+ result.copy_to (screen, stdx::element_aligned);
296
+ }
297
+ }
298
+
299
+ /* * \brief Applies ICF and mixes using SIMD (algorithm that shifts and masks RGB components).
300
+ * \tparam PLANE_ORDER true when plane B in front of plane A, false for A in front of B.
301
+ */
302
+ template <bool PLANE_ORDER>
303
+ void RendererSIMD::ApplyICFMixSIMDShift () noexcept
215
304
{
216
305
Pixel* screen = m_screenARGB.GetLinePointer (m_lineNumber);
217
306
const Pixel* planeFront;
@@ -242,6 +331,8 @@ void RendererSIMD::ApplyICFMixSIMD() noexcept
242
331
PixelSIMDSigned planeF{planeFront, stdx::element_aligned};
243
332
PixelSIMDSigned planeB{planeBack, stdx::element_aligned};
244
333
334
+ // transparent areas of an image simply give no contribution to the final display
335
+ // - that is they are equivalent to black areas..
245
336
const PixelSIMDSigned::mask_type maskF = (planeF & ALPHA_MASK) == 0 ;
246
337
const PixelSIMDSigned::mask_type maskB = (planeB & ALPHA_MASK) == 0 ;
247
338
stdx::where (maskF, planeF) = 0x00'10'10'10 ;
@@ -292,23 +383,28 @@ void RendererSIMD::ApplyICFMixSIMD() noexcept
292
383
rfp += SIXTEEN;
293
384
gfp += SIXTEEN;
294
385
bfp += SIXTEEN;
295
-
296
386
// Don't add 16 to back plane when applying ICF because the below mixing subtracts it.
387
+
297
388
rfp += rbp;
298
389
gfp += gbp;
299
390
bfp += bbp;
300
391
301
- stdx::clamp (rfp, ZERO, MAX );
302
- stdx::clamp (gfp, ZERO, MAX );
303
- stdx::clamp (bfp, ZERO, MAX );
392
+ stdx::clamp (rfp, U8_MIN, U8_MAX );
393
+ stdx::clamp (gfp, U8_MIN, U8_MAX );
394
+ stdx::clamp (bfp, U8_MIN, U8_MAX );
304
395
305
396
const PixelSIMDSigned result = (rfp << 16 ) | (gfp << 8 ) | bfp;
306
397
307
398
result.copy_to (screen, stdx::element_aligned);
308
399
}
309
400
}
401
+ // template void RendererSIMD::ApplyICFMixSIMDShift<false>() noexcept;
402
+ // template void RendererSIMD::ApplyICFMixSIMDShift<true>() noexcept;
310
403
311
404
/* * \brief Applies ICF and overlays using SIMD.
405
+ * \tparam PLANE_ORDER true when plane B in front of plane A, false for A in front of B.
406
+ *
407
+ * TODO: implement the cast method here too and benchmark it.
312
408
*/
313
409
template <bool PLANE_ORDER>
314
410
void RendererSIMD::ApplyICFOverlaySIMD () noexcept
0 commit comments