Skip to content

Commit 8734247

Browse files
committed
New SIMD algorithm using casts for mixing (not faster than shifts and masks)
1 parent 3d2684c commit 8734247

File tree

2 files changed

+111
-14
lines changed

2 files changed

+111
-14
lines changed

src/CDI/Video/RendererSIMD.cpp

+107-11
Original file line numberDiff line numberDiff line change
@@ -194,24 +194,113 @@ void RendererSIMD::OverlayMix() noexcept
194194
}
195195

196196
if constexpr(MIX) // Mixing.
197-
ApplyICFMixSIMD<PLANE_ORDER>();
197+
ApplyICFMixSIMDShift<PLANE_ORDER>();
198+
// ApplyICFMixSIMDCast<PLANE_ORDER>();
198199
else // Overlay.
199200
ApplyICFOverlaySIMD<PLANE_ORDER>();
200201
}
201202

202203
using PixelSIMDSigned = stdx::native_simd<int32_t>;
203-
// using PixelSIMDSignedMask = stdx::native_simd_mask<int32_t>;
204+
using SIMDU8 = stdx::native_simd<uint8_t>;
205+
using SIMDS16 = stdx::native_simd<int16_t>;
206+
using FixedS16 = stdx::fixed_size_simd<int16_t, SIMDS16::size() * sizeof(SIMDS16::value_type)>;
204207

205208
static const PixelSIMDSigned SIXTEEN{16};
206-
static const PixelSIMDSigned ZERO{0};
207-
static const PixelSIMDSigned MAX{255};
209+
static const FixedS16 SIXTEENN{16};
210+
static const PixelSIMDSigned U8_MIN{0};
211+
static const FixedS16 U8_MINN{0};
212+
static const PixelSIMDSigned U8_MAX{255};
213+
static const FixedS16 U8_MAXX{255};
208214
static const PixelSIMDSigned ALPHA_MASK{-16777216}; // 0xFF'00'00'00
215+
static const PixelSIMD ALPHA_MASKK{0xFF'00'00'00}; // 0xFF'00'00'00
209216

210-
/** \brief Applies ICF and mixes using SIMD.
211-
* \param icfs The front ICF in low 16 bits and the back ICF in high 16 bits
217+
/** \brief Applies ICF and mixes using SIMD (algorithm that casts the registers to access RGB components).
218+
* \tparam PLANE_ORDER true when plane B in front of plane A, false for A in front of B.
212219
*/
213220
template<bool PLANE_ORDER>
214-
void RendererSIMD::ApplyICFMixSIMD() noexcept
221+
void RendererSIMD::ApplyICFMixSIMDCast() noexcept
222+
{
223+
Pixel* screen = m_screenARGB.GetLinePointer(m_lineNumber);
224+
const Pixel* planeFront;
225+
const Pixel* planeBack;
226+
const uint8_t* icfFront;
227+
const uint8_t* icfBack;
228+
if constexpr(PLANE_ORDER)
229+
{
230+
planeFront = m_planeLine[B].data();
231+
planeBack = m_planeLine[A].data();
232+
icfFront = m_icfLine[B].data();
233+
icfBack = m_icfLine[A].data();
234+
}
235+
else
236+
{
237+
planeFront = m_planeLine[A].data();
238+
planeBack = m_planeLine[B].data();
239+
icfFront = m_icfLine[A].data();
240+
icfBack = m_icfLine[B].data();
241+
}
242+
243+
for(uint16_t i = 0; i < m_plane[A].m_width;
244+
i += SIMD_SIZE, planeFront += SIMD_SIZE, planeBack += SIMD_SIZE, icfFront += SIMD_SIZE, icfBack += SIMD_SIZE, screen += SIMD_SIZE) // TODO: width[B].
245+
{
246+
PixelSIMD icfF{icfFront, stdx::element_aligned};
247+
PixelSIMD icfB{icfBack, stdx::element_aligned};
248+
249+
PixelSIMD planeF{planeFront, stdx::element_aligned};
250+
PixelSIMD planeB{planeBack, stdx::element_aligned};
251+
252+
// transparent areas of an image simply give no contribution to the final display
253+
// - that is they are equivalent to black areas..
254+
const PixelSIMD::mask_type maskF = (planeF & ALPHA_MASKK) == 0;
255+
const PixelSIMD::mask_type maskB = (planeB & ALPHA_MASKK) == 0;
256+
stdx::where(maskF, planeF) = 0x00'10'10'10;
257+
stdx::where(maskB, planeB) = 0x00'10'10'10;
258+
stdx::where(maskF, icfF) = 63;
259+
stdx::where(maskB, icfB) = 63;
260+
261+
// extend ICF to whole register.
262+
icfF *= 0x00'01'01'01;
263+
icfB *= 0x00'01'01'01;
264+
// icfF |= (icfF << 16) | (icfF << 8);
265+
// icfB |= (icfB << 16) | (icfB << 8);
266+
267+
SIMDU8 rgbF8 = std::bit_cast<SIMDU8>(planeF);
268+
SIMDU8 rgbB8 = std::bit_cast<SIMDU8>(planeB);
269+
SIMDU8 icfF8 = std::bit_cast<SIMDU8>(icfF);
270+
SIMDU8 icfB8 = std::bit_cast<SIMDU8>(icfB);
271+
272+
FixedS16 rgbF16 = stdx::static_simd_cast<int16_t>(rgbF8);
273+
FixedS16 rgbB16 = stdx::static_simd_cast<int16_t>(rgbB8);
274+
FixedS16 icfF16 = stdx::static_simd_cast<int16_t>(icfF8);
275+
FixedS16 icfB16 = stdx::static_simd_cast<int16_t>(icfB8);
276+
277+
rgbF16 -= SIXTEENN;
278+
rgbB16 -= SIXTEENN;
279+
280+
rgbF16 *= icfF16;
281+
rgbB16 *= icfB16;
282+
283+
rgbF16 /= 63;
284+
rgbB16 /= 63;
285+
286+
rgbF16 += SIXTEENN;
287+
// rgbB16 += SIXTEENN; Don't add 16 to back plane when applying ICF because the below mixing subtracts it.
288+
289+
rgbF16 += rgbB16;
290+
291+
stdx::clamp(rgbF16, U8_MINN, U8_MAXX);
292+
293+
const PixelSIMD result = std::bit_cast<PixelSIMD>(stdx::static_simd_cast<SIMDU8>(rgbF16));
294+
295+
result.copy_to(screen, stdx::element_aligned);
296+
}
297+
}
298+
299+
/** \brief Applies ICF and mixes using SIMD (algorithm that shifts and masks RGB components).
300+
* \tparam PLANE_ORDER true when plane B in front of plane A, false for A in front of B.
301+
*/
302+
template<bool PLANE_ORDER>
303+
void RendererSIMD::ApplyICFMixSIMDShift() noexcept
215304
{
216305
Pixel* screen = m_screenARGB.GetLinePointer(m_lineNumber);
217306
const Pixel* planeFront;
@@ -242,6 +331,8 @@ void RendererSIMD::ApplyICFMixSIMD() noexcept
242331
PixelSIMDSigned planeF{planeFront, stdx::element_aligned};
243332
PixelSIMDSigned planeB{planeBack, stdx::element_aligned};
244333

334+
// transparent areas of an image simply give no contribution to the final display
335+
// - that is they are equivalent to black areas..
245336
const PixelSIMDSigned::mask_type maskF = (planeF & ALPHA_MASK) == 0;
246337
const PixelSIMDSigned::mask_type maskB = (planeB & ALPHA_MASK) == 0;
247338
stdx::where(maskF, planeF) = 0x00'10'10'10;
@@ -292,23 +383,28 @@ void RendererSIMD::ApplyICFMixSIMD() noexcept
292383
rfp += SIXTEEN;
293384
gfp += SIXTEEN;
294385
bfp += SIXTEEN;
295-
296386
// Don't add 16 to back plane when applying ICF because the below mixing subtracts it.
387+
297388
rfp += rbp;
298389
gfp += gbp;
299390
bfp += bbp;
300391

301-
stdx::clamp(rfp, ZERO, MAX);
302-
stdx::clamp(gfp, ZERO, MAX);
303-
stdx::clamp(bfp, ZERO, MAX);
392+
stdx::clamp(rfp, U8_MIN, U8_MAX);
393+
stdx::clamp(gfp, U8_MIN, U8_MAX);
394+
stdx::clamp(bfp, U8_MIN, U8_MAX);
304395

305396
const PixelSIMDSigned result = (rfp << 16) | (gfp << 8) | bfp;
306397

307398
result.copy_to(screen, stdx::element_aligned);
308399
}
309400
}
401+
// template void RendererSIMD::ApplyICFMixSIMDShift<false>() noexcept;
402+
// template void RendererSIMD::ApplyICFMixSIMDShift<true>() noexcept;
310403

311404
/** \brief Applies ICF and overlays using SIMD.
405+
* \tparam PLANE_ORDER true when plane B in front of plane A, false for A in front of B.
406+
*
407+
* TODO: implement the cast method here too and benchmark it.
312408
*/
313409
template<bool PLANE_ORDER>
314410
void RendererSIMD::ApplyICFOverlaySIMD() noexcept

src/CDI/Video/RendererSIMD.hpp

+4-3
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ class RendererSIMD final : public Renderer
2828
PlaneSIMD m_cursorPlaneARGB{PlaneSIMD::CURSOR_WIDTH, PlaneSIMD::CURSOR_HEIGHT, PlaneSIMD::CURSOR_ARGB_SIZE};
2929

3030
std::array<std::array<uint8_t, SIMD_LINE_WIDTH>, 2> m_icfLine{};
31-
std::array<uint8_t, 2> m_currentICF{};
31+
// std::array<uint8_t, 2> m_currentICF{};
3232
// std::array<std::array<uint8_t, SIMD_LINE_WIDTH>, 2> m_matteFlagLine{};
33-
uint32_t m_currentMatteCommand{0};
33+
// uint32_t m_currentMatteCommand{0};
3434

3535
RendererSIMD() {}
3636
virtual ~RendererSIMD() noexcept {}
@@ -44,7 +44,8 @@ class RendererSIMD final : public Renderer
4444
void DrawCursor() noexcept;
4545

4646
template<bool MIX, bool PLANE_ORDER> void OverlayMix() noexcept;
47-
template<bool PLANE_ORDER> void ApplyICFMixSIMD() noexcept;
47+
template<bool PLANE_ORDER> void ApplyICFMixSIMDCast() noexcept;
48+
template<bool PLANE_ORDER> void ApplyICFMixSIMDShift() noexcept;
4849
template<bool PLANE_ORDER> void ApplyICFOverlaySIMD() noexcept;
4950

5051
void ResetMatteSIMD() noexcept;

0 commit comments

Comments
 (0)