Skip to content

Commit

Permalink
Merge pull request #2421 from pygame-community/simd-transform-setup
Browse files Browse the repository at this point in the history
Add SIMD functionality to the transform submodule (Attempt 2)
  • Loading branch information
itzpr3d4t0r authored Sep 8, 2023
2 parents 0168adc + e51a250 commit e822283
Show file tree
Hide file tree
Showing 11 changed files with 169 additions and 66 deletions.
2 changes: 1 addition & 1 deletion buildconfig/Setup.Android.SDL2.in
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ time src_c/time.c $(SDL) $(DEBUG)
joystick src_c/joystick.c $(SDL) $(DEBUG)
draw src_c/draw.c $(SDL) $(DEBUG)
image src_c/image.c $(SDL) $(DEBUG)
transform src_c/transform.c src_c/rotozoom.c src_c/scale2x.c src_c/scale_mmx.c $(SDL) $(DEBUG) -D_NO_MMX_FOR_X86_64
transform src_c/simd_transform_sse2.c src_c/simd_transform_avx2.c src_c/transform.c src_c/rotozoom.c src_c/scale2x.c src_c/scale_mmx.c $(SDL) $(DEBUG) -D_NO_MMX_FOR_X86_64
mask src_c/mask.c src_c/bitmask.c $(SDL) $(DEBUG)
bufferproxy src_c/bufferproxy.c $(SDL) $(DEBUG)
pixelarray src_c/pixelarray.c $(SDL) $(DEBUG)
Expand Down
2 changes: 1 addition & 1 deletion buildconfig/Setup.Emscripten.SDL2.in
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ _sdl2.controller_old src_c/void.c
#_sdl2.touch src_c/_sdl2/touch.c $(SDL) $(DEBUG) -Isrc_c
_sdl2.touch src_c/void.c

#transform src_c/transform.c src_c/rotozoom.c src_c/scale2x.c src_c/scale_mmx.c $(SDL) $(DEBUG) -D_NO_MMX_FOR_X86_64
#transform src_c/simd_transform_sse2.c src_c/simd_transform_avx2.c src_c/transform.c src_c/rotozoom.c src_c/scale2x.c src_c/scale_mmx.c $(SDL) $(DEBUG) -D_NO_MMX_FOR_X86_64
transform src_c/void.c


2 changes: 1 addition & 1 deletion buildconfig/Setup.SDL2.in
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ time src_c/time.c $(SDL) $(DEBUG)
joystick src_c/joystick.c $(SDL) $(DEBUG)
draw src_c/draw.c $(SDL) $(DEBUG)
image src_c/image.c $(SDL) $(DEBUG)
transform src_c/transform.c src_c/rotozoom.c src_c/scale2x.c src_c/scale_mmx.c $(SDL) $(DEBUG)
transform src_c/simd_transform_sse2.c src_c/simd_transform_avx2.c src_c/transform.c src_c/rotozoom.c src_c/scale2x.c src_c/scale_mmx.c $(SDL) $(DEBUG)
mask src_c/mask.c src_c/bitmask.c $(SDL) $(DEBUG)
bufferproxy src_c/bufferproxy.c $(SDL) $(DEBUG)
pixelarray src_c/pixelarray.c $(SDL) $(DEBUG)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@

import distutils.ccompiler

avx2_filenames = ['simd_blitters_avx2']
avx2_filenames = ['simd_blitters_avx2', 'simd_transform_avx2']

compiler_options = {
'unix': ('-mavx2',),
Expand Down
54 changes: 1 addition & 53 deletions src_c/alphablit.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,59 +24,7 @@

#define NO_PYGAME_C_API
#include "_surface.h"

#if !defined(PG_ENABLE_ARM_NEON) && defined(__aarch64__)
// arm64 has neon optimisations enabled by default, even when fpu=neon is not
// passed
#define PG_ENABLE_ARM_NEON 1
#endif

/* See if we are compiled 64 bit on GCC or MSVC */
#if _WIN32 || _WIN64
#if _WIN64
#define ENV64BIT
#endif
#endif

// Check GCC
#if __GNUC__
#if __x86_64__ || __ppc64__ || __aarch64__
#define ENV64BIT
#endif
#endif

#if PG_ENABLE_ARM_NEON
// sse2neon.h is from here: https://github.com/DLTcollab/sse2neon
#include "include/sse2neon.h"
#endif /* PG_ENABLE_ARM_NEON */

/* This defines PG_ENABLE_SSE_NEON as True if either SSE or NEON is available
* at compile time. Since we do compile time translation of SSE2->NEON, they
* have the same code paths, so this reduces code duplication of those paths.
*/
#if defined(__SSE2__)
#define PG_ENABLE_SSE_NEON 1
#elif PG_ENABLE_ARM_NEON
#define PG_ENABLE_SSE_NEON 1
#else
#define PG_ENABLE_SSE_NEON 0
#endif

/* Runtime probe shared by the SSE2 and NEON code paths. Exactly one SDL CPU
 * feature query is compiled in, selected by the build target; a build with
 * neither SSE2 nor NEON enabled always reports 0. */
int
pg_HasSSE_NEON()
{
    int simd_present = 0;

#if defined(__SSE2__)
    simd_present = SDL_HasSSE2();
#elif PG_ENABLE_ARM_NEON
    simd_present = SDL_HasNEON();
#endif

    return simd_present;
}

#include "simd_shared.h"
#include "simd_blitters.h"

static void
Expand Down
9 changes: 0 additions & 9 deletions src_c/simd_blitters.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,6 @@
#define PG_ENABLE_ARM_NEON 1
#endif

int
pg_sse2_at_runtime_but_uncompiled();
int
pg_neon_at_runtime_but_uncompiled();
int
pg_avx2_at_runtime_but_uncompiled();

#if (defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON))
void
alphablit_alpha_sse2_argb_surf_alpha(SDL_BlitInfo *info);
Expand Down Expand Up @@ -60,8 +53,6 @@ premul_surf_color_by_alpha_non_simd(SDL_Surface *src, SDL_Surface *dst);
void
premul_surf_color_by_alpha_sse2(SDL_Surface *src, SDL_Surface *dst);

int
pg_has_avx2();
void
alphablit_alpha_avx2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info);
void
Expand Down
68 changes: 68 additions & 0 deletions src_c/simd_shared.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#define NO_PYGAME_C_API
#ifndef SIMD_SHARED_H
#define SIMD_SHARED_H

#include "_surface.h"

/* Returns 1 when SDL reports SSE2 at runtime but the build was made without
 * __SSE2__ (so no SSE2 code paths were compiled in); 0 in all other cases. */
int
pg_sse2_at_runtime_but_uncompiled();
/* Returns 1 when SDL reports NEON at runtime but PG_ENABLE_ARM_NEON was not
 * enabled at compile time; 0 in all other cases. */
int
pg_neon_at_runtime_but_uncompiled();
/* Returns 1 when SDL reports AVX2 at runtime but AVX2 support was not
 * compiled in; 0 in all other cases. */
int
pg_avx2_at_runtime_but_uncompiled();
/* Runtime AVX2 check that additionally returns 0 when compile-time AVX2
 * support is missing. */
int
pg_has_avx2();

#if !defined(PG_ENABLE_ARM_NEON) && defined(__aarch64__)
// arm64 has neon optimisations enabled by default, even when fpu=neon is not
// passed
#define PG_ENABLE_ARM_NEON 1
#endif

/* See if we are compiled 64 bit on GCC or MSVC */
#if _WIN32 || _WIN64
#if _WIN64
#define ENV64BIT
#endif
#endif

// Check GCC
#if __GNUC__
#if __x86_64__ || __ppc64__ || __aarch64__
#define ENV64BIT
#endif
#endif

#if PG_ENABLE_ARM_NEON
// sse2neon.h is from here: https://github.com/DLTcollab/sse2neon
#include "include/sse2neon.h"
#endif /* PG_ENABLE_ARM_NEON */

/* This defines PG_ENABLE_SSE_NEON as True if either SSE or NEON is available
* at compile time. Since we do compile time translation of SSE2->NEON, they
* have the same code paths, so this reduces code duplication of those paths.
*/
#if defined(__SSE2__)
#define PG_ENABLE_SSE_NEON 1
#elif PG_ENABLE_ARM_NEON
#define PG_ENABLE_SSE_NEON 1
#else
#define PG_ENABLE_SSE_NEON 0
#endif

/* Runtime check shared by the SSE2 and NEON code paths (they use the same
 * code, so a single query covers both).
 *
 * Returns the result of SDL_HasSSE2() when built with __SSE2__, the result
 * of SDL_HasNEON() when built with PG_ENABLE_ARM_NEON, and 0 otherwise. Only
 * the relevant SDL cpu feature check is compiled in.
 *
 * This is a definition living in a header that is included from several
 * translation units, so it must have internal linkage: an external
 * definition would be emitted once per including file and clash at link
 * time whenever those objects end up in the same binary. */
static inline int
pg_HasSSE_NEON(void)
{
#if defined(__SSE2__)
    return SDL_HasSSE2();
#elif PG_ENABLE_ARM_NEON
    return SDL_HasNEON();
#else
    return 0;
#endif
}

#endif // SIMD_SHARED_H
14 changes: 14 additions & 0 deletions src_c/simd_transform.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/* Declarations for the SIMD (SSE2/NEON and AVX2) transform routines.
 * Guarded against multiple inclusion so it can safely be pulled in through
 * more than one include path. */
#ifndef SIMD_TRANSFORM_H
#define SIMD_TRANSFORM_H

#define NO_PYGAME_C_API
#include "_surface.h"

#if !defined(PG_ENABLE_ARM_NEON) && defined(__aarch64__)
// arm64 has neon optimisations enabled by default, even when fpu=neon is not
// passed
#define PG_ENABLE_ARM_NEON 1
#endif

// SSE2 functions
#if defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)
#endif /* (defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)) */

// AVX2 functions

#endif /* SIMD_TRANSFORM_H */
44 changes: 44 additions & 0 deletions src_c/simd_transform_avx2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#include "simd_transform.h"

#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
#include <immintrin.h>
#endif /* defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H) */

/* Prints a diagnostic and terminates via PG_EXIT(1). Meant for the bodies of
 * AVX2 entry points that must never be reached when AVX2 support is absent.
 *
 * The two statements are wrapped in do { ... } while (0) so the macro
 * expands to exactly one statement; otherwise, under an unbraced `if`, only
 * the printf would be conditional and PG_EXIT would always run. Use it with
 * a trailing semicolon: BAD_AVX2_FUNCTION_CALL; */
#define BAD_AVX2_FUNCTION_CALL                                               \
    do {                                                                     \
        printf(                                                              \
            "Fatal Error: Attempted calling an AVX2 function when both compile " \
            "time and runtime support is missing. If you are seeing this "   \
            "message, you have stumbled across a pygame bug, please report it " \
            "to the devs!");                                                 \
        PG_EXIT(1);                                                          \
    } while (0)

/* Reports whether AVX2 code paths may be used. The SDL runtime query is only
 * consulted when AVX2 support was compiled in (__AVX2__ and HAVE_IMMINTRIN_H
 * defined, SDL_DISABLE_IMMINTRIN_H not defined); without compile-time
 * support the answer is always 0. */
int
pg_has_avx2()
{
    int avx2_usable = 0;

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
    !defined(SDL_DISABLE_IMMINTRIN_H)
    avx2_usable = SDL_HasAVX2();
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
          !defined(SDL_DISABLE_IMMINTRIN_H) */

    return avx2_usable;
}

/* Detects the "CPU has AVX2 but this build cannot use it" situation.
 * Returns 1 only when SDL reports AVX2 at runtime while AVX2 support was not
 * compiled in; every other combination yields 0. */
int
pg_avx2_at_runtime_but_uncompiled()
{
    /* No AVX2 on this machine: nothing is being left unused. */
    if (!SDL_HasAVX2()) {
        return 0;
    }

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
    !defined(SDL_DISABLE_IMMINTRIN_H)
    /* AVX2 is present and compiled in. */
    return 0;
#else
    /* AVX2 is present but the build lacks it. */
    return 1;
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
          !defined(SDL_DISABLE_IMMINTRIN_H) */
}
36 changes: 36 additions & 0 deletions src_c/simd_transform_sse2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#include "simd_transform.h"

#if PG_ENABLE_ARM_NEON
// sse2neon.h is from here: https://github.com/DLTcollab/sse2neon
#include "include/sse2neon.h"
#endif /* PG_ENABLE_ARM_NEON */

/* Detects the "CPU has SSE2 but this build cannot use it" situation.
 * Returns 1 only when SDL reports SSE2 at runtime while the build was made
 * without __SSE2__; every other combination yields 0. */
int
pg_sse2_at_runtime_but_uncompiled()
{
#ifdef __SSE2__
    const int built_with_sse2 = 1;
#else
    const int built_with_sse2 = 0;
#endif /* __SSE2__ */

    /* The runtime query always runs first; the compile-time flag then
     * decides whether a positive answer counts as "uncompiled". */
    return SDL_HasSSE2() && !built_with_sse2;
}

/* Detects the "CPU has NEON but this build cannot use it" situation.
 * Returns 1 only when SDL reports NEON at runtime while PG_ENABLE_ARM_NEON
 * was not enabled at compile time; every other combination yields 0. */
int
pg_neon_at_runtime_but_uncompiled()
{
    /* No NEON on this machine: nothing is being left unused. */
    if (!SDL_HasNEON()) {
        return 0;
    }

#if PG_ENABLE_ARM_NEON
    /* NEON is present and compiled in. */
    return 0;
#else
    /* NEON is present but the build lacks it. */
    return 1;
#endif /* PG_ENABLE_ARM_NEON */
}
2 changes: 2 additions & 0 deletions src_c/transform.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
#include <math.h>
#include <string.h>

#include "simd_shared.h"
#include "simd_transform.h"
#include "scale.h"

typedef void (*SMOOTHSCALE_FILTER_P)(Uint8 *, Uint8 *, int, int, int, int,
Expand Down

0 comments on commit e822283

Please sign in to comment.