Skip to content

Commit

Permalink
NEON armv7a & aarch64
Browse files Browse the repository at this point in the history
  • Loading branch information
mx989 authored and Auburn committed Nov 2, 2022
1 parent c6b370f commit 31a065f
Show file tree
Hide file tree
Showing 6 changed files with 632 additions and 412 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@ Uses FastSIMD to compile classes with multiple SIMD types and selects the fastes
- SSE4.1
- AVX2
- AVX512
- NEON

Supports:
- 32/64 bit
- Windows
- Linux
- Android
- MacOS
- MSVC
- Clang
Expand Down
5 changes: 3 additions & 2 deletions include/FastNoise/FastNoise_Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ namespace FastNoise
FastSIMD::Level_SSE2 |
FastSIMD::Level_SSE41 |
FastSIMD::Level_AVX2 |
FastSIMD::Level_AVX512 ;
FastSIMD::Level_AVX512 |
FastSIMD::Level_NEON ;

class Generator;
struct Metadata;
Expand All @@ -41,4 +42,4 @@ namespace FastNoise

#if !FASTNOISE_USE_SHARED_PTR
#include "SmartNode.h"
#endif
#endif
81 changes: 80 additions & 1 deletion include/FastNoise/Generators/Utils.inl
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,53 @@ namespace FastNoise
// Bit-8 = Flip sign of a + b
return ( a + b ) ^ FS_Casti32_f32( (index >> 3) << 31 );
}
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_NEON>* = nullptr>
FS_INLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
{
    // NEON overload. Derives a selector from the hash and uses four of its low bits to
    // pick the axis ordering, the per-component sign and the scale applied to (fX, fY),
    // then returns the combined value.

    // Keep the low 22 bits of the hash, scale them by 4/3 in float and convert back to int.
    int32v hashBits = hash & int32v( 0x3FFFFF );
    float32v scaledBits = FS_Converti32_f32( hashBits ) * float32v( 1.3333333333333333f );
    int32v selector = FS_Convertf32_i32( scaledBits );

    // Selector bit with value 4: choose the X/Y ordering.
    // The bit is moved to the top of the lane and then shifted back down by 31 on the mask
    // itself (presumably an arithmetic shift that fills the lane - confirm against the NEON mask type).
    mask32v swapAxes;
    swapAxes = selector << 29;
    swapAxes >>= 31;

    // Selector bit with value 2: multiply the first term by 2 instead of ROOT3.
    mask32v scaleByTwo;
    scaleByTwo = (selector << 30) >> 31;

    float32v major = FS_Select_f32( swapAxes, fY, fX );
    float32v minor = FS_Select_f32( swapAxes, fX, fY );

    // Selector bit with value 1: flip the sign of the second term.
    minor = minor ^ FS_Casti32_f32( selector << 31 );

    major = major * FS_Select_f32( scaleByTwo, float32v( 2 ), float32v( ROOT3 ) );

    // The second term is zeroed whenever the first term was multiplied by 2.
    minor = FS_NMask_f32( minor, scaleByTwo );

    // Selector bit with value 8: flip the sign of the sum.
    return ( major + minor ) ^ FS_Casti32_f32( (selector >> 3) << 31 );
}

template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
FS_INLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
Expand Down Expand Up @@ -128,6 +175,38 @@ namespace FastNoise

return FS_FMulAdd_f32( float32v( 1.0f + ROOT2 ), a, b );
}
template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_NEON> * = nullptr>
FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
{
    // NEON overload. The low three bits of the hash select one of these gradients:
    // ( 1+R2, 1 ) ( -1-R2, 1 ) ( 1+R2, -1 ) ( -1-R2, -1 )
    // ( 1, 1+R2 ) ( 1, -1-R2 ) ( -1, 1+R2 ) ( -1, -1-R2 )

    // Hash bit with value 4: choose which input receives the ( 1 + ROOT2 ) factor.
    // The bit is moved to the top of the lane and then shifted back down by 31 on the mask
    // itself (presumably an arithmetic shift that fills the lane - confirm against the NEON mask type).
    mask32v swapAxes;
    swapAxes = hash << 29;
    swapAxes >>= 31;

    // Hash bits with value 1 and 2, each moved into the sign-bit position.
    int32v signX = hash << 31;
    int32v signY = (hash >> 1) << 31;

    // XOR-ing with the sign-bit pattern negates the input when the hash bit is set.
    fX = fX ^ FS_Casti32_f32( signX );
    fY = fY ^ FS_Casti32_f32( signY );

    float32v scaledTerm = FS_Select_f32( swapAxes, fY, fX );
    float32v plainTerm = FS_Select_f32( swapAxes, fX, fY );

    // Presumably evaluates ( 1 + ROOT2 ) * scaledTerm + plainTerm - confirm against FS_FMulAdd_f32.
    return FS_FMulAdd_f32( float32v( 1.0f + ROOT2 ), scaledTerm, plainTerm );
}

template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
Expand Down Expand Up @@ -311,4 +390,4 @@ namespace FastNoise
}

using FnUtils = FastNoise::Utils<FS_SIMD_CLASS>;
namespace FnPrimes = FastNoise::Primes;
namespace FnPrimes = FastNoise::Primes;
5 changes: 5 additions & 0 deletions src/FastSIMD/FastSIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@
#include <algorithm>
#include <cstdint>

#if FASTSIMD_x86

#ifdef __GNUG__
#include <x86intrin.h>
#else
#include <intrin.h>
#endif

#endif


#include "FastSIMD/SIMDTypeList.h"

// Compile-time sanity check: SIMDTypeList::MinimumCompiled must share at least one bit with
// COMPILED_SIMD_LEVELS, otherwise the build stops with the message below (which points at
// FASTSIMD_FALLBACK_SIMD_LEVEL in FastSIMD_Config.h as the setting to fix).
static_assert(FastSIMD::SIMDTypeList::MinimumCompiled & FastSIMD::COMPILED_SIMD_LEVELS, "FASTSIMD_FALLBACK_SIMD_LEVEL is not a compiled SIMD level, check FastSIMD_Config.h");
Expand Down
Loading

0 comments on commit 31a065f

Please sign in to comment.