|
| 1 | +#ifndef __VMATH_DECL_VEC16_F32_H |
| 2 | +#define __VMATH_DECL_VEC16_F32_H |
| 3 | + |
| 4 | +#include "vmath/internal/intrinsics.h" |
| 5 | +#include "vmath/internal/stdfloat.h" |
| 6 | + |
| 7 | +typedef struct VMATH_ALIGNED(64) /* In-memory storage form of a 16 element float32 vector; this is the type vm_load_v16f reads and vm_store_v16f writes. VMATH_ALIGNED(64) presumably requests 64 byte alignment (macro defined elsewhere - confirm). */ |
| 8 | +{ |
| 9 | + vm_float32_t buffer[16]; /* the 16 float32 elements, contiguous */ |
| 10 | +} vm_v16fs_t; |
| 11 | + |
| 12 | +#if defined(VMATH_AVX512_GENERIC_ENABLE) /* vm_v16f_t is the working (register) form of a 16 element float32 vector; its layout depends on which backend macro is defined */ |
| 13 | + |
| 14 | +typedef __m512 vm_v16f_t; /* a single 512-bit vector holds all 16 float32 elements */ |
| 15 | + |
| 16 | +#elif defined(VMATH_AVX256_GENERIC_ENABLE) |
| 17 | + |
| 18 | +// Emulate one 512-bit vector with two 256-bit __m256 halves (8 float32 each) |
| 19 | +typedef struct VMATH_ALIGNED(64) |
| 20 | +{ |
| 21 | + __m256 buffer[2]; |
| 22 | +} vm_v16f_t; |
| 23 | + |
| 24 | +#elif defined(VMATH_SSE41_ENABLE) |
| 25 | + |
| 26 | +// Emulate one 512-bit vector with four 128-bit __m128 quarters (4 float32 each) |
| 27 | +typedef struct VMATH_ALIGNED(64) |
| 28 | +{ |
| 29 | + __m128 buffer[4]; |
| 30 | +} vm_v16f_t; |
| 31 | + |
| 32 | +#elif defined(VMATH_ARM_ENABLE) || defined(VMATH_ARM64_ENABLE) |
| 33 | +#error ARM SIMD not implemented |
| 34 | +#elif defined(VMATH_RISCV_V1_ENABLE) |
| 35 | +#error RISCV vector extensions not implemented |
| 36 | +#else /* none of the backend macros above is defined: plain array fallback */ |
| 37 | + |
| 38 | +typedef struct /* NOTE(review): unlike the SIMD structs above, this fallback carries no VMATH_ALIGNED(64) - confirm that is intentional */ |
| 39 | +{ |
| 40 | + vm_float32_t buffer[16]; |
| 41 | +} vm_v16f_t; |
| 42 | + |
| 43 | +#endif |
| 44 | + |
| 45 | +/// Load 16 contiguous floats from `vec` into a vector. Memory must be 64 byte aligned. |
| 46 | +VMATH_INLINE_DECL vm_v16f_t vm_load_v16f(const vm_v16fs_t* vec); |
| 47 | +/// Load 16 contiguous floats from a raw buffer of floats into a vector. Memory |
| 48 | +/// must be 64 byte aligned. |
| 49 | +VMATH_INLINE_DECL vm_v16f_t vm_loadb_v16f(const vm_float32_t vec[16]); |
| 50 | +/// Store the 16 floats of `vec` to `output`. Memory must be 64 byte aligned. |
| 51 | +VMATH_INLINE_DECL void vm_store_v16f(vm_v16fs_t* output, vm_v16f_t vec); |
| 52 | +/// Store the 16 floats of `vec` to a raw buffer of floats. Memory must be 64 |
| 53 | +/// byte aligned. |
| 54 | +VMATH_INLINE_DECL void vm_storeb_v16f(vm_float32_t output[16], vm_v16f_t vec); |
| 55 | + |
| 56 | +/// Build a 16 element vector with every element set to the float32 value `fill` |
| 57 | +VMATH_INLINE_DECL vm_v16f_t vm_splat_v16f(vm_float32_t fill); |
| 58 | + |
| 59 | +/// Componentwise sum of two 16 element float32 vectors |
| 60 | +VMATH_INLINE_DECL vm_v16f_t vm_add_v16f(vm_v16f_t a, vm_v16f_t b); |
| 61 | +/// Componentwise difference of two 16 element float32 vectors (presumably a - b; confirm against the implementation) |
| 62 | +VMATH_INLINE_DECL vm_v16f_t vm_sub_v16f(vm_v16f_t a, vm_v16f_t b); |
| 63 | +/// Componentwise product of two 16 element float32 vectors |
| 64 | +VMATH_INLINE_DECL vm_v16f_t vm_mul_v16f(vm_v16f_t a, vm_v16f_t b); |
| 65 | +/// Componentwise quotient of two 16 element float32 vectors (presumably a / b; confirm against the implementation) |
| 66 | +VMATH_INLINE_DECL vm_v16f_t vm_div_v16f(vm_v16f_t a, vm_v16f_t b); |
| 67 | + |
| 68 | +/// Add the constant float32 value `b` to every element of the 16 element vector `a` |
| 69 | +VMATH_INLINE_DECL vm_v16f_t vm_addc_v16f(vm_v16f_t a, vm_float32_t b); |
| 70 | +/// Subtract the constant float32 value `b` from every element of the 16 element |
| 71 | +/// vector `a` |
| 72 | +VMATH_INLINE_DECL vm_v16f_t vm_subc_v16f(vm_v16f_t a, vm_float32_t b); |
| 73 | +/// Multiply every element of the 16 element vector `a` by the constant float32 value `b` |
| 74 | +VMATH_INLINE_DECL vm_v16f_t vm_mulc_v16f(vm_v16f_t a, vm_float32_t b); |
| 75 | +/// Divide every element of the 16 element vector `a` by the constant float32 value `b` |
| 76 | +VMATH_INLINE_DECL vm_v16f_t vm_divc_v16f(vm_v16f_t a, vm_float32_t b); |
| 77 | + |
| 78 | +#endif // ifndef __VMATH_DECL_VEC16_F32_H |
0 commit comments