Skip to content

Commit 6f0978e

Browse files
committed
Support RISCV, stop violating ODR
- remove vector "batches" in favor of using the underlying vec4, vec8, and vec16 types. - provide vec2, vec3, etc. as storage-only types, and additional functions for vec4. - no longer "header only" - I have learned this is only a C++ thing :(
1 parent f167468 commit 6f0978e

File tree

89 files changed

+2313
-1509
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

89 files changed

+2313
-1509
lines changed

.gdbinit

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
set environment CK_FORK=no

build.zig

+31-10
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,25 @@ const builtin = @import("builtin");
33
const zcc = @import("compile_commands");
44
const app_name = "vmath";
55

6-
const test_flags = &[_][]const u8{
6+
const lib_flags = &[_][]const u8{
77
"-std=c99", // need inline and restrict
88
"-pedantic",
99
"-Wall",
10+
"-Werror",
11+
"-march=znver1",
12+
13+
// flag only for lib
1014
"-Iinclude/",
11-
"-march=znver1", // my pc architecture
1215
};
1316

17+
// test flags don't include "-Iinclude/" (the last entry of lib_flags)
18+
const test_flags = lib_flags[0..(lib_flags.len - 1)];
19+
1420
const test_source_files = &[_][]const u8{
1521
"vec2_f32.c",
22+
"vec4_f32.c",
23+
"vec8_f32.c",
24+
"vec16_f32.c",
1625
};
1726

1827
pub fn build(b: *std.Build) !void {
@@ -23,14 +32,24 @@ pub fn build(b: *std.Build) !void {
2332
var tests = std.ArrayList(*std.Build.Step.Compile).init(b.allocator);
2433
defer tests.deinit();
2534

26-
b.installDirectory(.{
27-
.source_dir = .{ .src_path = .{
28-
.sub_path = "include/vmath/",
29-
.owner = b,
30-
} },
31-
.install_dir = .header,
32-
.install_subdir = "vmath/",
35+
var lib = b.addStaticLibrary(.{
36+
.name = "vmath",
37+
.optimize = optimize,
38+
.target = target,
39+
// TODO: figure out how to not have to link libc; it is needed for _mm_malloc
40+
// in xmmintrin.h, but I'm pretty sure it's not needed in theory
41+
.link_libc = true,
42+
});
43+
lib.addCSourceFiles(.{
44+
.root = b.path("src/"),
45+
.files = &.{
46+
"impl.c",
47+
"memutil.c",
48+
},
49+
.flags = lib_flags,
3350
});
51+
lib.installHeadersDirectory(b.path("include/"), "", .{});
52+
b.installArtifact(lib);
3453

3554
for (test_source_files) |source_file| {
3655
var test_exe = b.addExecutable(.{
@@ -45,8 +64,9 @@ pub fn build(b: *std.Build) !void {
4564
} },
4665
.flags = test_flags,
4766
});
48-
test_exe.linkLibCpp();
67+
test_exe.linkLibC();
4968
test_exe.linkSystemLibrary("check");
69+
test_exe.linkLibrary(lib);
5070
try tests.append(test_exe);
5171
}
5272

@@ -65,5 +85,6 @@ pub fn build(b: *std.Build) !void {
6585

6686
try @import("templates/build.zig").generate(b, "code");
6787

88+
try tests.append(lib); // get intellisense for tests + lib
6889
zcc.createStep(b, "cdb", try tests.toOwnedSlice());
6990
}

include/vmath/client_query.h

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
* Header which defines functions related to querying information about the
3+
* running program: runtime known cpu features, compiled features, etc
4+
*/
5+
#ifndef __VMATH_CLIENT_QUERY_H
6+
#define __VMATH_CLIENT_QUERY_H
7+
8+
typedef enum
9+
{
10+
VM_FEATURE_SCALAR = 0x0,
11+
VM_FEATURE_SSE41 = 0x1,
12+
VM_FEATURE_AVX = 0x2,
13+
VM_FEATURE_AVX512 = 0x4,
14+
VM_FEATURE_ARM_NEON = 0x8,
15+
VM_FEATURE_RISCV_V1 = 0x10,
16+
} vm_feature_flags;
17+
18+
vm_feature_flags vm_get_features(void);
19+
20+
/// Get a string describing the feature which grants the current largest
21+
/// available SIMD register size. On ARM, this is always "Arm Neon". On x86 with
22+
/// AVX2 but not AVX512, this will return "x86 AVX2".
23+
const char* vm_get_feature_string(void);
24+
25+
#endif

include/vmath/decl/vec16_f32.h

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#ifndef __VMATH_DECL_VEC16_F32_H
2+
#define __VMATH_DECL_VEC16_F32_H
3+
4+
#include "vmath/internal/intrinsics.h"
5+
#include "vmath/internal/stdfloat.h"
6+
7+
typedef struct VMATH_ALIGNED(64)
8+
{
9+
vm_float32_t buffer[16];
10+
} vm_v16fs_t;
11+
12+
#if defined(VMATH_AVX512_GENERIC_ENABLE)
13+
14+
typedef __m512 vm_v16f_t;
15+
16+
#elif defined(VMATH_AVX256_GENERIC_ENABLE)
17+
18+
// emulate 512 bits with 2x256
19+
typedef struct VMATH_ALIGNED(64)
20+
{
21+
__m256 buffer[2];
22+
} vm_v16f_t;
23+
24+
#elif defined(VMATH_SSE41_ENABLE)
25+
26+
// emulate 512 bits with 4x128
27+
typedef struct VMATH_ALIGNED(64)
28+
{
29+
__m128 buffer[4];
30+
} vm_v16f_t;
31+
32+
#elif defined(VMATH_ARM_ENABLE) || defined(VMATH_ARM64_ENABLE)
33+
#error ARM SIMD not implemented
34+
#elif defined(VMATH_RISCV_V1_ENABLE)
35+
#error RISCV vector extensions not implemented
36+
#else
37+
38+
typedef struct
39+
{
40+
vm_float32_t buffer[16];
41+
} vm_v16f_t;
42+
43+
#endif
44+
45+
/// Load 16 contiguous floats from memory. Memory must be 64 byte aligned.
46+
VMATH_INLINE_DECL vm_v16f_t vm_load_v16f(const vm_v16fs_t* vec);
47+
/// Load 16 contiguous floats from memory as a buffer of floats. Memory must be
48+
/// 64 byte aligned.
49+
VMATH_INLINE_DECL vm_v16f_t vm_loadb_v16f(const vm_float32_t vec[16]);
50+
/// Store 16 contiguous floats to memory. Memory must be 64 byte aligned.
51+
VMATH_INLINE_DECL void vm_store_v16f(vm_v16fs_t* output, vm_v16f_t vec);
52+
/// Store 16 contiguous floats to memory as a buffer of floats. Memory must be
53+
/// 64 byte aligned.
54+
VMATH_INLINE_DECL void vm_storeb_v16f(vm_float32_t output[16], vm_v16f_t vec);
55+
56+
/// Load a float32 into all elements of a 16 element vector
57+
VMATH_INLINE_DECL vm_v16f_t vm_splat_v16f(vm_float32_t fill);
58+
59+
/// Add two 16 element float32 vectors together, componentwise
60+
VMATH_INLINE_DECL vm_v16f_t vm_add_v16f(vm_v16f_t a, vm_v16f_t b);
61+
/// Subtract a 16 element float32 vector from another, componentwise
62+
VMATH_INLINE_DECL vm_v16f_t vm_sub_v16f(vm_v16f_t a, vm_v16f_t b);
63+
/// Multiply two 16 element float32 vectors together, componentwise
64+
VMATH_INLINE_DECL vm_v16f_t vm_mul_v16f(vm_v16f_t a, vm_v16f_t b);
65+
/// Divide a 16 element float32 vector by another, componentwise
66+
VMATH_INLINE_DECL vm_v16f_t vm_div_v16f(vm_v16f_t a, vm_v16f_t b);
67+
68+
/// Add a constant float32 value to all the elements of a 16 element vector
69+
VMATH_INLINE_DECL vm_v16f_t vm_addc_v16f(vm_v16f_t a, vm_float32_t b);
70+
/// Subtract a constant float32 value from all the elements of a 16 element
71+
/// vector
72+
VMATH_INLINE_DECL vm_v16f_t vm_subc_v16f(vm_v16f_t a, vm_float32_t b);
73+
/// Multiply all the elements of a 16 element vector by a constant float32 value
74+
VMATH_INLINE_DECL vm_v16f_t vm_mulc_v16f(vm_v16f_t a, vm_float32_t b);
75+
/// Divide all the elements of a 16 element vector by a constant float32 value
76+
VMATH_INLINE_DECL vm_v16f_t vm_divc_v16f(vm_v16f_t a, vm_float32_t b);
77+
78+
#endif // ifndef __VMATH_DECL_VEC16_F32_H

0 commit comments

Comments
 (0)