Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

goalc: Some more macOS ARM64 work #3290

Draft
wants to merge 13 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions .github/workflows/build-matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ jobs:
cmakePreset: "Release-macos-clang"
cachePrefix: ""

# Q4 2023 there will hopefully be native arm64 runners
# https://github.com/github/roadmap/issues/528
# build_macos_arm:
# name: "🍎 MacOS"
# uses: ./.github/workflows/macos-build-arm.yaml
# with:
# cmakePreset: "Release-macos-clang"
# cachePrefix: ""
# There are ARM64 macOS runners, but they aren't free _yet_
# limited to the large runners right now
build_macos_arm:
name: "🍎 MacOS"
uses: ./.github/workflows/macos-build-arm.yaml
with:
cmakePreset: "Release-macos-arm64-clang"
cachePrefix: ""
15 changes: 9 additions & 6 deletions .github/workflows/macos-build-arm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ on:
jobs:
build:
name: ARM
runs-on: macos-latest
runs-on: macos-12
timeout-minutes: 120

env: # overrides: https://github.com/mbitsnbites/buildcache/blob/master/doc/configuration.md
Expand All @@ -27,11 +27,13 @@ jobs:
- name: Checkout Repository
uses: actions/checkout@v4

- name: Set up ARM64 environment
run: sudo softwareupdate --install-rosetta --agree-to-license
# TODO - not relevant on an intel runner
# - name: Set up ARM64 environment
# run: sudo softwareupdate --install-rosetta --agree-to-license

- name: Install Package Dependencies
run: arch -arm64 brew install cmake ninja
# TODO - arch -arm64
run: brew install cmake ninja

- name: Setup Buildcache
uses: mikehardy/[email protected]
Expand All @@ -51,8 +53,9 @@ jobs:
- name: Build Project
run: cmake --build build --parallel $((`sysctl -n hw.logicalcpu`))

- name: Run Tests
run: ./test.sh
# TODO - soon TM
# - name: Run Tests
# run: ./test.sh

- name: Upload artifact
uses: actions/upload-artifact@v3
Expand Down
22 changes: 20 additions & 2 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,32 @@
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Run C++ Tests LLDB",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/goalc-test",
"args": [
"--gtest_brief=0",
"--gtest_filter=*CodeTester*",
"--gtest_break_on_failure"
],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"MIMode": "lldb"
},
{
"name": "Append File Docs",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/scripts/ci/lint-characters.py",
"console": "integratedTerminal",
"cwd": "${workspaceFolder}",
"args": ["--fix"]
"args": [
"--fix"
]
},
]
}
}
6 changes: 5 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
message(STATUS "AppleClang detected - Setting Defaults")
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} \
-march=native \
-Wall \
-Winit-self \
-ggdb \
Expand All @@ -121,6 +120,11 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
-fdiagnostics-color=always"
)

# TODO - make a proper flag for arm compiling
if(NOT CMAKE_CXX_COMPILER_TARGET STREQUAL "arm64-apple-darwin")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
endif()

# additional c++ flags for release mode for our projects
if(CMAKE_BUILD_TYPE MATCHES "Release")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
Expand Down
10 changes: 10 additions & 0 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,16 @@
"description": "Build with Clang as Release without Debug Symbols",
"inherits": ["base-macos-release", "base-clang"]
},
{
"name": "Release-macos-arm64-clang",
"displayName": "MacOS ARM64 Release (clang)",
"description": "Build with Clang, cross compiled for ARM64, as Release without Debug Symbols",
"inherits": ["base-macos-release", "base-clang"],
"cacheVariables": {
"CMAKE_C_COMPILER_TARGET": "arm64-apple-darwin",
"CMAKE_CXX_COMPILER_TARGET": "arm64-apple-darwin"
}
},
{
"name": "Release-macos-clang-static",
"displayName": "MacOS Static Release (clang)",
Expand Down
11 changes: 11 additions & 0 deletions Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ tasks:
ignore_error: true
- cmd: npx prettier --write ./decompiler/config/jak2/**/*.jsonc
ignore_error: true
gen-cmake:
desc: "Generate the CMake"
cmds:
- "cmake -B build --preset={{.CMAKE_PRESET}}"
build:
desc: "Build the project using the generated CMake"
cmds:
- "cmake --build build --parallel {{.CMAKE_NUM_THREADS}}"
# DECOMPILING
decomp:
cmds:
Expand Down Expand Up @@ -172,3 +180,6 @@ tasks:
type-test:
cmds:
- cmd: '{{.GOALCTEST_BIN_RELEASE_DIR}}/goalc-test --gtest_brief=0 --gtest_filter="*Jak2TypeConsistency*" --gtest_break_on_failure'
tests-filtered:
cmds:
- cmd: '{{.GOALCTEST_BIN_RELEASE_DIR}}/goalc-test --gtest_brief=0 --gtest_filter="*{{.FILTER}}*" --gtest_break_on_failure'
1 change: 1 addition & 0 deletions common/util/os.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ void __cpuidex(int result[4], int eax, int ecx) {
: "0"(eax), "2"(ecx));
}
#else
// TODO - implement ARM64 detection, check for NEON instead of AVX
// for now, just return 0's.
void __cpuidex(int result[4], int eax, int ecx) {
lg::warn("cpuid not implemented on this platform");
Expand Down
6 changes: 3 additions & 3 deletions game/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ else()
message(STATUS "Non-ARM64 architecture detected")
endif()

if(ARM64_ARCH)
# TODO - kinda a gross hack, should pass in an explicit flag later
if(ARM64_ARCH OR CMAKE_CXX_COMPILER_TARGET STREQUAL "arm64-apple-darwin")
# Add your ARM64-specific configuration or build options here
set(OG_ASM_FUNCS_FILE kernel/asm_funcs_arm64.s)
enable_language(ASM)
set(CMAKE_ASM_SOURCE_FILE_EXTENSIONS ${CMAKE_ASM_SOURCE_FILE_EXTENSIONS} s)
# set(CMAKE_ASM_COMPILE_OBJECT "${CMAKE_ASM_COMPILER} -o <OBJECT> <SOURCE>")
set_source_files_properties(${OG_ASM_FUNCS_FILE} PROPERTIES COMPILE_FLAGS "-g")
set_source_files_properties(${OG_ASM_FUNCS_FILE} PROPERTIES COMPILE_FLAGS "-arch arm64 -g")
else()
set(OG_ASM_FUNCS_FILE kernel/asm_funcs_x86_64.asm)
enable_language(ASM_NASM)
Expand Down
2 changes: 1 addition & 1 deletion game/graphics/opengl_renderer/background/Shrub.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ void Shrub::render_tree(int idx,
interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data());
}
#else
interp_time_of_day_slow(settings.itimes, *tree.colors, m_color_result.data());
interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data());
#endif
tree.perf.tod_time.add(interp_timer.getSeconds());

Expand Down
2 changes: 1 addition & 1 deletion game/graphics/opengl_renderer/background/TFragment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ void TFragment::render_tree(int geom,
interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data());
}
#else
interp_time_of_day_slow(settings.itimes, *tree.colors, m_color_result.data());
interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data());
#endif
glActiveTexture(GL_TEXTURE10);
glBindTexture(GL_TEXTURE_1D, tree.time_of_day_texture);
Expand Down
2 changes: 1 addition & 1 deletion game/graphics/opengl_renderer/background/Tie3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ void Tie3::setup_tree(int idx,
interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data());
}
#else
interp_time_of_day_slow(settings.itimes, *tree.colors, m_color_result.data());
interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data());
#endif

glActiveTexture(GL_TEXTURE10);
Expand Down
7 changes: 7 additions & 0 deletions game/runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,13 @@ void deci2_runner(SystemThreadInterface& iface) {
void ee_runner(SystemThreadInterface& iface) {
prof().root_event();
// Allocate Main RAM. Must have execute enabled.
// TODO Apple Silicone - You cannot make a page be RWX,
// or more specifically it can't be both writable and executable at the same time
//
// https://github.com/zherczeg/sljit/issues/99
//
// The solution to this is to flip-flop between permissions, or perhaps have two threads
// one that has writing permission, and another with executable permission
if (EE_MEM_LOW_MAP) {
g_ee_main_mem =
(u8*)mmap((void*)0x10000000, EE_MAIN_MEM_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE,
Expand Down
2 changes: 1 addition & 1 deletion game/system/hid/input_bindings.h
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ extern const InputBindingGroups DEFAULT_MOUSE_BINDS;
// So there are some potential solutions but this doesn't feel high priority and this was always an
// issue.
struct CommandBinding {
enum Source { CONTROLLER, KEYBOARD, MOUSE };
enum class Source { CONTROLLER, KEYBOARD, MOUSE };

CommandBinding(const u32 _host_key, std::function<void()> _command)
: host_key(_host_key), command(_command){};
Expand Down
2 changes: 2 additions & 0 deletions goalc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ add_library(compiler
data_compiler/DataObjectGenerator.cpp
debugger/Debugger.cpp
debugger/DebugInfo.cpp
emitter/IGenX86.cpp
emitter/IGenARM64.cpp
listener/Listener.cpp
listener/MemoryMap.cpp
make/MakeSystem.cpp
Expand Down
18 changes: 9 additions & 9 deletions goalc/compiler/CodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
// count how many xmm's we have to backup
int n_xmm_backups = 0;
for (auto& saved_reg : allocs.used_saved_regs) {
if (saved_reg.is_xmm()) {
if (saved_reg.is_128bit_simd()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(unrelated to your changes here) I think that we will probably want an entirely separate do_goal_function for x86 vs. ARM. I think the way that the stack pointer is manipulated will be different, and this currently has some x86-specific things like sub_gpr64_imm8s which is using an x86-only smaller size instruction because an immediate is only 8-bits.

n_xmm_backups++;
}
}
Expand All @@ -105,7 +105,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
// back up xmms
int i = 0;
for (auto& saved_reg : allocs.used_saved_regs) {
if (saved_reg.is_xmm()) {
if (saved_reg.is_128bit_simd()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be better to keep these as is_xmm. I think having both an is_xmm (true only for x86 XMM) and an is_128bit_simd (true for both arm/x86 vector regs) is good, rather than trying to have a abstraction over "general purpose" and "128bit" registers at this level.

ARM will probably have different code for loading/storing register to the stack. It has instructions for storing multiple registers at once, rather than a sequence of push instructions like we used on x86.

int offset = i * XMM_SIZE;
m_gen.add_instr_no_ir(f_rec, IGen::store128_xmm128_reg_offset(RSP, saved_reg, offset),
InstructionInfo::Kind::PROLOGUE);
Expand All @@ -116,7 +116,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
} else {
// back up xmms (currently not aligned)
for (auto& saved_reg : allocs.used_saved_regs) {
if (saved_reg.is_xmm()) {
if (saved_reg.is_128bit_simd()) {
m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm8s(RSP, XMM_SIZE),
InstructionInfo::Kind::PROLOGUE);
m_gen.add_instr_no_ir(f_rec, IGen::store128_gpr64_xmm128(RSP, saved_reg),
Expand Down Expand Up @@ -183,12 +183,12 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
m_gen.add_instr(IGen::load64_gpr64_plus_s32(
op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE, RSP),
i_rec);
} else if (op.reg.is_xmm() && op.reg_class == RegClass::FLOAT) {
} else if (op.reg.is_128bit_simd() && op.reg_class == RegClass::FLOAT) {
// load xmm32 off of the stack
m_gen.add_instr(IGen::load_reg_offset_xmm32(
op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
i_rec);
} else if (op.reg.is_xmm() &&
} else if (op.reg.is_128bit_simd() &&
(op.reg_class == RegClass::VECTOR_FLOAT || op.reg_class == RegClass::INT_128)) {
m_gen.add_instr(IGen::load128_xmm128_reg_offset(
op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
Expand All @@ -210,12 +210,12 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
m_gen.add_instr(IGen::store64_gpr64_plus_s32(
RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE, op.reg),
i_rec);
} else if (op.reg.is_xmm() && op.reg_class == RegClass::FLOAT) {
} else if (op.reg.is_128bit_simd() && op.reg_class == RegClass::FLOAT) {
// store xmm32 on the stack
m_gen.add_instr(IGen::store_reg_offset_xmm32(
RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
i_rec);
} else if (op.reg.is_xmm() &&
} else if (op.reg.is_128bit_simd() &&
(op.reg_class == RegClass::VECTOR_FLOAT || op.reg_class == RegClass::INT_128)) {
m_gen.add_instr(IGen::store128_xmm128_reg_offset(
RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
Expand Down Expand Up @@ -254,7 +254,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
int j = n_xmm_backups;
for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) {
auto& saved_reg = allocs.used_saved_regs.at(i);
if (saved_reg.is_xmm()) {
if (saved_reg.is_128bit_simd()) {
j--;
int offset = j * XMM_SIZE;
m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_reg_offset(saved_reg, RSP, offset),
Expand All @@ -268,7 +268,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
} else {
for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) {
auto& saved_reg = allocs.used_saved_regs.at(i);
if (saved_reg.is_xmm()) {
if (saved_reg.is_128bit_simd()) {
m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_gpr64(saved_reg, RSP),
InstructionInfo::Kind::EPILOGUE);
m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm8s(RSP, XMM_SIZE),
Expand Down
8 changes: 6 additions & 2 deletions goalc/compiler/IR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ void IR_LoadSymbolPointer::do_codegen(emitter::ObjectGenerator* gen,
auto dest_reg = get_reg(m_dest, allocs, irec);
if (m_name == "#f") {
static_assert(false_symbol_offset() == 0, "false symbol location");
if (dest_reg.is_xmm()) {
if (dest_reg.is_128bit_simd()) {
gen->add_instr(IGen::movq_xmm64_gpr64(dest_reg, gRegInfo.get_st_reg()), irec);
} else {
gen->add_instr(IGen::mov_gpr64_gpr64(dest_reg, gRegInfo.get_st_reg()), irec);
Expand Down Expand Up @@ -862,7 +862,8 @@ RegAllocInstr IR_ConditionalBranch::to_rai() {
void IR_ConditionalBranch::do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) {
Instruction jump_instr(0);
#ifndef __aarch64__
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd rather keep the ability for the compiler to generate either ARM or x86 when built on either ARM or x86.

In C++, the code inside the #else here won't be checked by the compiler at all on x86, which will make it harder for developers on x86 to know if their changes broke the build on ARM.

Instruction jump_instr = InstructionX86(0);
ASSERT(m_resolved);
switch (condition.kind) {
case ConditionKind::EQUAL:
Expand Down Expand Up @@ -916,6 +917,9 @@ void IR_ConditionalBranch::do_codegen(emitter::ObjectGenerator* gen,

auto jump_rec = gen->add_instr(jump_instr, irec);
gen->link_instruction_jump(jump_rec, gen->get_future_ir_record_in_same_func(irec, label.idx));
#else
// TODO - ARM64
#endif
}

/////////////////////
Expand Down
4 changes: 2 additions & 2 deletions goalc/compiler/compilation/Function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,7 @@ Val* Compiler::compile_real_function_call(const goos::Object& form,

auto cc = get_function_calling_convention(function->type(), m_ts);
RegClass ret_reg_class = RegClass::GPR_64;
if (cc.return_reg && cc.return_reg->is_xmm()) {
if (cc.return_reg && cc.return_reg->is_128bit_simd()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 I think this is a good place to use is_128bit_simd over is_xmm

ret_reg_class = RegClass::INT_128;
}

Expand Down Expand Up @@ -625,7 +625,7 @@ Val* Compiler::compile_real_function_call(const goos::Object& form,
const auto& arg = args.at(i);
auto reg = cc.arg_regs.at(i);
arg_outs.push_back(
env->make_ireg(arg->type(), reg.is_xmm() ? RegClass::INT_128 : RegClass::GPR_64));
env->make_ireg(arg->type(), reg.is_128bit_simd() ? RegClass::INT_128 : RegClass::GPR_64));
arg_outs.back()->mark_as_settable();
env->emit_ir<IR_RegSet>(form, arg_outs.back(), arg);
}
Expand Down
4 changes: 2 additions & 2 deletions goalc/debugger/disassemble.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ struct InstructionInfo {
int ir_idx = -1;
int offset = -1;

InstructionInfo(const emitter::Instruction& _instruction, Kind _kind)
InstructionInfo(const emitter::Instruction _instruction, Kind _kind)
: instruction(_instruction), kind(_kind) {}

InstructionInfo(const emitter::Instruction& _instruction, Kind _kind, int _ir_idx)
InstructionInfo(const emitter::Instruction _instruction, Kind _kind, int _ir_idx)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on what I've read above, I think that you have classes which inherit from Instruction, and you'll pass them here. This will cause the copy constructor of Instruction (the base class) to be called, which will cause slicing (see https://stackoverflow.com/questions/274626/what-is-object-slicing#274636). This ends up being undefined behavior and probably means that any virtual methods of Instruction that you call will crash or silently do the wrong thing.

The way around this is usually to pass a std::unique_ptr<Instruction> or std::shared_ptr<Instruction>... but it might be worth checking what we actually need an instruction for here. As far as I see, we just need the length, rather than the whole instruction.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(oops - I think I'm wrong since Instruction is just InstructionX86 or InstructionARM)
In that case, can we keep this as a const ref?

: instruction(_instruction), kind(_kind), ir_idx(_ir_idx) {}
};

Expand Down
Loading