-
Notifications
You must be signed in to change notification settings - Fork 178
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
goalc: Some more macOS ARM64 work #3290
base: master
Are you sure you want to change the base?
Changes from all commits
c63e6bd
0b667d3
5a0cd2d
dc863b2
e57d430
5581536
7f3b950
19b5eea
1e281d3
0ca6a5a
8bb8dfe
3767847
92ee892
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,7 @@ on: | |
jobs: | ||
build: | ||
name: ARM | ||
runs-on: macos-latest | ||
runs-on: macos-12 | ||
timeout-minutes: 120 | ||
|
||
env: # overrides: https://github.com/mbitsnbites/buildcache/blob/master/doc/configuration.md | ||
|
@@ -27,11 +27,13 @@ jobs: | |
- name: Checkout Repository | ||
uses: actions/checkout@v4 | ||
|
||
- name: Set up ARM64 environment | ||
run: sudo softwareupdate --install-rosetta --agree-to-license | ||
# TODO - not relevant on an intel runner | ||
# - name: Set up ARM64 environment | ||
# run: sudo softwareupdate --install-rosetta --agree-to-license | ||
|
||
- name: Install Package Dependencies | ||
run: arch -arm64 brew install cmake ninja | ||
# TODO - arch -arm64 | ||
run: brew install cmake ninja | ||
|
||
- name: Setup Buildcache | ||
uses: mikehardy/[email protected] | ||
|
@@ -51,8 +53,9 @@ jobs: | |
- name: Build Project | ||
run: cmake --build build --parallel $((`sysctl -n hw.logicalcpu`)) | ||
|
||
- name: Run Tests | ||
run: ./test.sh | ||
# TODO - soon TM | ||
# - name: Run Tests | ||
# run: ./test.sh | ||
|
||
- name: Upload artifact | ||
uses: actions/upload-artifact@v3 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -88,7 +88,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { | |
// count how many xmm's we have to backup | ||
int n_xmm_backups = 0; | ||
for (auto& saved_reg : allocs.used_saved_regs) { | ||
if (saved_reg.is_xmm()) { | ||
if (saved_reg.is_128bit_simd()) { | ||
n_xmm_backups++; | ||
} | ||
} | ||
|
@@ -105,7 +105,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { | |
// back up xmms | ||
int i = 0; | ||
for (auto& saved_reg : allocs.used_saved_regs) { | ||
if (saved_reg.is_xmm()) { | ||
if (saved_reg.is_128bit_simd()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It might be better to keep these as ARM will probably have different code for loading/storing register to the stack. It has instructions for storing multiple registers at once, rather than a sequence of |
||
int offset = i * XMM_SIZE; | ||
m_gen.add_instr_no_ir(f_rec, IGen::store128_xmm128_reg_offset(RSP, saved_reg, offset), | ||
InstructionInfo::Kind::PROLOGUE); | ||
|
@@ -116,7 +116,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { | |
} else { | ||
// back up xmms (currently not aligned) | ||
for (auto& saved_reg : allocs.used_saved_regs) { | ||
if (saved_reg.is_xmm()) { | ||
if (saved_reg.is_128bit_simd()) { | ||
m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm8s(RSP, XMM_SIZE), | ||
InstructionInfo::Kind::PROLOGUE); | ||
m_gen.add_instr_no_ir(f_rec, IGen::store128_gpr64_xmm128(RSP, saved_reg), | ||
|
@@ -183,12 +183,12 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { | |
m_gen.add_instr(IGen::load64_gpr64_plus_s32( | ||
op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE, RSP), | ||
i_rec); | ||
} else if (op.reg.is_xmm() && op.reg_class == RegClass::FLOAT) { | ||
} else if (op.reg.is_128bit_simd() && op.reg_class == RegClass::FLOAT) { | ||
// load xmm32 off of the stack | ||
m_gen.add_instr(IGen::load_reg_offset_xmm32( | ||
op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE), | ||
i_rec); | ||
} else if (op.reg.is_xmm() && | ||
} else if (op.reg.is_128bit_simd() && | ||
(op.reg_class == RegClass::VECTOR_FLOAT || op.reg_class == RegClass::INT_128)) { | ||
m_gen.add_instr(IGen::load128_xmm128_reg_offset( | ||
op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE), | ||
|
@@ -210,12 +210,12 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { | |
m_gen.add_instr(IGen::store64_gpr64_plus_s32( | ||
RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE, op.reg), | ||
i_rec); | ||
} else if (op.reg.is_xmm() && op.reg_class == RegClass::FLOAT) { | ||
} else if (op.reg.is_128bit_simd() && op.reg_class == RegClass::FLOAT) { | ||
// store xmm32 on the stack | ||
m_gen.add_instr(IGen::store_reg_offset_xmm32( | ||
RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE), | ||
i_rec); | ||
} else if (op.reg.is_xmm() && | ||
} else if (op.reg.is_128bit_simd() && | ||
(op.reg_class == RegClass::VECTOR_FLOAT || op.reg_class == RegClass::INT_128)) { | ||
m_gen.add_instr(IGen::store128_xmm128_reg_offset( | ||
RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE), | ||
|
@@ -254,7 +254,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { | |
int j = n_xmm_backups; | ||
for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) { | ||
auto& saved_reg = allocs.used_saved_regs.at(i); | ||
if (saved_reg.is_xmm()) { | ||
if (saved_reg.is_128bit_simd()) { | ||
j--; | ||
int offset = j * XMM_SIZE; | ||
m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_reg_offset(saved_reg, RSP, offset), | ||
|
@@ -268,7 +268,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { | |
} else { | ||
for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) { | ||
auto& saved_reg = allocs.used_saved_regs.at(i); | ||
if (saved_reg.is_xmm()) { | ||
if (saved_reg.is_128bit_simd()) { | ||
m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_gpr64(saved_reg, RSP), | ||
InstructionInfo::Kind::EPILOGUE); | ||
m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm8s(RSP, XMM_SIZE), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -240,7 +240,7 @@ void IR_LoadSymbolPointer::do_codegen(emitter::ObjectGenerator* gen, | |
auto dest_reg = get_reg(m_dest, allocs, irec); | ||
if (m_name == "#f") { | ||
static_assert(false_symbol_offset() == 0, "false symbol location"); | ||
if (dest_reg.is_xmm()) { | ||
if (dest_reg.is_128bit_simd()) { | ||
gen->add_instr(IGen::movq_xmm64_gpr64(dest_reg, gRegInfo.get_st_reg()), irec); | ||
} else { | ||
gen->add_instr(IGen::mov_gpr64_gpr64(dest_reg, gRegInfo.get_st_reg()), irec); | ||
|
@@ -862,7 +862,8 @@ RegAllocInstr IR_ConditionalBranch::to_rai() { | |
void IR_ConditionalBranch::do_codegen(emitter::ObjectGenerator* gen, | ||
const AllocationResult& allocs, | ||
emitter::IR_Record irec) { | ||
Instruction jump_instr(0); | ||
#ifndef __aarch64__ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd rather keep the ability for the compiler to generate either ARM or x86 when built on either ARM or x86. In C++, the code inside the |
||
Instruction jump_instr = InstructionX86(0); | ||
ASSERT(m_resolved); | ||
switch (condition.kind) { | ||
case ConditionKind::EQUAL: | ||
|
@@ -916,6 +917,9 @@ void IR_ConditionalBranch::do_codegen(emitter::ObjectGenerator* gen, | |
|
||
auto jump_rec = gen->add_instr(jump_instr, irec); | ||
gen->link_instruction_jump(jump_rec, gen->get_future_ir_record_in_same_func(irec, label.idx)); | ||
#else | ||
// TODO - ARM64 | ||
#endif | ||
} | ||
|
||
///////////////////// | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -591,7 +591,7 @@ Val* Compiler::compile_real_function_call(const goos::Object& form, | |
|
||
auto cc = get_function_calling_convention(function->type(), m_ts); | ||
RegClass ret_reg_class = RegClass::GPR_64; | ||
if (cc.return_reg && cc.return_reg->is_xmm()) { | ||
if (cc.return_reg && cc.return_reg->is_128bit_simd()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 I think this is a good place to use |
||
ret_reg_class = RegClass::INT_128; | ||
} | ||
|
||
|
@@ -625,7 +625,7 @@ Val* Compiler::compile_real_function_call(const goos::Object& form, | |
const auto& arg = args.at(i); | ||
auto reg = cc.arg_regs.at(i); | ||
arg_outs.push_back( | ||
env->make_ireg(arg->type(), reg.is_xmm() ? RegClass::INT_128 : RegClass::GPR_64)); | ||
env->make_ireg(arg->type(), reg.is_128bit_simd() ? RegClass::INT_128 : RegClass::GPR_64)); | ||
arg_outs.back()->mark_as_settable(); | ||
env->emit_ir<IR_RegSet>(form, arg_outs.back(), arg); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,10 +22,10 @@ struct InstructionInfo { | |
int ir_idx = -1; | ||
int offset = -1; | ||
|
||
InstructionInfo(const emitter::Instruction& _instruction, Kind _kind) | ||
InstructionInfo(const emitter::Instruction _instruction, Kind _kind) | ||
: instruction(_instruction), kind(_kind) {} | ||
|
||
InstructionInfo(const emitter::Instruction& _instruction, Kind _kind, int _ir_idx) | ||
InstructionInfo(const emitter::Instruction _instruction, Kind _kind, int _ir_idx) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Based on what I've read above, I think that you have classes which inherit from The way around this is usually to pass a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (oops - I think I'm wrong since |
||
: instruction(_instruction), kind(_kind), ir_idx(_ir_idx) {} | ||
}; | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(unrelated to your changes here) I think that we will probably want an entirely separate
do_goal_function
for x86 vs. ARM. I think the way that the stack pointer is manipulated will be different, and this currently has some x86-specific things likesub_gpr64_imm8s
which is using an x86-only smaller size instruction because an immediate is only 8-bits.