Skip to content

Commit 4caeaf6

Browse files
committed
WIP: Try using preserve_none for setjmp
The `preserve_none` calling convention is a new calling convention in clang (>= 19) and gcc that preserves a more minimal set of registers (rsp, rbp on x86_64; lr, fp on aarch64). As a result, if this calling convention is used with setjmp, those registers do not need to be stored in the setjmp buffer, allowing us to reduce the size of this buffer and use fewer instructions to save the buffer. The tradeoff of course is that these registers may need to be saved anyway, in which case both the stack usage and the instructions just move to the caller (which is strictly worse). It is not clear that this is useful for exceptions (which already have a fair bit of state anyway, so even in the happy path the savings are not necessarily that big), but I am thinking about using it for #60281, which has different characteristics, so this is an easy way to try out whether there are any unexpected challenges. Note that preserve_none is a very recent compiler feature, so most compilers out there do not have it yet. For compatibility, this PR supports using different jump buffer formats in the runtime and the generated code.
1 parent 6c75e91 commit 4caeaf6

File tree

11 files changed

+288
-26
lines changed

11 files changed

+288
-26
lines changed

src/Makefile

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,14 @@ ifeq ($(OS),WINNT)
112112
SRCS += win32_ucontext
113113
endif
114114

115+
# Assembly sources for minimal setjmp (x86_64 Linux only for now)
116+
ASM_SRCS :=
117+
ifeq ($(OS),Linux)
118+
ifeq ($(ARCH),x86_64)
119+
ASM_SRCS += _jlsetjmp
120+
endif
121+
endif
122+
115123
ifeq ($(WITH_DTRACE),1)
116124
DTRACE_HEADERS := uprobes.h.gen
117125
ifneq ($(OS),Darwin)
@@ -229,6 +237,12 @@ CG_RELEASE_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia -ljulia-internal
229237
OBJS := $(SRCS:%=$(BUILDDIR)/%.o)
230238
DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj)
231239

240+
# Assembly object files
241+
ASM_OBJS := $(ASM_SRCS:%=$(BUILDDIR)/%.o)
242+
ASM_DOBJS := $(ASM_SRCS:%=$(BUILDDIR)/%.dbg.obj)
243+
OBJS += $(ASM_OBJS)
244+
DOBJS += $(ASM_DOBJS)
245+
232246
CODEGEN_OBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.o)
233247
CODEGEN_DOBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.dbg.obj)
234248

@@ -313,6 +327,11 @@ $(BUILDDIR)/%.o: $(SRCDIR)/%.cpp $(SRCDIR)/llvm-version.h $(HEADERS) $(LLVM_CONF
313327
@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(SHIPFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(CXX_DISABLE_ASSERTION) -c $< -o $@)
314328
$(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.cpp $(SRCDIR)/llvm-version.h $(HEADERS) $(LLVM_CONFIG_ABSOLUTE) | $(BUILDDIR)
315329
@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(DEBUGFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) -c $< -o $@)
330+
# Assembly source file rules
331+
$(BUILDDIR)/%.o: $(SRCDIR)/%.S | $(BUILDDIR)
332+
@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(SHIPFLAGS) -c $< -o $@)
333+
$(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.S | $(BUILDDIR)
334+
@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(DEBUGFLAGS) -c $< -o $@)
316335
$(BUILDDIR)/%.o : $(SRCDIR)/%.d
317336
@$(call PRINT_DTRACE, $(DTRACE) -G -s $< -o $@)
318337
$(BUILDDIR)/%.dbg.obj : $(SRCDIR)/%.d

src/_jlsetjmp.S

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
// Minimal setjmp/longjmp implementation for x86_64 Linux
4+
// These are designed to work with the preserve_none calling convention,
5+
// where every register except rsp and rbp is caller-saved. This means we only
6+
// need to save rbp, rsp and the return address - the compiler will spill all
7+
// other live registers before calling setjmp.
8+
//
9+
// IMPORTANT: These functions ONLY work correctly when called with the
10+
// preserve_none calling convention. Without preserve_none, callee-saved
11+
// registers won't be restored properly after longjmp.
12+
13+
#if defined(__x86_64__) && defined(__linux__)
14+
15+
// Mark stack as non-executable
16+
.section .note.GNU-stack,"",@progbits
17+
18+
.text
19+
20+
// ============================================================================
21+
// jl_minimal_setjmp / ijl_minimal_setjmp
// Records rbp, the caller's rsp and the return address in the buffer, then
// returns 0. Both labels name the same entry point.
// In:    r12 = pointer to a buffer of three pointer-sized slots
// Out:   eax = 0
// Clobb: rax, flags
22+
// ============================================================================
23+
.p2align 4,0x90
24+
.globl ijl_minimal_setjmp
25+
.globl jl_minimal_setjmp
26+
.type ijl_minimal_setjmp,@function
27+
.type jl_minimal_setjmp,@function
28+
ijl_minimal_setjmp:
29+
jl_minimal_setjmp:
30+
// N.B: In preserve_none ABI, r12 is the first argument.
31+
// r12 = pointer to buffer (three pointer-sized slots: rbp, rsp, rip)
32+
// Save RSP as it was *before* the call instruction pushed the return address.
33+
lea 8(%rsp), %rax // rax = original RSP (before call); lea leaves flags untouched
34+
mov %rbp, 0(%r12) // buffer[0] = RBP
35+
mov %rax, 8(%r12) // buffer[1] = caller's RSP
36+
mov (%rsp), %rax // rax = return address (top of stack on entry)
37+
mov %rax, 16(%r12) // buffer[2] = return address, the target jl_minimal_longjmp jumps to
38+
xor %eax, %eax // return 0 (setjmp returns 0 on first call)
39+
ret
40+
.size ijl_minimal_setjmp, . - ijl_minimal_setjmp
41+
.size jl_minimal_setjmp, . - jl_minimal_setjmp
42+
43+
44+
// ============================================================================
45+
// jl_minimal_longjmp / ijl_minimal_longjmp
// Restores rbp and rsp from the buffer and jumps to the saved return address
// with a non-zero value in eax. Control does not come back to the caller of
// this function. Both labels name the same entry point.
// In:    rdi = pointer to buffer, esi = requested return value
// Out:   eax = esi, or 1 if esi was 0
// NOTE(review): arguments are read from rdi/esi rather than r12, so this
// presumably expects the standard C calling convention - confirm at callers.
46+
// ============================================================================
47+
.p2align 4,0x90
48+
.globl ijl_minimal_longjmp
49+
.globl jl_minimal_longjmp
50+
.type ijl_minimal_longjmp,@function
51+
.type jl_minimal_longjmp,@function
52+
ijl_minimal_longjmp:
53+
jl_minimal_longjmp:
54+
// rdi = pointer to buffer (three pointer-sized slots: rbp, rsp, rip)
55+
// esi = return value (passed to setjmp caller)
56+
mov %esi, %eax // set return value
57+
test %eax, %eax // longjmp must return non-zero
58+
jne 1f
59+
inc %eax // if val was 0, return 1
60+
1:
61+
mov 0(%rdi), %rbp // restore RBP from buffer[0]
62+
mov 8(%rdi), %rsp // restore RSP (to pre-call value) from buffer[1]
63+
jmp *16(%rdi) // jump to saved return address in buffer[2]
64+
.size ijl_minimal_longjmp, . - ijl_minimal_longjmp
65+
.size jl_minimal_longjmp, . - jl_minimal_longjmp
66+
67+
#endif

src/codegen.cpp

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,6 +1033,14 @@ static const auto jlenter_func = new JuliaFunction<>{
10331033
{T_pjlvalue, getPointerTy(C)}, false); },
10341034
nullptr,
10351035
};
1036+
// Declaration of the runtime function `jl_enter_min_handler` for codegen:
// returns void and takes (pjlvalue, pointer), the same signature shape as the
// `jlenter_func` declaration. No attribute builder is supplied (nullptr).
static const auto jlentermin_func = new JuliaFunction<>{
1037+
XSTR(jl_enter_min_handler),
1038+
[](LLVMContext &C) {
1039+
auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
1040+
return FunctionType::get(getVoidTy(C),
1041+
{T_pjlvalue, getPointerTy(C)}, false); },
1042+
nullptr,
1043+
};
10361044
static const auto jl_current_exception_func = new JuliaFunction<>{
10371045
XSTR(jl_current_exception),
10381046
[](LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C), {JuliaType::get_pjlvalue_ty(C)}, false); },
@@ -1239,6 +1247,17 @@ static const auto jl_object_id__func = new JuliaFunction<TypeFnContextAndSizeT>{
12391247
{T_size, PointerType::get(C, AddressSpace::Derived)}, false); },
12401248
nullptr,
12411249
};
1250+
// Declaration of `jl_minimal_setjmp` for codegen: returns i32 and takes a
// single pointer (the jump buffer). The function carries the ReturnsTwice
// attribute. The triple argument is not used when building the type.
// The preserve_none calling convention is not set here; it is applied on the
// call instruction where this function is called.
static const auto setjmp_min_func = new JuliaFunction<TypeFnContextAndTriple>{
1251+
XSTR(jl_minimal_setjmp),
1252+
[](LLVMContext &C, const Triple &T) {
1253+
return FunctionType::get(getInt32Ty(C),
1254+
{getPointerTy(C)}, false);
1255+
},
1256+
[](LLVMContext &C) { return AttributeList::get(C,
1257+
Attributes(C, {Attribute::ReturnsTwice}),
1258+
AttributeSet(),
1259+
None); },
1260+
};
12421261
static const auto setjmp_func = new JuliaFunction<TypeFnContextAndTriple>{
12431262
jl_setjmp_name,
12441263
[](LLVMContext &C, const Triple &T) {
@@ -6151,8 +6170,13 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result)
61516170
}
61526171
}
61536172
ctx.builder.CreateCall(prepare_call(jlleave_noexcept_func), {get_current_task(ctx), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), handler_to_end.size())});
6173+
// Size handed to the lifetime-end markers below. This must be selected with
// the same `#if JL_HAVE_MIN_SETJMP` test used where the handler buffer is
// allocated; `#ifdef` would pick the other struct if the macro is defined as 0.
#if JL_HAVE_MIN_SETJMP
6174+
auto *handler_sz64 = ConstantInt::get(Type::getInt64Ty(ctx.builder.getContext()),
6175+
sizeof(struct _jl_handler_min_setjmp));
6176+
#else
61546177
auto *handler_sz64 = ConstantInt::get(Type::getInt64Ty(ctx.builder.getContext()),
6155-
sizeof(jl_handler_t));
6178+
sizeof(struct _jl_handler_setjmp));
6179+
#endif
61566180
for (AllocaInst *handler : handler_to_end) {
61576181
ctx.builder.CreateLifetimeEnd(handler, handler_sz64);
61586182
}
@@ -9355,17 +9379,31 @@ static jl_llvm_functions_t
93559379
ctx.ssavalue_assigned[cursor] = true;
93569380
// Actually enter the exception frame
93579381
auto ct = get_current_task(ctx);
9382+
#if JL_HAVE_MIN_SETJMP
9383+
auto *handler_sz64 = ConstantInt::get(Type::getInt64Ty(ctx.builder.getContext()),
9384+
sizeof(struct _jl_handler_min_setjmp));
9385+
AllocaInst* ehbuff = emit_static_alloca(ctx, sizeof(struct _jl_handler_min_setjmp), Align(16));
9386+
#else
93589387
auto *handler_sz64 = ConstantInt::get(Type::getInt64Ty(ctx.builder.getContext()),
9359-
sizeof(jl_handler_t));
9360-
AllocaInst* ehbuff = emit_static_alloca(ctx, sizeof(jl_handler_t), Align(16));
9388+
sizeof(struct _jl_handler_setjmp));
9389+
AllocaInst* ehbuff = emit_static_alloca(ctx, sizeof(struct _jl_handler_setjmp), Align(16));
9390+
#endif
93619391
ctx.eh_buffers[stmt] = ehbuff;
93629392
ctx.builder.CreateLifetimeStart(ehbuff, handler_sz64);
9363-
ctx.builder.CreateCall(prepare_call(jlenter_func), {ct, ehbuff});
93649393
CallInst *sj;
9394+
#if JL_HAVE_MIN_SETJMP
9395+
ctx.builder.CreateCall(prepare_call(jlentermin_func), {ct, ehbuff});
9396+
Value *jmpbuf = emit_ptrgep(ctx, ehbuff, offsetof(struct _jl_handler_min_setjmp, min_eh_ctx));
9397+
sj = ctx.builder.CreateCall(prepare_call(setjmp_min_func), {jmpbuf});
9398+
sj->setCallingConv(CallingConv::PreserveNone);
9399+
#else
9400+
Value *jmpbuf = emit_ptrgep(ctx, ehbuff, offsetof(struct _jl_handler_setjmp, eh_ctx));
9401+
ctx.builder.CreateCall(prepare_call(jlenter_func), {ct, ehbuff});
93659402
if (ctx.emission_context.TargetTriple.isOSWindows())
9366-
sj = ctx.builder.CreateCall(prepare_call(setjmp_func), {ehbuff});
9403+
sj = ctx.builder.CreateCall(prepare_call(setjmp_func), {jmpbuf});
93679404
else
9368-
sj = ctx.builder.CreateCall(prepare_call(setjmp_func), {ehbuff, ConstantInt::get(Type::getInt32Ty(ctx.builder.getContext()), 0)});
9405+
sj = ctx.builder.CreateCall(prepare_call(setjmp_func), {jmpbuf, ConstantInt::get(Type::getInt32Ty(ctx.builder.getContext()), 0)});
9406+
#endif
93699407
// We need to mark this on the call site as well. See issue #6757
93709408
sj->setCanReturnTwice();
93719409
Value *isz = ctx.builder.CreateICmpEQ(sj, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
@@ -9994,6 +10032,9 @@ static void init_jit_functions(void)
999410032
add_named_global(jlnew_func, &jl_new_structv);
999510033
add_named_global(jlsplatnew_func, &jl_new_structt);
999610034
add_named_global(setjmp_func, &jl_setjmp_f);
10035+
#if JL_HAVE_MIN_SETJMP
10036+
add_named_global(setjmp_min_func, &jl_minimal_setjmp);
10037+
#endif
999710038
add_named_global(memcmp_func, &memcmp);
999810039
add_named_global(jltypeerror_func, &jl_type_error);
999910040
add_named_global(jlcheckassign_func, &jl_checked_assignment);
@@ -10009,6 +10050,7 @@ static void init_jit_functions(void)
1000910050
add_named_global(jlmethod_func, &jl_method_def);
1001010051
add_named_global(jlgenericfunction_func, &jl_declare_const_gf);
1001110052
add_named_global(jlenter_func, &jl_enter_handler);
10053+
add_named_global(jlentermin_func, &jl_enter_min_handler);
1001210054
add_named_global(jl_current_exception_func, &jl_current_exception);
1001310055
add_named_global(jlleave_noexcept_func, &jl_pop_handler_noexcept);
1001410056
add_named_global(jlleave_func, &jl_pop_handler);

src/interpreter.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_
469469

470470
static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, int toplevel)
471471
{
472-
jl_handler_t __eh;
472+
jl_handler_preferred_t __eh;
473473
size_t ns = jl_array_nrows(stmts);
474474
jl_task_t *ct = jl_current_task;
475475

@@ -506,7 +506,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
506506
s->locals[jl_source_nslots(s->src) + id] = val;
507507
}
508508
else if (jl_is_enternode(stmt)) {
509-
jl_enter_handler(ct, &__eh);
509+
jl_enter_handler(ct, &__eh._handler);
510510
// This is a bit tricky, but supports the implementation of PhiC nodes.
511511
// They are conceptually slots, but the slot to store to doesn't get explicitly
512512
// mentioned in the store (aka the "UpsilonNode") (this makes them integrate more
@@ -545,29 +545,29 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
545545
// replaced later
546546
JL_GC_PUSH1(&scope);
547547
ct->scope = scope;
548-
if (!jl_setjmp(__eh.eh_ctx, 0)) {
549-
ct->eh = &__eh;
548+
if (!JL_EH_SETJMP(__eh)) {
549+
ct->eh = &__eh._handler;
550550
eval_body(stmts, s, next_ip, toplevel);
551551
jl_unreachable();
552552
}
553553
JL_GC_POP();
554554
}
555555
else {
556-
if (!jl_setjmp(__eh.eh_ctx, 0)) {
557-
ct->eh = &__eh;
556+
if (!JL_EH_SETJMP(__eh)) {
557+
ct->eh = &__eh._handler;
558558
eval_body(stmts, s, next_ip, toplevel);
559559
jl_unreachable();
560560
}
561561
}
562562

563563
if (s->continue_at) { // means we reached a :leave expression
564-
jl_eh_restore_state_noexcept(ct, &__eh);
564+
jl_eh_restore_state_noexcept(ct, &__eh._handler);
565565
ip = s->continue_at;
566566
s->continue_at = 0;
567567
continue;
568568
}
569569
else { // a real exception
570-
jl_eh_restore_state(ct, &__eh);
570+
jl_eh_restore_state(ct, &__eh._handler);
571571
ip = catch_ip;
572572
assert(jl_enternode_catch_dest(stmt) != 0);
573573
continue;
@@ -617,8 +617,8 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
617617
// leave happens during normal control flow, but we must
618618
// longjmp to pop the eval_body call for each enter.
619619
s->continue_at = next_ip;
620-
asan_unpoison_task_stack(ct, &eh->eh_ctx);
621-
jl_longjmp(eh->eh_ctx, 1);
620+
asan_unpoison_eh_task_stack(ct, eh);
621+
jl_eh_longjmp(eh);
622622
}
623623
}
624624
else if (head == jl_pop_exception_sym) {

src/jl_exported_funcs.inc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@
106106
XX(jl_egal) \
107107
XX(jl_egal__bits) \
108108
XX(jl_egal__bitstag) \
109+
XX(jl_eh_longjmp) \
109110
XX(jl_eh_restore_state) \
110111
XX(jl_eh_restore_state_noexcept) \
111112
XX(jl_enter_handler) \
@@ -301,6 +302,8 @@
301302
XX(jl_maxrss) \
302303
XX(jl_method_def) \
303304
XX(jl_method_instance_add_backedge) \
305+
XX(jl_minimal_longjmp) \
306+
XX(jl_minimal_setjmp) \
304307
XX(jl_method_table_add_backedge) \
305308
XX(jl_method_table_disable) \
306309
XX(jl_method_table_for) \

0 commit comments

Comments
 (0)