Skip to content

Restore alloca type#2902

Merged
wsmoses merged 8 commits into main from
rat
Jan 19, 2026
Merged

Restore alloca type#2902
wsmoses merged 8 commits into main from
rat

Conversation

@wsmoses
Copy link
Member

@wsmoses wsmoses commented Jan 15, 2026

Requires JuliaLang/julia#60695 to be effective.

Fixes numerous 1.12 problems.

Essentially, suppose we have a GC allocation of a complex64. The AllocOpt pass will change this into a [16 x i8] alloca. SROA will then turn that into 16 individual byte-sized extract/insert operations, which we cannot actually handle properly.

@wsmoses wsmoses requested a review from vchuravy January 15, 2026 03:54
@github-actions
Copy link
Contributor

github-actions bot commented Jan 15, 2026

Your PR requires formatting changes to meet the project's style guidelines.
Please consider running Runic (git runic main) to apply these changes.

Click here to view the suggested changes.
diff --git a/src/compiler/optimize.jl b/src/compiler/optimize.jl
index 5651bc55..61bf69f4 100644
--- a/src/compiler/optimize.jl
+++ b/src/compiler/optimize.jl
@@ -79,7 +79,7 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine)
     function middle_optimize!(second_stage=false)
     @dispose pb = NewPMPassBuilder() begin
         registerEnzymeAndPassPipeline!(pb)
-        register!(pb, RestoreAllocaType())
+            register!(pb, RestoreAllocaType())
         add!(pb, NewPMAAManager()) do aam
             add!(aam, ScopedNoAliasAA())
             add!(aam, TypeBasedAA())
@@ -102,7 +102,7 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine)
                 add!(fpm, ReassociatePass())
                 add!(fpm, EarlyCSEPass())
                 add!(fpm, AllocOptPass())
-                add!(fpm, RestoreAllocaType())
+                    add!(fpm, RestoreAllocaType())
 
                 add!(fpm, NewPMLoopPassManager(use_memory_ssa=true)) do lpm
                     add!(lpm, LoopIdiomRecognizePass())
@@ -122,7 +122,7 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine)
 		# todo peeling=false?
                 add!(fpm, LoopUnrollPass(opt_level=2, partial=false)) # what opt level?
                 add!(fpm, AllocOptPass())
-                add!(fpm, RestoreAllocaType())
+                    add!(fpm, RestoreAllocaType())
                 add!(fpm, SROAPass())
                 add!(fpm, GVNPass())
 
@@ -137,7 +137,7 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine)
                 add!(fpm, JumpThreadingPass())
                 add!(fpm, DSEPass())
                 add!(fpm, AllocOptPass())
-                add!(fpm, RestoreAllocaType())
+                    add!(fpm, RestoreAllocaType())
                 add!(fpm, SimplifyCFGPass())
 
 
@@ -229,7 +229,7 @@ function addOptimizationPasses!(mpm::LLVM.NewPMPassManager)
         # merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt`
         # pass.
 
-        add!(fpm, AllocOptPass())        
+        add!(fpm, AllocOptPass())
         add!(fpm, RestoreAllocaType())
         # consider AggressiveInstCombinePass at optlevel > 2
 
@@ -292,7 +292,7 @@ function addOptimizationPasses!(mpm::LLVM.NewPMPassManager)
 
         # More dead allocation (store) deletion before loop optimization
         # consider removing this:
-        add!(fpm, AllocOptPass())        
+        add!(fpm, AllocOptPass())
         add!(fpm, RestoreAllocaType())
 
         # see if all of the constant folding has exposed more loops
@@ -446,13 +446,13 @@ function post_optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine, machine::Bool
         registerEnzymeAndPassPipeline!(pb)
         register!(pb, ReinsertGCMarkerPass())
         register!(pb, SafeAtomicToRegularStorePass())
-		register!(pb, RestoreAllocaType())
+        register!(pb, RestoreAllocaType())
         add!(pb, NewPMAAManager()) do aam
             add!(aam, ScopedNoAliasAA())
             add!(aam, TypeBasedAA())
             add!(aam, BasicAA())
         end
-        add!(pb, NewPMModulePassManager()) do mpm		
+        add!(pb, NewPMModulePassManager()) do mpm
             addOptimizationPasses!(mpm)
             if machine
                 # TODO enable validate_return_roots
diff --git a/src/llvm/attributes.jl b/src/llvm/attributes.jl
index 1b5128ea..3c5e1381 100644
--- a/src/llvm/attributes.jl
+++ b/src/llvm/attributes.jl
@@ -1,5 +1,5 @@
 const nofreefns = Set{String}((
-    "jl_genericmemory_copyto",
+        "jl_genericmemory_copyto",
     "utf8proc_toupper",
     "ClientGetAddressableDevices",
     "ClientNumAddressableDevices",
diff --git a/src/llvm/transforms.jl b/src/llvm/transforms.jl
index ec4f1401..a449155a 100644
--- a/src/llvm/transforms.jl
+++ b/src/llvm/transforms.jl
@@ -1,5 +1,5 @@
 function restore_alloca_type!(f::LLVM.Function)
-    replaceAndErase = Tuple{LLVM.AllocaInst,Type, LLVMType}[]
+    replaceAndErase = Tuple{LLVM.AllocaInst, Type, LLVMType}[]
     dl = datalayout(LLVM.parent(f))
 
     for bb in blocks(f), inst in instructions(bb)
@@ -39,12 +39,12 @@ function restore_alloca_type!(f::LLVM.Function)
         cst = al2
         if value_type(cst) != value_type(al)
             cst = bitcast!(b, cst, value_type(al))
-        end        
+        end
         LLVM.replace_uses!(al, cst)
         LLVM.API.LLVMInstructionEraseFromParent(al)
         metadata(inst)["enzymejl_allocart"] = MDNode(LLVM.Metadata[MDString(string(convert(UInt, unsafe_to_pointer(RT))))])
     end
-	return length(replaceAndErase) != 0
+    return length(replaceAndErase) != 0
 end
 
 # Rewrite calls with "jl_roots" to only have the jl_value_t attached and not  { { {} addrspace(10)*, [1 x [2 x i64]], i64, i64 }, [2 x i64] } %unbox110183_replacementA
@@ -2691,7 +2691,7 @@ function removeDeadArgs!(mod::LLVM.Module, tm::LLVM.TargetMachine, post_gc_fixup
     propagate_returned!(mod)
     LLVM.@dispose pb = NewPMPassBuilder() begin
         registerEnzymeAndPassPipeline!(pb)
-		register!(pb, RestoreAllocaType())
+        register!(pb, RestoreAllocaType())
         add!(pb, NewPMModulePassManager()) do mpm
             add!(mpm, NewPMFunctionPassManager()) do fpm
                 add!(fpm, InstCombinePass())
@@ -2720,7 +2720,7 @@ function removeDeadArgs!(mod::LLVM.Module, tm::LLVM.TargetMachine, post_gc_fixup
     LLVM.@dispose pb = NewPMPassBuilder() begin
         registerEnzymeAndPassPipeline!(pb)
         register!(pb, EnzymeAttributorPass())
-		register!(pb, RestoreAllocaType())
+        register!(pb, RestoreAllocaType())
         add!(pb, NewPMModulePassManager()) do mpm
             add!(mpm, NewPMFunctionPassManager()) do fpm
                 add!(fpm, InstCombinePass())
diff --git a/src/typeutils/lltypes.jl b/src/typeutils/lltypes.jl
index 0723dcd7..5b325943 100644
--- a/src/typeutils/lltypes.jl
+++ b/src/typeutils/lltypes.jl
@@ -98,7 +98,7 @@ function strip_tracked_pointers(@nospecialize(T::LLVM.LLVMType))
         for (i, t) in enumerate(LLVM.elements(ty))
             push!(subtypes, strip_tracked_pointers(t))
         end
-        return LLVM.StructType(subtypes; packed=LLVM.ispacked(T))
+        return LLVM.StructType(subtypes; packed = LLVM.ispacked(T))
     end
 
     throw(AssertionError("Unknown composite type"))


EnzymeAttributorPass() = NewPMModulePass("enzyme_attributor", enzyme_attributor_pass!)
ReinsertGCMarkerPass() = NewPMFunctionPass("reinsert_gcmarker", reinsert_gcmarker_pass!)
RestoreAllocaType() = NewPMFunctionPass("restore_alloca_type", restore_alloca_type!)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vchuravy can you check what obvious thing I'm doing wrong here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

register!(pb, RestoreAllocaType())

@oscardssmith
Copy link
Collaborator

With this on top of the 1.12 backports, does SciML work?

@wsmoses
Copy link
Member Author

wsmoses commented Jan 16, 2026

Well, we first need to fix the "I'm silly and didn't register the pass correctly here" problem; then we can test.

@gbaraldi
Copy link
Collaborator

gbaraldi commented Jan 16, 2026

Given JuliaLang/julia#60699, is this still necessary? They will probably both be in the same release regardless.

@wsmoses
Copy link
Member Author

wsmoses commented Jan 16, 2026

Yes, it's separately necessary to determine the type of the alloca.

vchuravy and others added 3 commits January 16, 2026 17:16
Return a boolean indicating if replacements and erasures occurred.
@kshyatt kshyatt mentioned this pull request Jan 17, 2026
@codecov
Copy link

codecov bot commented Jan 18, 2026

Codecov Report

❌ Patch coverage is 46.06742% with 48 lines in your changes missing coverage. Please review.
✅ Project coverage is 66.89%. Comparing base (b985fae) to head (fff6238).
⚠️ Report is 9 commits behind head on main.

Files with missing lines Patch % Lines
src/llvm/transforms.jl 30.95% 29 Missing ⚠️
src/typeutils/lltypes.jl 0.00% 19 Missing ⚠️
Additional details and impacted files
@@            Coverage Diff             @@
##             main    #2902      +/-   ##
==========================================
- Coverage   66.98%   66.89%   -0.10%     
==========================================
  Files          58       58              
  Lines       21216    21308      +92     
==========================================
+ Hits        14212    14254      +42     
- Misses       7004     7054      +50     

☔ View full report in Codecov by Sentry.
📢 Have feedback on the report? Share it here.

🚀 New features to boost your workflow:
  • ❄️ Test Analytics: Detect flaky tests, report on failures, and find test suite problems.

@github-actions
Copy link
Contributor

github-actions bot commented Jan 18, 2026

Benchmark Results

main fff6238... main / fff6238...
basics/make_zero/namedtuple 0.0522 ± 0.0018 μs 0.0525 ± 0.0019 μs 0.995 ± 0.049
basics/make_zero/struct 0.253 ± 0.0051 μs 0.266 ± 0.0043 μs 0.95 ± 0.024
basics/overhead 4.03 ± 0.93 ns 4.64 ± 0.011 ns 0.868 ± 0.2
basics/remake_zero!/namedtuple 0.237 ± 0.0054 μs 0.24 ± 0.0076 μs 0.989 ± 0.039
basics/remake_zero!/struct 0.24 ± 0.0056 μs 0.236 ± 0.0073 μs 1.01 ± 0.039
fold_broadcast/multidim_sum_bcast/1D 10.3 ± 0.24 μs 10.3 ± 0.42 μs 0.993 ± 0.047
fold_broadcast/multidim_sum_bcast/2D 10.3 ± 0.16 μs 10.3 ± 0.15 μs 1 ± 0.021
time_to_load 0.984 ± 0.0062 s 0.993 ± 0.0067 s 0.992 ± 0.0092

Benchmark Plots

A plot of the benchmark results has been uploaded as an artifact at https://github.com/EnzymeAD/Enzyme.jl/actions/runs/21123951548/artifacts/5172079042.

@wsmoses wsmoses merged commit 055a1ff into main Jan 19, 2026
53 of 60 checks passed
@wsmoses wsmoses deleted the rat branch January 19, 2026 06:02
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants