Conversation
|
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.
diff --git a/src/compiler/optimize.jl b/src/compiler/optimize.jl
index 5651bc55..61bf69f4 100644
--- a/src/compiler/optimize.jl
+++ b/src/compiler/optimize.jl
@@ -79,7 +79,7 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine)
function middle_optimize!(second_stage=false)
@dispose pb = NewPMPassBuilder() begin
registerEnzymeAndPassPipeline!(pb)
- register!(pb, RestoreAllocaType())
+ register!(pb, RestoreAllocaType())
add!(pb, NewPMAAManager()) do aam
add!(aam, ScopedNoAliasAA())
add!(aam, TypeBasedAA())
@@ -102,7 +102,7 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine)
add!(fpm, ReassociatePass())
add!(fpm, EarlyCSEPass())
add!(fpm, AllocOptPass())
- add!(fpm, RestoreAllocaType())
+ add!(fpm, RestoreAllocaType())
add!(fpm, NewPMLoopPassManager(use_memory_ssa=true)) do lpm
add!(lpm, LoopIdiomRecognizePass())
@@ -122,7 +122,7 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine)
# todo peeling=false?
add!(fpm, LoopUnrollPass(opt_level=2, partial=false)) # what opt level?
add!(fpm, AllocOptPass())
- add!(fpm, RestoreAllocaType())
+ add!(fpm, RestoreAllocaType())
add!(fpm, SROAPass())
add!(fpm, GVNPass())
@@ -137,7 +137,7 @@ function optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine)
add!(fpm, JumpThreadingPass())
add!(fpm, DSEPass())
add!(fpm, AllocOptPass())
- add!(fpm, RestoreAllocaType())
+ add!(fpm, RestoreAllocaType())
add!(fpm, SimplifyCFGPass())
@@ -229,7 +229,7 @@ function addOptimizationPasses!(mpm::LLVM.NewPMPassManager)
# merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt`
# pass.
- add!(fpm, AllocOptPass())
+ add!(fpm, AllocOptPass())
add!(fpm, RestoreAllocaType())
# consider AggressiveInstCombinePass at optlevel > 2
@@ -292,7 +292,7 @@ function addOptimizationPasses!(mpm::LLVM.NewPMPassManager)
# More dead allocation (store) deletion before loop optimization
# consider removing this:
- add!(fpm, AllocOptPass())
+ add!(fpm, AllocOptPass())
add!(fpm, RestoreAllocaType())
# see if all of the constant folding has exposed more loops
@@ -446,13 +446,13 @@ function post_optimize!(mod::LLVM.Module, tm::LLVM.TargetMachine, machine::Bool
registerEnzymeAndPassPipeline!(pb)
register!(pb, ReinsertGCMarkerPass())
register!(pb, SafeAtomicToRegularStorePass())
- register!(pb, RestoreAllocaType())
+ register!(pb, RestoreAllocaType())
add!(pb, NewPMAAManager()) do aam
add!(aam, ScopedNoAliasAA())
add!(aam, TypeBasedAA())
add!(aam, BasicAA())
end
- add!(pb, NewPMModulePassManager()) do mpm
+ add!(pb, NewPMModulePassManager()) do mpm
addOptimizationPasses!(mpm)
if machine
# TODO enable validate_return_roots
diff --git a/src/llvm/attributes.jl b/src/llvm/attributes.jl
index 1b5128ea..3c5e1381 100644
--- a/src/llvm/attributes.jl
+++ b/src/llvm/attributes.jl
@@ -1,5 +1,5 @@
const nofreefns = Set{String}((
- "jl_genericmemory_copyto",
+ "jl_genericmemory_copyto",
"utf8proc_toupper",
"ClientGetAddressableDevices",
"ClientNumAddressableDevices",
diff --git a/src/llvm/transforms.jl b/src/llvm/transforms.jl
index ec4f1401..a449155a 100644
--- a/src/llvm/transforms.jl
+++ b/src/llvm/transforms.jl
@@ -1,5 +1,5 @@
function restore_alloca_type!(f::LLVM.Function)
- replaceAndErase = Tuple{LLVM.AllocaInst,Type, LLVMType}[]
+ replaceAndErase = Tuple{LLVM.AllocaInst, Type, LLVMType}[]
dl = datalayout(LLVM.parent(f))
for bb in blocks(f), inst in instructions(bb)
@@ -39,12 +39,12 @@ function restore_alloca_type!(f::LLVM.Function)
cst = al2
if value_type(cst) != value_type(al)
cst = bitcast!(b, cst, value_type(al))
- end
+ end
LLVM.replace_uses!(al, cst)
LLVM.API.LLVMInstructionEraseFromParent(al)
metadata(inst)["enzymejl_allocart"] = MDNode(LLVM.Metadata[MDString(string(convert(UInt, unsafe_to_pointer(RT))))])
end
- return length(replaceAndErase) != 0
+ return length(replaceAndErase) != 0
end
# Rewrite calls with "jl_roots" to only have the jl_value_t attached and not { { {} addrspace(10)*, [1 x [2 x i64]], i64, i64 }, [2 x i64] } %unbox110183_replacementA
@@ -2691,7 +2691,7 @@ function removeDeadArgs!(mod::LLVM.Module, tm::LLVM.TargetMachine, post_gc_fixup
propagate_returned!(mod)
LLVM.@dispose pb = NewPMPassBuilder() begin
registerEnzymeAndPassPipeline!(pb)
- register!(pb, RestoreAllocaType())
+ register!(pb, RestoreAllocaType())
add!(pb, NewPMModulePassManager()) do mpm
add!(mpm, NewPMFunctionPassManager()) do fpm
add!(fpm, InstCombinePass())
@@ -2720,7 +2720,7 @@ function removeDeadArgs!(mod::LLVM.Module, tm::LLVM.TargetMachine, post_gc_fixup
LLVM.@dispose pb = NewPMPassBuilder() begin
registerEnzymeAndPassPipeline!(pb)
register!(pb, EnzymeAttributorPass())
- register!(pb, RestoreAllocaType())
+ register!(pb, RestoreAllocaType())
add!(pb, NewPMModulePassManager()) do mpm
add!(mpm, NewPMFunctionPassManager()) do fpm
add!(fpm, InstCombinePass())
diff --git a/src/typeutils/lltypes.jl b/src/typeutils/lltypes.jl
index 0723dcd7..5b325943 100644
--- a/src/typeutils/lltypes.jl
+++ b/src/typeutils/lltypes.jl
@@ -98,7 +98,7 @@ function strip_tracked_pointers(@nospecialize(T::LLVM.LLVMType))
for (i, t) in enumerate(LLVM.elements(ty))
push!(subtypes, strip_tracked_pointers(t))
end
- return LLVM.StructType(subtypes; packed=LLVM.ispacked(T))
+ return LLVM.StructType(subtypes; packed = LLVM.ispacked(T))
end
throw(AssertionError("Unknown composite type")) |
|
|
||
| EnzymeAttributorPass() = NewPMModulePass("enzyme_attributor", enzyme_attributor_pass!) | ||
| ReinsertGCMarkerPass() = NewPMFunctionPass("reinsert_gcmarker", reinsert_gcmarker_pass!) | ||
| RestoreAllocaType() = NewPMFunctionPass("restore_alloca_type", restore_alloca_type!) |
There was a problem hiding this comment.
@vchuravy can you check what obvious thing I'm doing wrong here?
There was a problem hiding this comment.
register!(pb, RestoreAllocaType())
|
With this on the 1.12 backports, does SciML work? |
|
Well, we first need to fix the "I'm silly and didn't register the pass correctly here" problem; then we can test. |
|
Given JuliaLang/julia#60699, is this still necessary? They will probably both be in the same release regardless. |
|
Yes, it's separately necessary in order to determine the type of the alloca.
Return a boolean indicating if replacements and erasures occurred.
Codecov Report❌ Patch coverage is
Additional details and impacted files@@ Coverage Diff @@
## main #2902 +/- ##
==========================================
- Coverage 66.98% 66.89% -0.10%
==========================================
Files 58 58
Lines 21216 21308 +92
==========================================
+ Hits 14212 14254 +42
- Misses 7004 7054 +50 ☔ View full report in Codecov by Sentry. 🚀 New features to boost your workflow:
|
Benchmark Results
Benchmark Plots
A plot of the benchmark results has been uploaded as an artifact at https://github.com/EnzymeAD/Enzyme.jl/actions/runs/21123951548/artifacts/5172079042. |
Requires JuliaLang/julia#60695 to be effective.
Fixes numerous 1.12 problems.
Essentially, suppose we have a GC allocation of a complex64. The `alloc_opt` pass will change this into a `[16 x i8]` alloca. SROA will then turn that into 16 individual byte-sized extract/insert operations, which we cannot actually handle properly.