Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 9 additions & 10 deletions src/rules/llvmrules.jl
Original file line number Diff line number Diff line change
Expand Up @@ -628,13 +628,11 @@ function arraycopy_common(fwd, B, orig, shadowsrc, gutils, shadowdst; len=nothin

if memory
if fwd
shadowsrc = inttoptr!(B, memoryptr, LLVM.PointerType(LLVM.IntType(8)))
lookup_src = false
shadowsrc = invert_pointer(gutils, memoryptr, B)
else
shadowsrc = invert_pointer(gutils, shadowsrc, B)
if !fwd
shadowsrc = lookup_value(gutils, shadowsrc, B)
end
shadowsrc = invert_pointer(gutils, shadowsrc, B)
shadowsrc = lookup_value(gutils, shadowsrc, B)
end
else
shadowsrc = invert_pointer(gutils, shadowsrc, B)
Expand Down Expand Up @@ -674,12 +672,13 @@ function arraycopy_common(fwd, B, orig, shadowsrc, gutils, shadowdst; len=nothin
# src already has done the lookup from the argument
shadowsrc0 = if lookup_src
if memory
# TODO this may not be at the same offset as the start of the copy, e.g. get_memory_data(src) != memoryptr
get_memory_data(B, evsrc)
else
get_array_data(B, evsrc)
end
else
evsrc
inttoptr!(B, evsrc, LLVM.PointerType(LLVM.IntType(8)))
end

shadowdst0 = if memory
Expand Down Expand Up @@ -781,7 +780,7 @@ end
false,
) #=lookup=#
if is_constant_value(gutils, origops[1])
elSize = get_array_elsz(B, ev)
elSize = get_memory_elsz(B, ev)
elSize = LLVM.zext!(B, elSize, LLVM.IntType(8 * sizeof(Csize_t)))
length = LLVM.mul!(B, len, elSize)
bt = GPUCompiler.backtrace(orig)
Expand All @@ -792,7 +791,7 @@ end
GPUCompiler.@safe_warn "TODO forward zero-set of memorycopy used memset rather than runtime type $btstr"
LLVM.memset!(
B,
ev2,
inttoptr!(B, ev2, LLVM.PointerType(LLVM.IntType(8))),
LLVM.ConstantInt(i8, 0, false),
length,
algn,
Expand Down Expand Up @@ -838,7 +837,7 @@ end
shadowres = LLVM.Value(unsafe_load(shadowR))

len = new_from_original(gutils, origops[3])
memoryptr = new_from_original(gutils, origops[2])
memoryptr = origops[2]
arraycopy_common(true, B, orig, origops[1], gutils, shadowres; len, memoryptr)
end

Expand All @@ -849,7 +848,7 @@ end
origops = LLVM.operands(orig)
if !is_constant_value(gutils, origops[1]) && !is_constant_value(gutils, orig)
len = new_from_original(gutils, origops[3])
memoryptr = new_from_original(gutils, origops[2])
memoryptr = origops[2]
arraycopy_common(false, B, orig, origops[1], gutils, nothing; len, memoryptr)
end

Expand Down
75 changes: 62 additions & 13 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -253,24 +253,73 @@ export codegen_world_age

if VERSION >= v"1.11.0-DEV.1552"


const prevmethodinstance = GPUCompiler.generic_methodinstance

# Generator used for the generated `prevmethodinstance(ft, tt)` method.
#
# `ft` and `tt` arrive wrapped as `Type{...}`; after unwrapping them, the method matching
# `Tuple{ft, tt.parameters...}` in `world` is looked up and a `CodeInfo` is returned whose
# only statement is `return mi`, with `mi` the corresponding `MethodInstance`.
# When no method matches, the generated body throws a `MethodError` instead.
function methodinstance_generator(world::UInt, source, self, ft::Type, tt::Type)
    @nospecialize
    @assert Core.Compiler.isType(ft) && Core.Compiler.isType(tt)
    ft = ft.parameters[1]
    tt = tt.parameters[1]

    # find the method match; the two refs are handed to the lookup so it can report the
    # world range of the match
    sig = Tuple{ft, tt.parameters...}
    world_lo = Ref{UInt}(typemin(UInt))
    world_hi = Ref{UInt}(typemax(UInt))
    found = ccall(:jl_gf_invoke_lookup_worlds, Any,
                  (Any, Any, Csize_t, Ref{Csize_t}, Ref{Csize_t}),
                  sig, #=mt=# nothing, world, world_lo, world_hi)
    if found === nothing
        # nothing applicable: emit a body that raises the method error
        stub = Core.GeneratedFunctionStub(identity,
                                          Core.svec(:methodinstance, :ft, :tt),
                                          Core.svec())
        return stub(world, source, :(throw(MethodError(ft, tt, $world))))
    end

    # fetch the method instance for the match, and a private copy of its code info that
    # serves as the template for the generated body
    mi = ccall(:jl_specializations_get_linfo, Ref{MethodInstance},
               (Any, Any, Any), found.method, found.spec_types, found.sparams)
    body = copy(Core.Compiler.retrieve_code_info(mi, world))

    # slots of the generated method
    body.slotnames = Symbol[Symbol("#self#"), :ft, :tt]
    body.slotflags = zeros(UInt8, 3)

    # carry over the edge metadata from the lookup
    body.edges = MethodInstance[mi]
    body.min_world = world_lo[]
    body.max_world = world_hi[]

    # throw away the copied statements together with their per-statement metadata ...
    for field in (body.code, body.codelocs, body.linetable, body.ssaflags)
        empty!(field)
    end

    # ... and install the single statement that returns the method instance
    push!(body.code, Core.Compiler.ReturnNode(mi))
    push!(body.ssaflags, 0x00)
    push!(body.linetable, GPUCompiler.@LineInfoNode(methodinstance))
    push!(body.codelocs, 1)
    body.ssavaluetypes = 1

    return body
end

# Define the two-argument method `prevmethodinstance(ft, tt)` via `@eval`, with a body
# that consists solely of the two `:meta` expressions spliced in below.
# NOTE(review): the call site visible in this file passes three arguments
# (`ft, tt, world`) — confirm which method that call is expected to dispatch to.
@eval function prevmethodinstance(ft, tt)
# presumably marks the method as having no non-generated fallback body — TODO confirm
$(Expr(:meta, :generated_only))
# attaches `methodinstance_generator` as the generator for this method
$(Expr(:meta, :generated, methodinstance_generator))
end

# XXX: version of Base.method_instance that uses a function type
@inline function my_methodinstance(@nospecialize(ft::Type), @nospecialize(tt::Type),
world::Integer=tls_world_age())
sig = GPUCompiler.signature_type_by_tt(ft, tt)
# @assert Base.isdispatchtuple(sig) # JuliaLang/julia#52233

mi = ccall(:jl_method_lookup_by_tt, Any,
(Any, Csize_t, Any),
sig, world, #=method_table=# nothing)
mi === nothing && throw(MethodError(ft, tt, world))
mi = mi::MethodInstance

# `jl_method_lookup_by_tt` and `jl_method_lookup` can return a unspecialized mi
if !Base.isdispatchtuple(mi.specTypes)
mi = Core.Compiler.specialize_method(mi.def, sig, mi.sparam_vals)::MethodInstance
if Base.isdispatchtuple(sig) # JuliaLang/julia#52233
return GPUCompiler.methodinstance(ft, tt, world)
else
return prevmethodinstance(ft, tt, world)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You just want to call `mi = Core.Compiler.specialize_method(mi.def, sig, mi.sparam_vals)::MethodInstance` in this case, but you first need a call to `which` in order to obtain `def`.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Edit: Maybe? This is all pretty much unsound.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean happy to defer that to the experts, it's now just falling back to the old implementation

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

*"Old" meaning the 1.10 implementation from GPUCompiler.jl. Concurrently, I opened a PR to GPUCompiler.jl to actually fix this, so that we don't have to vendor it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

end

return mi
end
else
import GPUCompiler: methodinstance as my_methodinstance
Expand Down
Loading