Skip to content

Commit 4450769

Browse files
committed
fixup! Various alloc reductions and optimizations
1 parent d52b541 commit 4450769

File tree

8 files changed

+18
-80
lines changed

8 files changed

+18
-80
lines changed

Diff for: src/argument.jl

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ ArgPosition() = ArgPosition(true, 0, :NULL)
77
# Copy constructor: build an `ArgPosition` from the `positional`, `idx`, and
# `kw` fields of an existing `pos`.
function ArgPosition(pos::ArgPosition)
    positional = pos.positional
    idx = pos.idx
    kw = pos.kw
    return ArgPosition(positional, idx, kw)
end
88
"Return the `positional` flag stored in `pos`."
function ispositional(pos::ArgPosition)
    return pos.positional
end
99
"Return the negation of `pos.positional`, i.e. `true` when `pos` is not positional."
function iskw(pos::ArgPosition)
    return !(pos.positional)
end
10+
"""
    raw_position(pos::ArgPosition)

Return `pos.idx` when `pos` is positional, and `pos.kw` otherwise.
"""
function raw_position(pos::ArgPosition)
    if ispositional(pos)
        return pos.idx
    end
    return pos.kw
end
1011
function pos_idx(pos::ArgPosition)
1112
@assert pos.positional
1213
@assert pos.idx > 0

Diff for: src/sch/Sch.jl

+2-34
Original file line numberDiff line numberDiff line change
@@ -32,25 +32,6 @@ include("util.jl")
3232
include("fault-handler.jl")
3333
include("dynamic.jl")
3434

35-
mutable struct ProcessorCacheEntry
36-
gproc::OSProc
37-
proc::Processor
38-
next::ProcessorCacheEntry
39-
40-
ProcessorCacheEntry(gproc::OSProc, proc::Processor) = new(gproc, proc)
41-
end
42-
Base.isequal(p1::ProcessorCacheEntry, p2::ProcessorCacheEntry) =
43-
p1.proc === p2.proc
44-
function Base.show(io::IO, entry::ProcessorCacheEntry)
45-
entries = 1
46-
next = entry.next
47-
while next !== entry
48-
entries += 1
49-
next = next.next
50-
end
51-
print(io, "ProcessorCacheEntry(pid $(entry.gproc.pid), $(entry.proc), $entries entries)")
52-
end
53-
5435
struct TaskResult
5536
pid::Int
5637
proc::Processor
@@ -82,7 +63,6 @@ Fields:
8263
- `worker_storage_capacity::Dict{Int,Dict{Union{StorageResource,Nothing},UInt64}}` - Maps from worker ID to storage resource capacity
8364
- `worker_loadavg::Dict{Int,NTuple{3,Float64}}` - Worker load average
8465
- `worker_chans::Dict{Int, Tuple{RemoteChannel,RemoteChannel}}` - Communication channels between the scheduler and each worker
85-
- `procs_cache_list::Base.RefValue{Union{ProcessorCacheEntry,Nothing}}` - Cached linked list of processors ready to be used
8666
- `signature_time_cost::Dict{Signature,UInt64}` - Cache of estimated CPU time (in nanoseconds) required to compute calls with the given signature
8767
- `signature_alloc_cost::Dict{Signature,UInt64}` - Cache of estimated CPU RAM (in bytes) required to compute calls with the given signature
8868
- `transfer_rate::Ref{UInt64}` - Estimate of the network transfer rate in bytes per second
@@ -109,7 +89,6 @@ struct ComputeState
10989
worker_storage_capacity::Dict{Int,Dict{Union{StorageResource,Nothing},UInt64}}
11090
worker_loadavg::Dict{Int,NTuple{3,Float64}}
11191
worker_chans::Dict{Int, Tuple{RemoteChannel,RemoteChannel}}
112-
procs_cache_list::Base.RefValue{Union{ProcessorCacheEntry,Nothing}}
11392
signature_time_cost::Dict{Signature,UInt64}
11493
signature_alloc_cost::Dict{Signature,UInt64}
11594
transfer_rate::Ref{UInt64}
@@ -139,7 +118,6 @@ function start_state(deps::Dict, node_order, chan)
139118
Dict{Int,Dict{Union{StorageResource,Nothing},UInt64}}(),
140119
Dict{Int,NTuple{3,Float64}}(),
141120
Dict{Int, Tuple{RemoteChannel,RemoteChannel}}(),
142-
Ref{Union{ProcessorCacheEntry,Nothing}}(nothing),
143121
Dict{Signature,UInt64}(),
144122
Dict{Signature,UInt64}(),
145123
Ref{UInt64}(1_000_000),
@@ -553,8 +531,6 @@ function schedule!(ctx, state, sch_options, procs=procs_to_use(ctx, sch_options)
553531
# Remove processors that aren't yet initialized
554532
procs = filter(p -> haskey(state.worker_chans, Dagger.root_worker_id(p)), procs)
555533

556-
populate_processor_cache_list!(state, procs)
557-
558534
# Schedule tasks
559535
to_fire = @reusable_dict :schedule!_to_fire ScheduleTaskLocation Vector{ScheduleTaskSpec} ScheduleTaskLocation(OSProc(), OSProc()) ScheduleTaskSpec[] 1024
560536
failed_scheduling = @reusable_vector :schedule!_failed_scheduling Union{Thunk,Nothing} nothing 32
@@ -633,6 +609,7 @@ function schedule!(ctx, state, sch_options, procs=procs_to_use(ctx, sch_options)
633609
costs = @reusable_dict :schedule!_costs Processor Float64 OSProc() 0.0 32
634610
estimate_task_costs!(sorted_procs, costs, state, input_procs, task)
635611
empty!(costs) # We don't use costs here
612+
empty!(input_procs)
636613
scheduled = false
637614

638615
# Move our corresponding ThreadProc to be the last considered
@@ -710,22 +687,14 @@ function monitor_procs_changed!(ctx, state, options)
710687
for p in diffps
711688
init_proc(state, p, ctx.log_sink)
712689

713-
# Empty the processor cache list and force reschedule
714-
lock(state.lock) do
715-
state.procs_cache_list[] = nothing
716-
end
690+
# Force reschedule
717691
put!(state.chan, RescheduleSignal())
718692
end
719693

720694
# Cleanup removed procs
721695
diffps = setdiff(old_ps, new_ps)
722696
for p in diffps
723697
cleanup_proc(state, p, ctx.log_sink)
724-
725-
# Empty the processor cache list
726-
lock(state.lock) do
727-
state.procs_cache_list[] = nothing
728-
end
729698
end
730699

731700
@maybelog ctx timespan_finish(ctx, :assign_procs, (;uid=state.uid), nothing)
@@ -741,7 +710,6 @@ function remove_dead_proc!(ctx, state, proc, options)
741710
delete!(state.worker_storage_capacity, proc.pid)
742711
delete!(state.worker_loadavg, proc.pid)
743712
delete!(state.worker_chans, proc.pid)
744-
state.procs_cache_list[] = nothing
745713
end
746714

747715
function finish_task!(ctx, state, node, thunk_failed)

Diff for: src/sch/util.jl

+7-28
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ function can_use_proc(state, task, gproc, proc, opts, scope)
402402
scope = constrain(scope, Dagger.ExactScope(proc))
403403
elseif opts.proclist isa Vector
404404
if !(typeof(proc) in opts.proclist)
405-
@dagdebug task :scope "Rejected $proc: !(typeof(proc) in proclist)"
405+
@dagdebug task :scope "Rejected $proc: !($(typeof(proc)) in proclist)"
406406
return false, scope
407407
end
408408
scope = constrain(scope,
@@ -437,18 +437,18 @@ function can_use_proc(state, task, gproc, proc, opts, scope)
437437
return false, scope
438438
end
439439

440-
# Check against f/args
440+
# Check against function and arguments
441441
Tf = chunktype(task.f)
442442
if !Dagger.iscompatible_func(proc, opts, Tf)
443443
@dagdebug task :scope "Rejected $proc: Not compatible with function type ($Tf)"
444444
return false, scope
445445
end
446-
for (_, arg) in task.inputs
447-
arg = unwrap_weak_checked(arg)
448-
if arg isa Thunk
449-
arg = state.cache[arg]
446+
for arg in task.inputs[2:end]
447+
value = unwrap_weak_checked(Dagger.value(arg))
448+
if value isa Thunk
449+
value = load_result(state, value)
450450
end
451-
Targ = chunktype(arg)
451+
Targ = chunktype(value)
452452
if !Dagger.iscompatible_arg(proc, opts, Targ)
453453
@dagdebug task :scope "Rejected $proc: Not compatible with argument type ($Targ)"
454454
return false, scope
@@ -498,27 +498,6 @@ function has_capacity(state, p, gp, time_util, alloc_util, occupancy, sig)
498498
return true, est_time_util, est_alloc_util, est_occupancy
499499
end
500500

501-
function populate_processor_cache_list!(state, procs)
502-
# Populate the cache if empty
503-
if state.procs_cache_list[] === nothing
504-
current = nothing
505-
for p in map(x->x.pid, procs)
506-
for proc in get_processors(OSProc(p))
507-
next = ProcessorCacheEntry(OSProc(p), proc)
508-
if current === nothing
509-
current = next
510-
current.next = current
511-
state.procs_cache_list[] = current
512-
else
513-
current.next = next
514-
current = next
515-
current.next = state.procs_cache_list[]
516-
end
517-
end
518-
end
519-
end
520-
end
521-
522501
"Like `sum`, but replaces `nothing` entries with the average of non-`nothing` entries."
523502
function impute_sum(xs)
524503
total = 0

Diff for: src/utils/logging-events.jl

+3-3
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ function (ta::TaskArguments)(ev::Event{:finish})
148148
if ev.category == :move
149149
args = Pair{Union{Symbol,Int},Dagger.LoggedMutableObject}[]
150150
thunk_id = ev.id.thunk_id::Int
151-
pos = ev.id.position::Union{Symbol,Int}
151+
pos = Dagger.raw_position(ev.id.position::Dagger.ArgPosition)::Union{Symbol,Int}
152152
arg = ev.timeline.data
153153
if ismutable(arg)
154154
push!(args, pos => Dagger.objectid_or_chunkid(arg))
@@ -174,7 +174,7 @@ function (ta::TaskArgumentMoves)(ev::Event{:start})
174174
data = ev.timeline.data
175175
if ismutable(data)
176176
thunk_id = ev.id.thunk_id::Int
177-
position = ev.id.position::Union{Symbol,Int}
177+
position = Dagger.raw_position(ev.id.position::Dagger.ArgPosition)::Union{Symbol,Int}
178178
d = get!(Dict{Union{Int,Symbol},Dagger.LoggedMutableObject}, ta.pre_move_args, thunk_id)
179179
d[position] = Dagger.objectid_or_chunkid(data)
180180
end
@@ -186,7 +186,7 @@ function (ta::TaskArgumentMoves)(ev::Event{:finish})
186186
post_data = ev.timeline.data
187187
if ismutable(post_data)
188188
thunk_id = ev.id.thunk_id::Int
189-
position = ev.id.position::Union{Symbol,Int}
189+
position = Dagger.raw_position(ev.id.position::Dagger.ArgPosition)::Union{Symbol,Int}
190190
if haskey(ta.pre_move_args, thunk_id)
191191
d = ta.pre_move_args[thunk_id]
192192
if haskey(d, position)

Diff for: src/utils/reuse.jl

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ function maybetake!(cache::ReusableCache{T}, len=nothing) where T
1515
for idx in 1:length(cache.used)
1616
cache.used[idx] && continue
1717
if cache.sized && isassigned(cache.cache, idx) && length(cache.cache[idx]) != len
18-
@debug "Skipping length $(length(cache.cache[idx])) (want length $len) @ $idx"
18+
@dagdebug nothing :reuse "Skipping length $(length(cache.cache[idx])) (want length $len) @ $idx"
1919
continue
2020
end
2121
cache.used[idx] = true
2222
if !isassigned(cache.cache, idx)
2323
if cache.sized
24-
@debug "Allocating length $len @ $idx"
24+
@dagdebug nothing :reuse "Allocating length $len @ $idx"
2525
cache.cache[idx] = alloc!(T, len)
2626
else
2727
cache.cache[idx] = alloc!(T)

Diff for: test/logging.jl

-10
Original file line numberDiff line numberDiff line change
@@ -134,16 +134,6 @@ import Colors, GraphViz, DataFrames, Plots, JSON3
134134
@test any(e->haskey(e, :fire), esat)
135135
@test any(e->haskey(e, :take), esat)
136136
@test any(e->haskey(e, :finish), esat)
137-
if Threads.nthreads() == 1
138-
if nprocs() > 1
139-
# Note: May one day be true as scheduler evolves
140-
@test !any(e->haskey(e, :compute), esat)
141-
@test !any(e->haskey(e, :move), esat)
142-
psat = l1[:psat]
143-
# Note: May become false
144-
@test all(e->length(e) == 0, psat)
145-
end
146-
end
147137

148138
had_psat_proc = 0
149139
for wo in filter(w->w != 1, keys(logs))

Diff for: test/thunk.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ end
284284
t1 = Dagger.@spawn 1+"fail"
285285
Dagger.@spawn t1+1
286286
end
287-
@test_throws_unwrap (Dagger.ThunkFailedException, MethodError) fetch(t2)
287+
@test_throws_unwrap (Dagger.DTaskFailedException, MethodError) fetch(t2)
288288
end
289289
end
290290
if nprocs() > 1

Diff for: test/util.jl

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ function _test_throws_unwrap(terr, ex; to_match=[])
1818
match_expr = Expr(:block)
1919
for m in to_match
2020
if m.head == :(=)
21-
lhs, rhs = replace_obj!(m.args[1], oerr), m.args[2]
21+
lhs, rhs = replace_obj!(m.args[1], rerr), m.args[2]
2222
push!(match_expr.args, :(@test $lhs == $rhs))
2323
elseif m.head == :call
2424
fn = m.args[1]
25-
lhs, rhs = replace_obj!(m.args[2], oerr), m.args[3]
25+
lhs, rhs = replace_obj!(m.args[2], rerr), m.args[3]
2626
if fn == :(<)
2727
push!(match_expr.args, :(@test startswith($lhs, $rhs)))
2828
elseif fn == :(>)

0 commit comments

Comments
 (0)