From 4823207540818ad85a24d19b865b97ae701fa08c Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 4 Jan 2025 12:16:55 +0100 Subject: [PATCH 01/51] work on algorithm efficiency --- src/gridrsp.jl | 168 ++++++++++++++++++++-------------- src/randomizedshortestpath.jl | 168 ++++++++++++++++++++-------------- 2 files changed, 198 insertions(+), 138 deletions(-) diff --git a/src/gridrsp.jl b/src/gridrsp.jl index c3aa298..5f397a0 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -54,14 +54,13 @@ DimensionalData.dims(grsp::GridRSP) = dims(grsp.g) Compute RSP betweenness of all nodes weighted by source and target qualities. """ -function betweenness_qweighted(grsp::GridRSP) +function betweenness_qweighted(grsp::GridRSP; kw...) g = grsp.g - betvec = RSP_betweenness_qweighted(grsp.W, grsp.Z, g.qs, g.qt, g.targetnodes) + betvec = RSP_betweenness_qweighted(grsp.W, grsp.Z, g.qs, g.qt, g.targetnodes; kw...) bet = fill(NaN, g.nrows, g.ncols) for (i, v) in enumerate(betvec) bet[g.id_to_grid_coordinate_list[i]] = v end - return _maybe_raster(bet, grsp) end @@ -71,10 +70,9 @@ end Compute RSP betweenness of all edges weighted by source and target qualities. Returns a sparse matrix where element (i,j) is the betweenness of edge (i,j). """ -function edge_betweenness_qweighted(grsp::GridRSP) +function edge_betweenness_qweighted(grsp::GridRSP; kw...) g = grsp.g - betmatrix = RSP_edge_betweenness_qweighted(grsp.W, grsp.Z, g.qs, g.qt, g.targetnodes) - return betmatrix + return RSP_edge_betweenness_qweighted(grsp.W, grsp.Z, g.qs, g.qt, g.targetnodes; kw...) end """ @@ -90,8 +88,10 @@ The optional `diagvalue` element specifies which value to use for the diagonal o function betweenness_kweighted(grsp::GridRSP; connectivity_function=expected_cost, distance_transformation=nothing, - diagvalue=nothing) - + diagvalue=nothing, + proximities=connectivity_function(grsp), + kw... +) g = grsp.g # Check that distance_transformation function has been passed if no cost function is saved @@ -103,20 +103,17 @@ function betweenness_kweighted(grsp::GridRSP; end end - proximities = connectivity_function(grsp) - if connectivity_function <: DistanceFunction map!(distance_transformation, proximities, proximities) end - if diagvalue !== nothing + if !isnothing(diagvalue) for (j, i) in enumerate(targetnodes) proximities[i, j] = diagvalue end end - betvec = RSP_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes) - + betvec = RSP_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes; kw...) bet = fill(NaN, g.nrows, g.ncols) for (i, v) in enumerate(betvec) bet[g.id_to_grid_coordinate_list[i]] = v @@ -138,17 +135,23 @@ end function edge_betweenness_kweighted(grsp::GridRSP; distance_transformation=inv(grsp.g.costfunction), diagvalue=nothing, + expected_cost=nothing, + kw... ) - g = grsp.g - proximities = map(distance_transformation, expected_cost(grsp)) - if diagvalue !== nothing - for (j, i) in enumerate(g.targetnodes) - proximities[i, j] = diagvalue - end + if isnothing(expected_cost) + expected_cost = ConScape.expected_cost(grsp; kw...) end + proximities = map(distance_transformation, expected_cost) + _set_diagonal!(proximities, g, diagvalue) - betmatrix = RSP_edge_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes) - return betmatrix + return RSP_edge_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes; kw...) +end + +_set_diagonal!(proximities, g, diagvalue::Nothing) = nothing +function _set_diagonal!(proximities, g, diagvalue) + for (j, i) in enumerate(g.targetnodes) + proximities[i, j] = diagvalue + end end """ @@ -156,28 +159,43 @@ end Compute RSP expected costs from all nodes. """ -expected_cost(grsp::GridRSP) = - RSP_expected_cost(grsp.W, grsp.g.costmatrix, grsp.Z, grsp.g.targetnodes) +expected_cost(grsp::GridRSP; kw...) = + RSP_expected_cost(grsp.W, grsp.g.costmatrix, grsp.Z, grsp.g.targetnodes; kw...) -free_energy_distance(grsp::GridRSP) = - RSP_free_energy_distance(grsp.Z, grsp.θ, grsp.g.targetnodes) +free_energy_distance(grsp::GridRSP; kw...) = + RSP_free_energy_distance(grsp.Z, grsp.θ, grsp.g.targetnodes; kw...) survival_probability(grsp::GridRSP) = - RSP_survival_probability(grsp.Z, grsp.θ, grsp.g.targetnodes) + RSP_survival_probability(grsp.Z, grsp.θ, grsp.g.targetnodes; kw...) -power_mean_proximity(grsp::GridRSP) = - RSP_power_mean_proximity(grsp.Z, grsp.θ, grsp.g.targetnodes) +power_mean_proximity(grsp::GridRSP; kw...) = + RSP_power_mean_proximity(grsp.Z, grsp.θ, grsp.g.targetnodes; kw...) -least_cost_distance(grsp::GridRSP) = least_cost_distance(grsp.g) +least_cost_distance(grsp::GridRSP) = least_cost_distance(grsp.g; kw...) """ mean_kl_divergence(grsp::GridRSP)::Float64 Compute the mean Kullback–Leibler divergence between the free energy distances and the RSP expected costs for `grsp::GridRSP`. """ -function mean_kl_divergence(grsp::GridRSP) +function mean_kl_divergence(grsp::GridRSP; + free_energy_distance=nothing, + expected_cost=nothing, + kw... +) g = grsp.g - return g.qs' * (RSP_free_energy_distance(grsp.Z, grsp.θ, g.targetnodes) - expected_cost(grsp)) * g.qt * grsp.θ + if isnothing(free_energy_distance) + free_energy_distance = RSP_free_energy_distance(grsp.Z, grsp.θ, g.targetnodes; kw...) + end + if isnothing(expected_cost) + expected_cost = ConScape.expected_cost(grsp; kw...) + end + return mean_kl_divergence(grsp::GridRSP, free_energy_distance, expected_cost) +end + +function mean_kl_divergence(grsp::GridRSP, free_energy_distance, expected_cost) + g = grsp.g + return g.qs' * (free_energy_distance - expected_cost) * g.qt * grsp.θ end @@ -186,14 +204,16 @@ end Compute the mean Kullback–Leibler divergence between the least-cost path and the random path distribution for `grsp::GridRSP`, weighted by the qualities of the source and target node. """ -function mean_lc_kl_divergence(grsp::GridRSP) +function mean_lc_kl_divergence(grsp::GridRSP; kw...) g = grsp.g - div = hcat([least_cost_kl_divergence(g.costmatrix, grsp.Pref, i) for i in g.targetnodes]...) + # TODO make this a loop + div = hcat([least_cost_kl_divergence(g.costmatrix, grsp.Pref, i; kw...) for i in g.targetnodes]...) return g.qs' * div * g.qt end -function least_cost_kl_divergence(C::SparseMatrixCSC, Pref::SparseMatrixCSC, targetnode::Integer) - +function least_cost_kl_divergence(C::SparseMatrixCSC, Pref::SparseMatrixCSC, targetnode::Integer; + kw... +) n = size(C, 1) graph = SimpleWeightedDiGraph(C) if !(1 <= targetnode <= n) @@ -236,21 +256,20 @@ function least_cost_kl_divergence(C::SparseMatrixCSC, Pref::SparseMatrixCSC, tar return kl_div end - """ least_cost_kl_divergence(grsp::GridRSP, target::Tuple{Int,Int}) Compute the least cost Kullback-Leibler divergence from each cell in the g in `h` to the `target` cell. """ -function least_cost_kl_divergence(grsp::GridRSP, target::Tuple{Int,Int}) +function least_cost_kl_divergence(grsp::GridRSP, target::Tuple{Int,Int}; kw...) g = grsp.g targetnode = findfirst(isequal(CartesianIndex(target)), g.id_to_grid_coordinate_list) if targetnode === nothing throw(ArgumentError("target cell not found")) end - div = least_cost_kl_divergence(g.costmatrix, grsp.Pref, targetnode) + div = least_cost_kl_divergence(g.costmatrix, grsp.Pref, targetnode; kw...) return reshape(div, g.nrows, g.ncols) end @@ -288,8 +307,9 @@ function connected_habitat(grsp::Union{Grid,GridRSP}; distance_transformation=nothing, diagvalue=nothing, θ::Union{Nothing,Real}=nothing, - approx::Bool=false) - + approx::Bool=false, + kw... +) # Check that distance_transformation function has been passed if no cost function is saved if distance_transformation === nothing && connectivity_function <: DistanceFunction if grsp isa Grid @@ -317,9 +337,12 @@ function connected_habitat(grsp::Union{Grid,GridRSP}; map!(distance_transformation, S, S) end - return connected_habitat(grsp, S, diagvalue=diagvalue) + return connected_habitat(grsp, S; diagvalue, kw...) end -function connected_habitat(grsp::Union{Grid,GridRSP}, S::Matrix; diagvalue::Union{Nothing,Real}=nothing) +function connected_habitat(grsp::Union{Grid,GridRSP}, S::Matrix; + diagvalue::Union{Nothing,Real}=nothing, + kw... +) g = _get_grid(grsp) if diagvalue !== nothing @@ -328,7 +351,7 @@ function connected_habitat(grsp::Union{Grid,GridRSP}, S::Matrix; diagvalue::Unio end end - funvec = connected_habitat(g.qs, g.qt, S) + funvec = connected_habitat(g.qs, g.qt, S; kw...) func = fill(NaN, g.nrows, g.ncols) for (ij, x) in zip(g.id_to_grid_coordinate_list, funvec) @@ -337,7 +360,6 @@ function connected_habitat(grsp::Union{Grid,GridRSP}, S::Matrix; diagvalue::Unio return _maybe_raster(func, grsp) end - function connected_habitat(grsp::GridRSP, cell::CartesianIndex{2}; distance_transformation=nothing, @@ -379,7 +401,7 @@ function connected_habitat(grsp::GridRSP, newh = GridRSP(newg; θ=grsp.θ) - return connected_habitat(newh; diagvalue=diagvalue, distance_transformation=distance_transformation) + return connected_habitat(newh; diagvalue, distance_transformation) end """ @@ -389,14 +411,22 @@ end diagvalue=nothing, tol=1e-14) -Compute the largest eigenvalue triple (left vector, value, and right vector) of the quality scaled proximities with respect to the distance/proximity measure defined by `connectivity_function`. If `connectivity_function` is a distance measure then the distances are transformed to proximities by `distance_transformation` which defaults to the inverse of the `costfunction` in the underlying `Grid` (if defined). Optionally, the diagonal values of the proximity matrix may be set to `diagvalue`. The `tol` argument specifies the convergence tolerance in the Arnoldi based eigensolver. +Compute the largest eigenvalue triple (left vector, value, and right vector) of the +quality scaled proximities with respect to the distance/proximity measure defined by +`connectivity_function`. + +If `connectivity_function` is a distance measure then the distances are transformed +to proximities by `distance_transformation` which defaults to the inverse of the `costfunction` +in the underlying `Grid` (if defined). Optionally, the diagonal values of the proximity matrix may +be set to `diagvalue`. The `tol` argument specifies the convergence tolerance in the Arnoldi based eigensolver. """ function LinearAlgebra.eigmax(grsp::GridRSP; connectivity_function=expected_cost, distance_transformation=nothing, diagvalue=nothing, - tol=1e-14) - + tol=1e-14, + kw... +) g = grsp.g # Check that distance_transformation function has been passed if no cost function is saved @@ -408,7 +438,7 @@ function LinearAlgebra.eigmax(grsp::GridRSP; end end - S = connectivity_function(grsp) + S = connectivity_function(grsp; kw...) if connectivity_function <: DistanceFunction map!(distance_transformation, S, S) @@ -421,16 +451,16 @@ function LinearAlgebra.eigmax(grsp::GridRSP; end # quality scaled proximity matrix - qSq = qˢ .* S .* qᵗ' + qSq = g.qs .* S .* g.qt' # square submatrix defined by extracting the rows corresponding to landmarks - qSq₀₀ = qSq[targetnodes,:] + qSq₀₀ = qSq[g.targetnodes, :] # size of the full problem n = size(g.affinities, 1) # node ids for the non-landmarks - p₁ = setdiff(1:n, targetnodes) + p₁ = setdiff(1:n, g.targetnodes) # use an Arnoldi based eigensolver to compute the largest (absolute) eigenvalue and right vector (of submatrix) Fps = partialschur(qSq₀₀, nev=1, tol=tol) @@ -487,7 +517,7 @@ function LinearAlgebra.eigmax(grsp::GridRSP; # construct full right vector vʳ = fill(NaN, n) - vʳ[targetnodes] = vʳ₀ + vʳ[g.targetnodes] = vʳ₀ vʳ[p₁] = qSq[p₁,:]*vʳ₀/λ₀[1] # compute left vector (of submatrix) by shift-invert @@ -497,9 +527,9 @@ function LinearAlgebra.eigmax(grsp::GridRSP; # construct full left vector vˡ = zeros(n) - vˡ[targetnodes] = vˡ₀ + vˡ[g.targetnodes] = vˡ₀ - return vˡ, λ₀[1], vʳ + return vˡ, λ₀=λ₀[1], vʳ end """ @@ -516,28 +546,24 @@ the cell to `qˢvalue` and `qᵗvalue` respectively. It is required that `avalue positive to avoid that the graph becomes disconnected. """ function criticality(grsp::GridRSP; - distance_transformation=nothing, - diagvalue=nothing, - avalue=floatmin(), - qˢvalue=0.0, - qᵗvalue=0.0) - + distance_transformation=nothing, + diagvalue=nothing, + avalue=floatmin(), + qˢvalue=0.0, + qᵗvalue=0.0, + kw... +) g = grsp.g - nl = length(targetidx) + nl = length(g.targetidx) reference_connected_habitat = sum(connected_habitat(grsp; - distance_transformation=distance_transformation, diagvalue=diagvalue + distance_transformation, diagvalue, kw... )) critvec = fill(reference_connected_habitat, nl) @progress name="Computing criticality..." for i in 1:nl - critvec[i] -= sum(connected_habitat( - grsp, - g.targetidx[i]; - distance_transformation=distance_transformation, - diagvalue=diagvalue, - avalue=avalue, - qˢvalue=qˢvalue, - qᵗvalue=qᵗvalue)) + critvec[i] = sum(connected_habitat(grsp, g.targetidx[i]; + distance_transformation, diagvalue, avalue, qˢvalue, qᵗvalue, kw... + )) end landscape = fill(NaN, size(grsp.g)) diff --git a/src/randomizedshortestpath.jl b/src/randomizedshortestpath.jl index d4edbbf..06777e8 100644 --- a/src/randomizedshortestpath.jl +++ b/src/randomizedshortestpath.jl @@ -1,3 +1,19 @@ +# Generate the sparse diagonal rhs matrix +function sparse_rhs(targetnodes, n) + sparse(targetnodes, + 1:length(targetnodes), + 1.0, + n, + length(targetnodes), + ) +end + +function _inv(Z) + Zⁱ = inv.(Z) + Zⁱ[.!isfinite.(Zⁱ)] .= floatmax(eltype(Z)) # To prevent Inf*0 later... + return Zⁱ +end + _Pref(A::SparseMatrixCSC) = Diagonal(inv.(vec(sum(A, dims=2)))) * A function _W(Pref::SparseMatrixCSC, θ::Real, C::SparseMatrixCSC) @@ -17,17 +33,19 @@ function RSP_betweenness_qweighted(W::SparseMatrixCSC, Z::AbstractMatrix, qˢ::AbstractVector, qᵗ::AbstractVector, - targetnodes::AbstractVector) - Zⁱ = inv.(Z) - Zⁱ[.!isfinite.(Zⁱ)] .= floatmax(eltype(Z)) # To prevent Inf*0 later... - - qˢZⁱqᵗ = qˢ .* Zⁱ .* qᵗ' + targetnodes::AbstractVector; + Zⁱ=_inv(Z), + workspace1=zeros(size(Z)), + solver=nothing, + kw... +) + qˢZⁱqᵗ = workspace1 + qˢZⁱqᵗ .= qˢ .* Zⁱ .* qᵗ' sumqˢ = sum(qˢ) for j in axes(Z, 2) - qˢZⁱqᵗ[targetnodes[j], j] -= sumqˢ * qᵗ[j] * Zⁱ[targetnodes[j], j] + qˢZⁱqᵗ[targetnodes[j], j] -= sumqˢ * qᵗ[j] * Zⁱ[targetnodes[j], j] end - - ZqˢZⁱqᵗZt = (I - W)'\qˢZⁱqᵗ + ZqˢZⁱqᵗZt = solve_ldiv!(solver, (I - W)', qˢZⁱqᵗ) ZqˢZⁱqᵗZt .*= Z return sum(ZqˢZⁱqᵗZt, dims=2) # diag(Z * ZqˢZⁱqᵗ') @@ -39,9 +57,11 @@ function RSP_betweenness_kweighted(W::SparseMatrixCSC, qˢ::AbstractVector, # Source qualities qᵗ::AbstractVector, # Target qualities S::AbstractMatrix, # Matrix of proximities - landmarks::AbstractVector) - - + landmarks::AbstractVector; + Zⁱ=_inv(Z), + workspace=zeros(size(Z)), + kw... +) axis1, axis2 = axes(Z) if axis1 != axes(qˢ, 1) throw(DimensionMismatch("")) @@ -56,23 +76,23 @@ function RSP_betweenness_kweighted(W::SparseMatrixCSC, throw(DimensionMismatch("")) end - Zⁱ = inv.(Z) - Zⁱ[.!isfinite.(Zⁱ)] .= floatmax(eltype(Z)) # To prevent Inf*0 later... - - KZⁱ = qˢ .* S .* qᵗ' + KZⁱ = workspace + KZⁱ .= qˢ .* S .* qᵗ' # If any of the values of KZⁱ is above one then there is a risk of overflow. # Hence, we scale the matrix and apply the scale factor by the end of the # computation. λ = max(1.0, maximum(KZⁱ)) - k = vec(sum(KZⁱ, dims=1)) * inv(λ) + s = sum(KZⁱ, dims=1) + s .*= inv(λ) + k = vec(s) KZⁱ .*= inv.(λ) .* Zⁱ for j in axis2 KZⁱ[landmarks[j], j] -= k[j] .* Zⁱ[landmarks[j], j] end - ZKZⁱt = (I - W)'\KZⁱ + ZKZⁱt = solve_ldiv!(solver, (I - W)', KZⁱ) ZKZⁱt .*= λ .* Z return vec(sum(ZKZⁱt, dims=2)) # diag(Z * KZⁱ') @@ -82,36 +102,36 @@ function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, Z::AbstractMatrix, qˢ::AbstractVector, qᵗ::AbstractVector, - targetnodes::AbstractVector) - - Zⁱ = inv.(Z) - Zⁱ[.!isfinite.(Zⁱ)] .= floatmax(eltype(Z)) # To prevent Inf*0 later... - - # FIXME: This should be only done when actually size(Z,2) < size(Z,1)/K where K ≈ 10 or so. + targetnodes::AbstractVector; + Zⁱ=_inv(Z), + workspace1=zeros(size(Z)), + solver=nothing, + kw... +) + + # FIXME: This should be only done when actually size(Z, 2) < size(Z, 1)/K where K ≈ 10 or so. # Otherwise we just compute many of the elements of Z twice... - if size(Z,2) < size(Z,1) - Zrows = ((I - W')\Matrix(sparse(targetnodes, - 1:length(targetnodes), - 1.0, - size(W, 1), - length(targetnodes))))' + if size(Z, 2) < size(Z, 1) + B = workspace1 + B .= sparse_rhs(targetnodes, size(W, 1)) + Zrows = solve_ldiv!(solver, (I - W'), B)' else Zrows = Z end - n = size(W,1) - + n = size(W, 1) diagZⁱ = [Zⁱ[targetnodes[t], t] for t in 1:length(targetnodes)] sumqˢ = sum(qˢ) - Zrows = Zrows .* (sumqˢ*qᵗ.*diagZⁱ) + Zrows .*= sumqˢ*qᵗ .* diagZⁱ qˢZⁱqᵗ = qˢ .* Zⁱ .* qᵗ' - QZⁱᵀZ = qˢZⁱqᵗ'/(I - W) + QZⁱᵀZ = qˢZⁱqᵗ' / (I - W) - RHS = QZⁱᵀZ-Zrows + RHS = workspace1 + RHS .= QZⁱᵀZ .- Zrows edge_betweennesses = copy(W) @@ -120,7 +140,7 @@ function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, for j in findall(W[i,:].>0) # edge_betweennesses[i,j] = W[i,j] .* Zqt[j,:]'* (ZᵀZⁱ_minus_diag * Z[j,:])[1] - edge_betweennesses[i,j] = W[i,j] .* (Z[j,:]' * RHS[:,i])[1] + edge_betweennesses[i, j] = W[i, j] .* (view(Z, j, :)' * view(RHS, :, i))[1] end end @@ -132,28 +152,29 @@ function RSP_edge_betweenness_kweighted(W::SparseMatrixCSC, qˢ::AbstractVector, qᵗ::AbstractVector, K::AbstractMatrix, # Matrix of proximities - targetnodes::AbstractVector) - - Zⁱ = inv.(Z) - Zⁱ[.!isfinite.(Zⁱ)] .= floatmax(eltype(Z)) # To prevent Inf*0 later... - + targetnodes::AbstractVector; + Zⁱ=_inv(Z), + workspace1=zeros(size(Z)), + workspace2=zeros(size(Z)), + solver=nothing, + kw... +) K̂ = qˢ .* K .* qᵗ' k̂ = vec(sum(K̂, dims=1)) K̂ .*= Zⁱ + K̂ᵀZ = K̂' / (I - W) - K̂ᵀZ = K̂'/(I - W) - - k̂diagZⁱ = k̂.*[Zⁱ[targetnodes[t], t] for t in 1:length(targetnodes)] + k̂diagZⁱ = k̂ .* [Zⁱ[targetnodes[t], t] for t in 1:length(targetnodes)] - Zrows = (I - W')\Matrix(sparse(targetnodes, - 1:length(targetnodes), - 1.0, - size(W, 1), - length(targetnodes))) - k̂diagZⁱZ = k̂diagZⁱ .* Zrows' + B = workspace1 + B .= sparse_rhs(targetnodes, size(W, 1)) + Zrows = solve_ldiv!(solver, (I - W'), B) + k̂diagZⁱZ = workspace2 + k̂diagZⁱZ .= k̂diagZⁱ .* Zrows' - K̂ᵀZ_minus_diag = K̂ᵀZ - k̂diagZⁱZ + K̂ᵀZ_minus_diag = workspace2 + K̂ᵀZ_minus_diag .= K̂ᵀZ .- k̂diagZⁱZ edge_betweennesses = copy(W) @@ -161,8 +182,8 @@ function RSP_edge_betweenness_kweighted(W::SparseMatrixCSC, # ZᵀZⁱ_minus_diag = ZᵀKZⁱ[i,:] .- (k.*Z[targetnodes,i].*(Zⁱ[targetnodes,targetnodes]))' # ZᵀZⁱ_minus_diag = Z[:,i]'*K̂ .- (k.*Z[targetnodes,i].*diag(Zⁱ))' - for j in findall(W[i,:].>0) - edge_betweennesses[i,j] = W[i,j] .* (Z[j,:]'*K̂ᵀZ_minus_diag[:,i])[1] + for j in findall(>(0), view(W, i, :)) + edge_betweennesses[i, j] = W[i, j] .* (view(Z, j, :)' * view(K̂ᵀZ_minus_diag, :, i))[1] end end @@ -170,12 +191,13 @@ function RSP_edge_betweenness_kweighted(W::SparseMatrixCSC, end - - function RSP_expected_cost(W::SparseMatrixCSC, C::SparseMatrixCSC, Z::AbstractMatrix, - landmarks::AbstractVector) + landmarks::AbstractVector; + solver=nothing, + kw... +) if axes(W) != axes(C) throw(DimensionMismatch("")) @@ -184,13 +206,13 @@ function RSP_expected_cost(W::SparseMatrixCSC, throw(DimensionMismatch("")) end if axes(Z, 2) != axes(landmarks, 1) - Z = Z[:,landmarks] + Z = Z[:, landmarks] end if size(Z, 1) == size(Z, 2) - C̄ = Z*((C .* W)*Z) + C̄ = Z * ((C .* W) * Z) else - C̄ = (I - W)\((C .* W)*Z) + C̄ = solve_ldiv!(solver, (I - W), ((C .* W) * Z)) end C̄ ./= Z @@ -201,20 +223,32 @@ function RSP_expected_cost(W::SparseMatrixCSC, return C̄ end -RSP_free_energy_distance(Z::AbstractMatrix, θ::Real, landmarks::AbstractVector) = - -log.(RSP_survival_probability(Z, θ, landmarks))./θ +function RSP_free_energy_distance(Z::AbstractMatrix, θ::Real, landmarks::AbstractVector; + survival_probability=nothing, kw... +) + if isnothing(survival_probability) + survival_probability = RSP_survival_probability(Z, θ, landmarks; kw...) + end + return -log.(max.(zero(eltype(Z)), survival_probability)) ./ θ +end -RSP_survival_probability(Z::AbstractMatrix, θ::Real, landmarks::AbstractVector) = +function RSP_survival_probability(Z::AbstractMatrix, θ::Real, landmarks::AbstractVector; kw...) Z .* inv.([Z[i, j] for (j, i) in enumerate(landmarks)])' +end -RSP_power_mean_proximity(Z::AbstractMatrix, θ::Real, landmarks::AbstractVector) = - RSP_survival_probability(Z, θ, landmarks).^(1/θ) +function RSP_power_mean_proximity(Z::AbstractMatrix, θ::Real, landmarks::AbstractVector; + survival_probability=RSP_survival_probability(Z, θ, landmarks; kw...), + kw... +) + survival_probability .^ (1 / θ) +end function connected_habitat(qˢ::AbstractVector, # Source qualities qᵗ::AbstractVector, # Target qualities - S::AbstractMatrix) # Matrix of proximities - - return qˢ .* (S*qᵗ) + S::AbstractMatrix; # Matrix of proximities + kw... +) + return qˢ .* (S * qᵗ) end # Returns the directed RSP dissimilarity and directed free energy distance for all nodes to a given target @@ -287,7 +321,7 @@ function bellman_ford(Pref::SparseMatrixCSC, C::SparseMatrixCSC, θ::Real, targe continue end # check if the free energy and the RSP have converged - convergence=(maximum(abs, φ - φ_1)/maximum(φ) < 1e-8) & (maximum(abs, c̄ - c̄_1)/maximum(c̄) < 1e-8) + convergence = (maximum(abs, φ - φ_1) / maximum(φ) < 1e-8) & (maximum(abs, c̄ - c̄_1) / maximum(c̄) < 1e-8) end return c̄, φ end From b7881c0d3781f1aff1dd097e635811d2e42d7bf7 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 13 Jan 2025 21:42:50 +0100 Subject: [PATCH 02/51] fix tests --- src/graph_measure.jl | 48 ++++++++++++++++++++++++++----- src/gridrsp.jl | 47 ++++++++++++++++++------------ src/randomizedshortestpath.jl | 54 ++++++++++++++++++----------------- src/solvers.jl | 6 ++-- test/problem.jl | 7 +++-- test/runtests.jl | 5 ++-- 6 files changed, 108 insertions(+), 59 deletions(-) diff --git a/src/graph_measure.jl b/src/graph_measure.jl index 329338b..1a9df00 100644 --- a/src/graph_measure.jl +++ b/src/graph_measure.jl @@ -79,12 +79,13 @@ needs_connectivity(::Criticality) = NeedsConnectivity() # This is where things actually happen # # Add dispatch on connectivity measure -compute(gm::GraphMeasure, p::AbstractProblem, g::Union{Grid,GridRSP}) = - compute(needs_connectivity(gm), gm, p, g) +compute(gm::GraphMeasure, p::AbstractProblem, g::Union{Grid,GridRSP}; kw...) = + compute(needs_connectivity(gm), gm, p, g; kw...) function compute(::NeedsConnectivity, gm::GraphMeasure, p::AbstractProblem, - g::Union{Grid,GridRSP} + g::Union{Grid,GridRSP}; + workspace_kw... ) cm = p.connectivity_measure distance_transformation = cm.distance_transformation @@ -92,16 +93,49 @@ function compute(::NeedsConnectivity, # Handle multiple distance transformations if distance_transformation isa NamedTuple map(distance_transformation) do dt - graph_function(gm)(g; keywords(gm, p)..., distance_transformation=dt, connectivity_function) + graph_function(gm)(g; + keywords(gm, p)..., + distance_transformation=dt, + connectivity_function, + workspace_kw... + ) end else - graph_function(gm)(g; keywords(gm, p)..., distance_transformation=dt, connectivity_function) + graph_function(gm)(g; + keywords(gm, p)..., + distance_transformation=dt, + connectivity_function, + workspace_kw... + ) end end function compute(::NoConnectivity, gm::GraphMeasure, p::AbstractProblem, - g::Union{Grid,GridRSP} + g::Union{Grid,GridRSP}; + workspace_kw... ) - graph_function(gm)(g; keywords(gm, p)...) + graph_function(gm)(g; keywords(gm, p)..., workspace_kw...) +end + +# Workspace allocation traits +needs_inv(::GraphMeasure) = false +needs_inv(::BetweennessMeasure) = true +needs_workspace(::GraphMeasure) = false +needs_workspace(::BetweennessMeasure) = true + +function _setup_workspace(p::AbstractProblem, grsp::GridRSP) + gm = p.graph_measures + workspace = if mapreduce(needs_workspace, |, gm; init=false) + similar(grsp.Z) + else + nothing + end + Zⁱ = if mapreduce(needs_inv, |, gm; init=false) + _inv(grsp.Z) + else + nothing + end + + return (; Zⁱ, workspace) end \ No newline at end of file diff --git a/src/gridrsp.jl b/src/gridrsp.jl index 5f397a0..20e0031 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -108,7 +108,7 @@ function betweenness_kweighted(grsp::GridRSP; end if !isnothing(diagvalue) - for (j, i) in enumerate(targetnodes) + for (j, i) in enumerate(g.targetnodes) proximities[i, j] = diagvalue end end @@ -138,6 +138,7 @@ function edge_betweenness_kweighted(grsp::GridRSP; expected_cost=nothing, kw... ) + g = grsp.g if isnothing(expected_cost) expected_cost = ConScape.expected_cost(grsp; kw...) end @@ -165,13 +166,13 @@ expected_cost(grsp::GridRSP; kw...) = free_energy_distance(grsp::GridRSP; kw...) = RSP_free_energy_distance(grsp.Z, grsp.θ, grsp.g.targetnodes; kw...) -survival_probability(grsp::GridRSP) = +survival_probability(grsp::GridRSP; kw...) = RSP_survival_probability(grsp.Z, grsp.θ, grsp.g.targetnodes; kw...) power_mean_proximity(grsp::GridRSP; kw...) = RSP_power_mean_proximity(grsp.Z, grsp.θ, grsp.g.targetnodes; kw...) -least_cost_distance(grsp::GridRSP) = least_cost_distance(grsp.g; kw...) +least_cost_distance(grsp::GridRSP; kw...) = least_cost_distance(grsp.g; kw...) """ mean_kl_divergence(grsp::GridRSP)::Float64 @@ -346,7 +347,7 @@ function connected_habitat(grsp::Union{Grid,GridRSP}, S::Matrix; g = _get_grid(grsp) if diagvalue !== nothing - for (j, i) in enumerate(targetnodes) + for (j, i) in enumerate(g.targetnodes) S[i, j] = diagvalue end end @@ -368,36 +369,46 @@ function connected_habitat(grsp::GridRSP, qˢvalue=0.0, qᵗvalue=0.0) + g = grsp.g + if avalue <= 0.0 throw("Affinity value has to be positive. Otherwise the graph will become disconnected.") end # Compute (linear) node indices from (cartesian) grid indices - node = findfirst(isequal(cell), grsp.g.id_to_grid_coordinate_list) + node = findfirst(isequal(cell), g.id_to_grid_coordinate_list) # Check that cell is in targetidx - if cell ∉ targetidx + if cell ∉ g.targetidx throw(ArgumentError("Computing adjusted connected_habitat is only supported for target cells")) end - affinities = copy(grsp.g.affinities) + affinities = copy(g.affinities) affinities[:, node] .= ifelse.(iszero.(affinities[:, node]), 0, avalue) affinities[node, :] .= ifelse.(iszero.(affinities[node, :]), 0, avalue) - newsource_qualities = copy(grsp.g.source_qualities) + newsource_qualities = copy(g.source_qualities) newsource_qualities[cell] = qˢvalue - newtarget_qualities = copy(grsp.g.target_qualities) + newtarget_qualities = copy(g.target_qualities) newtarget_qualities[cell] = qᵗvalue - newg = Grid(grsp.g.nrows, - grsp.g.ncols, + newtargetidx, newtargetnodes = _targetidx_and_nodes(newtarget_qualities, g.id_to_grid_coordinate_list) + newqs = [newsource_qualities[i] for i in g.id_to_grid_coordinate_list] + newqt = [newtarget_qualities[i] for i in g.id_to_grid_coordinate_list ∩ newtargetidx] + + newg = Grid(g.nrows, + g.ncols, affinities, - grsp.g.costfunction, - grsp.g.costfunction === nothing ? grsp.g.costmatrix : mapnz(grsp.g.costfunction, affinities), - grsp.g.id_to_grid_coordinate_list, + g.costfunction, + g.costfunction === nothing ? g.costmatrix : mapnz(g.costfunction, affinities), + g.id_to_grid_coordinate_list, newsource_qualities, newtarget_qualities, - dims(grsp)) + newtargetidx, + newtargetnodes, + newqs, + newqt, + dims(g)) newh = GridRSP(newg; θ=grsp.θ) @@ -522,14 +533,14 @@ function LinearAlgebra.eigmax(grsp::GridRSP; # compute left vector (of submatrix) by shift-invert Flu = lu(qSq₀₀ - λ₀[1]*I) - vˡ₀ = ldiv!(Flu', rand(length(targetidx))) + vˡ₀ = ldiv!(Flu', rand(length(g.targetidx))) rmul!(vˡ₀, inv(vˡ₀[1])) # construct full left vector vˡ = zeros(n) vˡ[g.targetnodes] = vˡ₀ - return vˡ, λ₀=λ₀[1], vʳ + return (vˡ, λ₀=λ₀[1], vʳ) end """ @@ -567,7 +578,7 @@ function criticality(grsp::GridRSP; end landscape = fill(NaN, size(grsp.g)) - landscape[targetidx] = critvec + landscape[g.targetidx] = critvec return _maybe_raster(landscape, grsp) end \ No newline at end of file diff --git a/src/randomizedshortestpath.jl b/src/randomizedshortestpath.jl index 06777e8..fdd6215 100644 --- a/src/randomizedshortestpath.jl +++ b/src/randomizedshortestpath.jl @@ -43,8 +43,9 @@ function RSP_betweenness_qweighted(W::SparseMatrixCSC, qˢZⁱqᵗ .= qˢ .* Zⁱ .* qᵗ' sumqˢ = sum(qˢ) for j in axes(Z, 2) - qˢZⁱqᵗ[targetnodes[j], j] -= sumqˢ * qᵗ[j] * Zⁱ[targetnodes[j], j] + qˢZⁱqᵗ[targetnodes[j], j] -= sumqˢ * qᵗ[j] * Zⁱ[targetnodes[j], j] end + ZqˢZⁱqᵗZt = solve_ldiv!(solver, (I - W)', qˢZⁱqᵗ) ZqˢZⁱqᵗZt .*= Z @@ -59,7 +60,7 @@ function RSP_betweenness_kweighted(W::SparseMatrixCSC, S::AbstractMatrix, # Matrix of proximities landmarks::AbstractVector; Zⁱ=_inv(Z), - workspace=zeros(size(Z)), + workspace1=zeros(size(Z)), kw... ) axis1, axis2 = axes(Z) @@ -76,22 +77,21 @@ function RSP_betweenness_kweighted(W::SparseMatrixCSC, throw(DimensionMismatch("")) end - KZⁱ = workspace + KZⁱ = workspace1 KZⁱ .= qˢ .* S .* qᵗ' # If any of the values of KZⁱ is above one then there is a risk of overflow. # Hence, we scale the matrix and apply the scale factor by the end of the # computation. λ = max(1.0, maximum(KZⁱ)) - s = sum(KZⁱ, dims=1) - s .*= inv(λ) - k = vec(s) + k = vec(sum(KZⁱ, dims=1)) * inv(λ) KZⁱ .*= inv.(λ) .* Zⁱ for j in axis2 KZⁱ[landmarks[j], j] -= k[j] .* Zⁱ[landmarks[j], j] end + # KZi overwritten from here ZKZⁱt = solve_ldiv!(solver, (I - W)', KZⁱ) ZKZⁱt .*= λ .* Z @@ -108,6 +108,10 @@ function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, solver=nothing, kw... ) + n = size(W,1) + + diagZⁱ = [Zⁱ[targetnodes[t], t] for t in 1:length(targetnodes)] + sumqˢ = sum(qˢ) # FIXME: This should be only done when actually size(Z, 2) < size(Z, 1)/K where K ≈ 10 or so. # Otherwise we just compute many of the elements of Z twice... @@ -115,23 +119,17 @@ function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, B = workspace1 B .= sparse_rhs(targetnodes, size(W, 1)) Zrows = solve_ldiv!(solver, (I - W'), B)' + Zrows .*= sumqˢ * qᵗ .* diagZⁱ else - Zrows = Z - end - - n = size(W, 1) - - diagZⁱ = [Zⁱ[targetnodes[t], t] for t in 1:length(targetnodes)] - sumqˢ = sum(qˢ) - Zrows .*= sumqˢ*qᵗ .* diagZⁱ + Zrows = Z .* (sumqˢ * qᵗ .* diagZⁱ) + end qˢZⁱqᵗ = qˢ .* Zⁱ .* qᵗ' - QZⁱᵀZ = qˢZⁱqᵗ' / (I - W) + QZⁱᵀZ = qˢZⁱqᵗ'/(I - W) - RHS = workspace1 - RHS .= QZⁱᵀZ .- Zrows + RHS = QZⁱᵀZ - Zrows edge_betweennesses = copy(W) @@ -163,18 +161,18 @@ function RSP_edge_betweenness_kweighted(W::SparseMatrixCSC, k̂ = vec(sum(K̂, dims=1)) K̂ .*= Zⁱ - K̂ᵀZ = K̂' / (I - W) - k̂diagZⁱ = k̂ .* [Zⁱ[targetnodes[t], t] for t in 1:length(targetnodes)] + K̂ᵀZ = K̂'/(I - W) + + k̂diagZⁱ = k̂.*[Zⁱ[targetnodes[t], t] for t in 1:length(targetnodes)] B = workspace1 B .= sparse_rhs(targetnodes, size(W, 1)) Zrows = solve_ldiv!(solver, (I - W'), B) - k̂diagZⁱZ = workspace2 + k̂diagZⁱZ = workspace1 k̂diagZⁱZ .= k̂diagZⁱ .* Zrows' - K̂ᵀZ_minus_diag = workspace2 - K̂ᵀZ_minus_diag .= K̂ᵀZ .- k̂diagZⁱZ + K̂ᵀZ_minus_diag = K̂ᵀZ .- k̂diagZⁱZ edge_betweennesses = copy(W) @@ -206,11 +204,11 @@ function RSP_expected_cost(W::SparseMatrixCSC, throw(DimensionMismatch("")) end if axes(Z, 2) != axes(landmarks, 1) - Z = Z[:, landmarks] + Z = Z[:,landmarks] end if size(Z, 1) == size(Z, 2) - C̄ = Z * ((C .* W) * Z) + C̄ = Z*((C .* W)*Z) else C̄ = solve_ldiv!(solver, (I - W), ((C .* W) * Z)) end @@ -237,9 +235,13 @@ function RSP_survival_probability(Z::AbstractMatrix, θ::Real, landmarks::Abstra end function RSP_power_mean_proximity(Z::AbstractMatrix, θ::Real, landmarks::AbstractVector; - survival_probability=RSP_survival_probability(Z, θ, landmarks; kw...), - kw... + survival_probability=nothing, kw... ) + survival_probability = if isnothing(survival_probability) + RSP_survival_probability(Z, θ, landmarks; kw...) + else + survival_probability + end survival_probability .^ (1 / θ) end diff --git a/src/solvers.jl b/src/solvers.jl index 239fef3..dadaa09 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -8,8 +8,9 @@ Abstract supertype for ConScape solvers. # RSP is not used for ConnectivityMeasure, so the solver isn't used function solve(s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, g::Grid) + workspaces = _setup_workspace(p, g) return map(p.graph_measures) do gm - compute(gm, p, g; solver=s) + compute(gm, p, g; solver=s, workspaces...) end end @@ -21,8 +22,9 @@ function solve(s::Solver, cm::FundamentalMeasure, p::AbstractProblem, g::Grid) # TODO remove use of GridRSP where possible grsp = GridRSP(g, cm.θ, Pref, W, Z) + workspaces = _setup_workspace(p, grsp) results = map(p.graph_measures) do gm - compute(gm, p, grsp) + compute(gm, p, grsp; workspaces...) end return _merge_to_stack(results) end diff --git a/test/problem.jl b/test/problem.jl index 183c1bb..046f45f 100644 --- a/test/problem.jl +++ b/test/problem.jl @@ -7,9 +7,8 @@ _tempdir = mkdir(tempname()) mov_prob = replace_missing(Raster(joinpath(datadir, "mov_prob_1000.asc")), NaN) hab_qual = replace_missing(Raster(joinpath(datadir, "hab_qual_1000.asc")), NaN) -rast = ConScape.coarse_graining(RasterStack((; affinities=mov_prob, qualities=hab_qual)), 10) - -# rast = RasterStack((; affinities=mov_prob, qualities=hab_qual)) +rast = RasterStack((; affinities=mov_prob, qualities=hab_qual)) +rast = ConScape.coarse_graining(rast, 10) graph_measures = graph_measures = (; func=ConScape.ConnectedHabitat(), @@ -27,6 +26,7 @@ expected_layers = (:func_exp, :func_oddsfor, :qbetw, :kbetw_exp, :kbetw_oddsfor) problem = ConScape.Problem(; graph_measures, connectivity_measure, solver=ConScape.MatrixSolver(), ) +@be ConScape.solve(problem, rast) @time result = ConScape.solve(problem, rast) @test result isa RasterStack @test size(result) == size(rast) @@ -37,6 +37,7 @@ vector_problem = ConScape.Problem(; graph_measures, connectivity_measure, solver = ConScape.VectorSolver(; threaded=true), ) +@benchmark ConScape.solve(vector_problem, rast) @time vector_result = ConScape.solve(vector_problem, rast) @test vector_result isa RasterStack @test size(vector_result) == size(rast) diff --git a/test/runtests.jl b/test/runtests.jl index 061e180..270df59 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,7 +1,7 @@ using ConScape, Test, SparseArrays using Rasters, ArchGDAL, Plots -inclued("problem.jl") +include("problem.jl") # TODO reorganise this into separate files @@ -433,8 +433,7 @@ end (ConScape.survival_probability, 1.3475609129305437e7), (ConScape.power_mean_proximity, 3.279995546746518e6)) - vˡ, λ, vʳ = ConScape.eigmax(grsp, - connectivity_function=connectivity_function) + vˡ, λ, vʳ = ConScape.eigmax(grsp; connectivity_function) # Compute the weighted proximity matrix to check results S = connectivity_function(grsp) From 2f680a1671e860892f48a3446559e3ec1f2f63c6 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 14 Jan 2025 13:40:45 +0100 Subject: [PATCH 03/51] work reduction with traits --- src/graph_measure.jl | 36 ++++++++++++++++++++++++++++------ src/gridrsp.jl | 37 ++++++++++++++++++++++++++++++----- src/randomizedshortestpath.jl | 23 ++++++++++++++++------ src/solvers.jl | 23 +++++++++++++++------- test/problem.jl | 12 ++++++++---- 5 files changed, 103 insertions(+), 28 deletions(-) diff --git a/src/graph_measure.jl b/src/graph_measure.jl index 1a9df00..da0b1b0 100644 --- a/src/graph_measure.jl +++ b/src/graph_measure.jl @@ -123,19 +123,43 @@ needs_inv(::GraphMeasure) = false needs_inv(::BetweennessMeasure) = true needs_workspace(::GraphMeasure) = false needs_workspace(::BetweennessMeasure) = true - -function _setup_workspace(p::AbstractProblem, grsp::GridRSP) - gm = p.graph_measures - workspace = if mapreduce(needs_workspace, |, gm; init=false) +needs_expected_cost(::GraphMeasure) = false +needs_expected_cost(::EdgeBetweennessKweighted) = true +needs_expected_cost(::MeanKullbackLeiblerDivergence) = true +needs_free_energy_distance(::GraphMeasure) = false +needs_free_energy_distance(::MeanKullbackLeiblerDivergence) = true +needs_Aaj_init(::GraphMeasure) = true +hastrait(t, gms) = mapreduce(t, |, gms; init=false) + +function _setup_workspace(p::AbstractProblem, grsp::GridRSP; kw...) + gms = p.graph_measures + workspace1 = if hastrait(needs_workspace, gms) similar(grsp.Z) else nothing end - Zⁱ = if mapreduce(needs_inv, |, gm; init=false) + Zⁱ = if hastrait(needs_inv, gms) _inv(grsp.Z) else nothing end + Aadj_init, Aadj = if hastrait(needs_Aaj_init, gms) + Aadj = (I - grsp.W)' + solver_init(p.solver, Aadj), Aadj + else + nothing + end + workspace_kw = (; Zⁱ, workspace1, Aadj_init, Aadj) + expected_cost = if hastrait(needs_expected_cost, gms) + ConScape.expected_cost(grsp; workspace_kw..., kw...) + else + nothing + end + free_energy_distance = if hastrait(needs_free_energy_distance, gms) + ConScape.free_energy_distance(grsp; workspace_kw..., kw...) + else + nothing + end - return (; Zⁱ, workspace) + return (; workspace_kw..., kw..., expected_cost, free_energy_distance) end \ No newline at end of file diff --git a/src/gridrsp.jl b/src/gridrsp.jl index 20e0031..befcb86 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -89,11 +89,23 @@ function betweenness_kweighted(grsp::GridRSP; connectivity_function=expected_cost, distance_transformation=nothing, diagvalue=nothing, - proximities=connectivity_function(grsp), + proximities=nothing, + expected_cost=nothing, + free_energy_distance=nothing, kw... ) g = grsp.g + if isnothing(proximities) + proximities = if connectivity_function == ConScape.expected_cost && !isnothing(expected_cost) + expected_cost + elseif connectivity_function == ConScape.free_energy_distance && !isnothing(free_energy_distance) + free_energy_distance + else + connectivity_function(grsp; kw...) + end + end + # Check that distance_transformation function has been passed if no cost function is saved if distance_transformation === nothing && connectivity_function <: DistanceFunction if g.costfunction === nothing @@ -138,6 +150,7 @@ function edge_betweenness_kweighted(grsp::GridRSP; expected_cost=nothing, kw... ) + g = grsp.g if isnothing(expected_cost) expected_cost = ConScape.expected_cost(grsp; kw...) @@ -213,10 +226,10 @@ function mean_lc_kl_divergence(grsp::GridRSP; kw...) end function least_cost_kl_divergence(C::SparseMatrixCSC, Pref::SparseMatrixCSC, targetnode::Integer; + graph=SimpleWeightedDiGraph(C), kw... ) n = size(C, 1) - graph = SimpleWeightedDiGraph(C) if !(1 <= targetnode <= n) throw(ArgumentError("target node not found")) end @@ -309,6 +322,8 @@ function connected_habitat(grsp::Union{Grid,GridRSP}; diagvalue=nothing, θ::Union{Nothing,Real}=nothing, approx::Bool=false, + expected_cost=nothing, + free_energy_distance=nothing, kw... ) # Check that distance_transformation function has been passed if no cost function is saved @@ -326,12 +341,24 @@ function connected_habitat(grsp::Union{Grid,GridRSP}; if θ === nothing && connectivity_function !== least_cost_distance throw(ArgumentError("θ must be a positive real number when passing a Grid")) end - connectivity_function(grsp; θ=θ, approx=approx) + if connectivity_function == ConScape.expected_cost && !isnothing(expected_cost) + expected_cost + elseif connectivity_function == ConScape.free_energy_distance && !isnothing(free_energy_distance) + free_energy_distance + else + connectivity_function(grsp; θ=θ, approx=approx, kw...) + end else if θ !== nothing throw(ArgumentError("θ must be unspecified when passing a GridRSP")) end - connectivity_function(grsp) + if connectivity_function == ConScape.expected_cost && !isnothing(expected_cost) + expected_cost + elseif connectivity_function == ConScape.free_energy_distance && !isnothing(free_energy_distance) + free_energy_distance + else + connectivity_function(grsp; kw...) + end end if connectivity_function <: DistanceFunction @@ -385,7 +412,7 @@ function connected_habitat(grsp::GridRSP, affinities = copy(g.affinities) affinities[:, node] .= ifelse.(iszero.(affinities[:, node]), 0, avalue) - affinities[node, :] .= ifelse.(iszero.(affinities[node, :]), 0, avalue) + affinitie[node, :] .= ifelse.(iszero.(affinities[node, :]), 0, avalue) newsource_qualities = copy(g.source_qualities) newsource_qualities[cell] = qˢvalue diff --git a/src/randomizedshortestpath.jl b/src/randomizedshortestpath.jl index fdd6215..183030c 100644 --- a/src/randomizedshortestpath.jl +++ b/src/randomizedshortestpath.jl @@ -37,6 +37,8 @@ function RSP_betweenness_qweighted(W::SparseMatrixCSC, Zⁱ=_inv(Z), workspace1=zeros(size(Z)), solver=nothing, + Aadj = (I - W)', + Aadj_init=solver_init(solver, Aadj), kw... ) qˢZⁱqᵗ = workspace1 @@ -46,7 +48,8 @@ function RSP_betweenness_qweighted(W::SparseMatrixCSC, qˢZⁱqᵗ[targetnodes[j], j] -= sumqˢ * qᵗ[j] * Zⁱ[targetnodes[j], j] end - ZqˢZⁱqᵗZt = solve_ldiv!(solver, (I - W)', qˢZⁱqᵗ) + # TODO adjoint of LinearSolver? + ZqˢZⁱqᵗZt = solve_ldiv!(solver, Aadj_init, Aadj, qˢZⁱqᵗ) ZqˢZⁱqᵗZt .*= Z return sum(ZqˢZⁱqᵗZt, dims=2) # diag(Z * ZqˢZⁱqᵗ') @@ -61,6 +64,8 @@ function RSP_betweenness_kweighted(W::SparseMatrixCSC, landmarks::AbstractVector; Zⁱ=_inv(Z), workspace1=zeros(size(Z)), + Aadj = (I - W)', + Aadj_init=solver_init(solver, Aadj), kw... ) axis1, axis2 = axes(Z) @@ -92,7 +97,7 @@ function RSP_betweenness_kweighted(W::SparseMatrixCSC, end # KZi overwritten from here - ZKZⁱt = solve_ldiv!(solver, (I - W)', KZⁱ) + ZKZⁱt = solve_ldiv!(solver, Aadj_init, Aadj, KZⁱ) ZKZⁱt .*= λ .* Z return vec(sum(ZKZⁱt, dims=2)) # diag(Z * KZⁱ') @@ -105,6 +110,8 @@ function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, targetnodes::AbstractVector; Zⁱ=_inv(Z), workspace1=zeros(size(Z)), + Aadj = (I - W)', + Aadj_init=solver_init(solver, Aadj), solver=nothing, kw... ) @@ -118,7 +125,7 @@ function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, if size(Z, 2) < size(Z, 1) B = workspace1 B .= sparse_rhs(targetnodes, size(W, 1)) - Zrows = solve_ldiv!(solver, (I - W'), B)' + Zrows = solve_ldiv!(solver, Aadj_init, Aadj, B)' Zrows .*= sumqˢ * qᵗ .* diagZⁱ else @@ -153,7 +160,9 @@ function RSP_edge_betweenness_kweighted(W::SparseMatrixCSC, targetnodes::AbstractVector; Zⁱ=_inv(Z), workspace1=zeros(size(Z)), - workspace2=zeros(size(Z)), + # workspace2=zeros(size(Z)), + Aadj = (I - W)', + Aadj_init=solver_init(solver, Aadj), solver=nothing, kw... ) @@ -168,7 +177,7 @@ function RSP_edge_betweenness_kweighted(W::SparseMatrixCSC, B = workspace1 B .= sparse_rhs(targetnodes, size(W, 1)) - Zrows = solve_ldiv!(solver, (I - W'), B) + Zrows = solve_ldiv!(solver, Aadj_init, Aadj, B) k̂diagZⁱZ = workspace1 k̂diagZⁱZ .= k̂diagZⁱ .* Zrows' @@ -194,6 +203,8 @@ function RSP_expected_cost(W::SparseMatrixCSC, Z::AbstractMatrix, landmarks::AbstractVector; solver=nothing, + A,# = (I - W), + A_init, # =solver_init(solver, A), kw... ) @@ -210,7 +221,7 @@ function RSP_expected_cost(W::SparseMatrixCSC, if size(Z, 1) == size(Z, 2) C̄ = Z*((C .* W)*Z) else - C̄ = solve_ldiv!(solver, (I - W), ((C .* W) * Z)) + C̄ = solve_ldiv!(solver, A_init, A, ((C .* W) * Z)) end C̄ ./= Z diff --git a/src/solvers.jl b/src/solvers.jl index dadaa09..bad6709 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -16,19 +16,29 @@ end function solve(s::Solver, cm::FundamentalMeasure, p::AbstractProblem, g::Grid) (; A, B, Pref, W) = setup_sparse_problem(g, cm) - Z = solve_ldiv!(s, A, Matrix(B)) + A_init = solver_init(s, A) + Z = solve_ldiv!(s, A_init, A, Matrix(B)) # Check that values in Z are not too small: # _check_z(s, Z, W, g) # TODO remove use of GridRSP where possible grsp = GridRSP(g, cm.θ, Pref, W, Z) - workspaces = _setup_workspace(p, grsp) + workspaces = _setup_workspace(p, grsp; A, A_init) results = map(p.graph_measures) do gm compute(gm, p, grsp; workspaces...) end return _merge_to_stack(results) end +# Fallback generic ldiv solver +solve_ldiv!(solver, A, B) = solve_ldiv!(solver, lu(A), A, B) +# Pre-factorized +function solve_ldiv!(solver, F, A, B) + ldiv!(F, B) +end + +solver_init(solver, A) = lu(A) + """ MatrixSolver(; check) @@ -41,9 +51,6 @@ But may be best for GPUs using CuSSP.jl ? check::Bool = true end -# Fallback generic ldiv solver -solve_ldiv!(solver, A, B) = ldiv!(lu(A), B) - """ VectorSolver(; check, threaded) @@ -55,8 +62,7 @@ less memory use and the capacity for threading threaded::Bool = false end -function solve_ldiv!(s::VectorSolver, A, B) - F = lu(A) +function solve_ldiv!(s::VectorSolver, F, A, B) transposeoptype = SparseArrays.LibSuiteSparse.UMFPACK_A # for SparseArrays.UMFPACK._AqldivB_kernel!(Z, F, B, transposeoptype) @@ -137,6 +143,9 @@ function solve_ldiv!(s::LinearSolver, A, B) # Define and initialise the linear problem linprob = LinearProblem(A, b) linsolve = init(linprob, s.args...; s.keywords...) + solve_ldiv!(s, linsolve, A, B) +end +function solve_ldiv!(s::LinearSolver, linsolve, A, B) # TODO: for now we define a Z matrix, but later modify ops # to run column by column without materialising Z # if s.threaded diff --git a/test/problem.jl b/test/problem.jl index 046f45f..80d7ccc 100644 --- a/test/problem.jl +++ b/test/problem.jl @@ -14,8 +14,8 @@ graph_measures = graph_measures = (; func=ConScape.ConnectedHabitat(), qbetw=ConScape.BetweennessQweighted(), kbetw=ConScape.BetweennessKweighted(), - # mkld=ConScape.MeanKullbackLeiblerDivergence(), - # mlcd=ConScape.MeanLeastCostKullbackLeiblerDivergence(), + #mkld=ConScape.MeanKullbackLeiblerDivergence(), + #mlcd=ConScape.MeanLeastCostKullbackLeiblerDivergence(), ) distance_transformation = (exp=x -> exp(-x/75), oddsfor=ConScape.OddsFor()) connectivity_measure = ConScape.ExpectedCost(; θ=1.0, distance_transformation) @@ -26,7 +26,8 @@ expected_layers = (:func_exp, :func_oddsfor, :qbetw, :kbetw_exp, :kbetw_oddsfor) problem = ConScape.Problem(; graph_measures, connectivity_measure, solver=ConScape.MatrixSolver(), ) -@be ConScape.solve(problem, rast) +ConScape.solve(problem, rast) +@profview ConScape.solve(problem, rast) @time result = ConScape.solve(problem, rast) @test result isa RasterStack @test size(result) == size(rast) @@ -35,8 +36,11 @@ problem = ConScape.Problem(; # Threaded solve problem vector_problem = ConScape.Problem(; graph_measures, connectivity_measure, - solver = ConScape.VectorSolver(; threaded=true), + solver = ConScape.VectorSolver(; threaded=false), ) +ConScape.solve(vector_problem, rast) +@profview ConScape.solve(vector_problem, rast) +using BenchmarkTools @benchmark ConScape.solve(vector_problem, rast) @time vector_result = ConScape.solve(vector_problem, rast) @test vector_result isa RasterStack From 2839f449b91793eef8e9f63326f4fcd86a21fe85 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 14 Jan 2025 17:16:49 +0100 Subject: [PATCH 04/51] more memory reductions --- test/problem.jl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/test/problem.jl b/test/problem.jl index 80d7ccc..7d263d5 100644 --- a/test/problem.jl +++ b/test/problem.jl @@ -8,14 +8,14 @@ _tempdir = mkdir(tempname()) mov_prob = replace_missing(Raster(joinpath(datadir, "mov_prob_1000.asc")), NaN) hab_qual = replace_missing(Raster(joinpath(datadir, "hab_qual_1000.asc")), NaN) rast = RasterStack((; affinities=mov_prob, qualities=hab_qual)) -rast = ConScape.coarse_graining(rast, 10) +# rast = ConScape.coarse_graining(rast, 10) graph_measures = graph_measures = (; func=ConScape.ConnectedHabitat(), qbetw=ConScape.BetweennessQweighted(), kbetw=ConScape.BetweennessKweighted(), - #mkld=ConScape.MeanKullbackLeiblerDivergence(), - #mlcd=ConScape.MeanLeastCostKullbackLeiblerDivergence(), + # mkld=ConScape.MeanKullbackLeiblerDivergence(), + # mlcd=ConScape.MeanLeastCostKullbackLeiblerDivergence(), ) distance_transformation = (exp=x -> exp(-x/75), oddsfor=ConScape.OddsFor()) connectivity_measure = ConScape.ExpectedCost(; θ=1.0, distance_transformation) @@ -26,9 +26,12 @@ expected_layers = (:func_exp, :func_oddsfor, :qbetw, :kbetw_exp, :kbetw_oddsfor) problem = ConScape.Problem(; graph_measures, connectivity_measure, solver=ConScape.MatrixSolver(), ) -ConScape.solve(problem, rast) @profview ConScape.solve(problem, rast) @time result = ConScape.solve(problem, rast) +ConScape.solve(problem, rast) +using BenchmarkTools +@benchmark ConScape.solve(problem, rast) +plot(result) @test result isa RasterStack @test size(result) == size(rast) @test keys(result) == expected_layers @@ -36,13 +39,14 @@ ConScape.solve(problem, rast) # Threaded solve problem vector_problem = ConScape.Problem(; graph_measures, connectivity_measure, - solver = ConScape.VectorSolver(; threaded=false), + solver = ConScape.VectorSolver(; threaded=true), ) -ConScape.solve(vector_problem, rast) -@profview ConScape.solve(vector_problem, rast) +@time vector_result = ConScape.solve(vector_problem, rast) +plot(vector_result) +using ProfileView +ProfileView.@profview ConScape.solve(vector_problem, rast) using BenchmarkTools @benchmark ConScape.solve(vector_problem, rast) -@time vector_result = ConScape.solve(vector_problem, rast) @test vector_result isa RasterStack @test size(vector_result) == size(rast) @test keys(vector_result) == expected_layers From 3d2b56df0181db6f531f69fa0ea54155d55458a9 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 14 Jan 2025 17:17:18 +0100 Subject: [PATCH 05/51] more memory reductions --- src/graph_measure.jl | 18 +++++++++++------- src/grid.jl | 10 +++++----- src/gridrsp.jl | 2 +- src/randomizedshortestpath.jl | 32 ++++++++++++++++++++------------ src/solvers.jl | 7 ++----- 5 files changed, 39 insertions(+), 30 deletions(-) diff --git a/src/graph_measure.jl b/src/graph_measure.jl index da0b1b0..e0f571c 100644 --- a/src/graph_measure.jl +++ b/src/graph_measure.jl @@ -61,9 +61,9 @@ graph_function(m::EigMax) = eigmax # Map structs to function keywords, # a bit of a hack until we refactor the rest keywords(gm::GraphMeasure, p::AbstractProblem) = - (; _keywords(gm)...)#, solver=solver(p)) + (; _keywords(gm)..., solver=solver(p)) keywords(gm::ConnectedHabitat, p::AbstractProblem) = - (; _keywords(gm)..., approx=connectivity_measure(p).approx)#, solver=solver(p)) + (; _keywords(gm)..., approx=connectivity_measure(p).approx, solver=solver(p)) # A trait for connectivity requirement struct NeedsConnectivity end @@ -123,9 +123,12 @@ needs_inv(::GraphMeasure) = false needs_inv(::BetweennessMeasure) = true needs_workspace(::GraphMeasure) = false needs_workspace(::BetweennessMeasure) = true +# needs_workspace_cr(::GraphMeasure) = false +# needs_workspace_cr(::BetweennessMeasure) = true needs_expected_cost(::GraphMeasure) = false needs_expected_cost(::EdgeBetweennessKweighted) = true needs_expected_cost(::MeanKullbackLeiblerDivergence) = true +needs_edge_betweenness_workspace(::MeanKullbackLeiblerDivergence) = true needs_free_energy_distance(::GraphMeasure) = false needs_free_energy_distance(::MeanKullbackLeiblerDivergence) = true needs_Aaj_init(::GraphMeasure) = true @@ -150,16 +153,17 @@ function _setup_workspace(p::AbstractProblem, grsp::GridRSP; kw...) nothing end workspace_kw = (; Zⁱ, workspace1, Aadj_init, Aadj) - expected_cost = if hastrait(needs_expected_cost, gms) - ConScape.expected_cost(grsp; workspace_kw..., kw...) + cf = connectivity_function(p) + expected_cost = if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost + ConScape.expected_cost(grsp; workspace_kw..., solver=solver(p), kw...) else nothing end - free_energy_distance = if hastrait(needs_free_energy_distance, gms) - ConScape.free_energy_distance(grsp; workspace_kw..., kw...) + free_energy_distance = if hastrait(needs_free_energy_distance, gms) || cf == ConScape.free_energy_distance + ConScape.free_energy_distance(grsp; workspace_kw..., solver=solver(p), kw...) else nothing end - return (; workspace_kw..., kw..., expected_cost, free_energy_distance) + return (; ) #workspace_kw..., kw..., expected_cost, free_energy_distance) end \ No newline at end of file diff --git a/src/grid.jl b/src/grid.jl index d4a7105..b477655 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -261,7 +261,7 @@ function largest_subgraph(g::Grid) # Find the subgraphs scc = strongly_connected_components(graph) - @info "cost graph contains $(length(scc)) strongly connected subgraphs" + # @info "cost graph contains $(length(scc)) strongly connected subgraphs" # Find the largest subgraph i = argmax(length.(scc)) @@ -269,10 +269,10 @@ function largest_subgraph(g::Grid) # extract node list and sort it scci = sort(scc[i]) - ndiffnodes = size(g.costmatrix, 1) - length(scci) - if ndiffnodes > 0 - @info "removing $ndiffnodes nodes from affinity and cost graphs" - end + # ndiffnodes = size(g.costmatrix, 1) - length(scci) + # if ndiffnodes > 0 + # @info "removing $ndiffnodes nodes from affinity and cost graphs" + # end # Extract the adjacency matrix of the largest subgraph affinities = g.affinities[scci, scci] diff --git a/src/gridrsp.jl b/src/gridrsp.jl index befcb86..794fa92 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -412,7 +412,7 @@ function connected_habitat(grsp::GridRSP, affinities = copy(g.affinities) affinities[:, node] .= ifelse.(iszero.(affinities[:, node]), 0, avalue) - affinitie[node, :] .= ifelse.(iszero.(affinities[node, :]), 0, avalue) + affinities[node, :] .= ifelse.(iszero.(affinities[node, :]), 0, avalue) newsource_qualities = copy(g.source_qualities) newsource_qualities[cell] = qˢvalue diff --git a/src/randomizedshortestpath.jl b/src/randomizedshortestpath.jl index 183030c..50978d6 100644 --- a/src/randomizedshortestpath.jl +++ b/src/randomizedshortestpath.jl @@ -35,7 +35,7 @@ function RSP_betweenness_qweighted(W::SparseMatrixCSC, qᵗ::AbstractVector, targetnodes::AbstractVector; Zⁱ=_inv(Z), - workspace1=zeros(size(Z)), + workspace1=similar(Z), solver=nothing, Aadj = (I - W)', Aadj_init=solver_init(solver, Aadj), @@ -63,7 +63,8 @@ function RSP_betweenness_kweighted(W::SparseMatrixCSC, S::AbstractMatrix, # Matrix of proximities landmarks::AbstractVector; Zⁱ=_inv(Z), - workspace1=zeros(size(Z)), + workspace1=similar(Z), + solver=nothing, Aadj = (I - W)', Aadj_init=solver_init(solver, Aadj), kw... @@ -109,7 +110,7 @@ function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, qᵗ::AbstractVector, targetnodes::AbstractVector; Zⁱ=_inv(Z), - workspace1=zeros(size(Z)), + workspace1=similar(Z), Aadj = (I - W)', Aadj_init=solver_init(solver, Aadj), solver=nothing, @@ -159,8 +160,7 @@ function RSP_edge_betweenness_kweighted(W::SparseMatrixCSC, K::AbstractMatrix, # Matrix of proximities targetnodes::AbstractVector; Zⁱ=_inv(Z), - workspace1=zeros(size(Z)), - # workspace2=zeros(size(Z)), + workspace1=similar(Z), Aadj = (I - W)', Aadj_init=solver_init(solver, Aadj), solver=nothing, @@ -203,8 +203,9 @@ function RSP_expected_cost(W::SparseMatrixCSC, Z::AbstractMatrix, landmarks::AbstractVector; solver=nothing, - A,# = (I - W), - A_init, # =solver_init(solver, A), + A = (I - W), + A_init = solver_init(solver, A), + workspace1=similar(Z), kw... ) @@ -218,11 +219,18 @@ function RSP_expected_cost(W::SparseMatrixCSC, Z = Z[:,landmarks] end - if size(Z, 1) == size(Z, 2) - C̄ = Z*((C .* W)*Z) - else - C̄ = solve_ldiv!(solver, A_init, A, ((C .* W) * Z)) - end + + # When threaded the solver is faster than # a dense matmul + # C̄ = if size(Z, 1) == size(Z, 2) + # B = mul!(workspace1, C .* W, Z) + # mul!(B, C .* W, Z) + # This is a dense-dense matmul... very slow + # Z * B + # else + # TODO + B = mul!(workspace1, C .* W, Z) + C̄ = solve_ldiv!(solver, A_init, A, B) + # end C̄ ./= Z # Zeros in Z can cause NaNs in C̄ ./= Z computation but the limit diff --git a/src/solvers.jl b/src/solvers.jl index bad6709..daa6fd6 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -31,11 +31,9 @@ function solve(s::Solver, cm::FundamentalMeasure, p::AbstractProblem, g::Grid) end # Fallback generic ldiv solver -solve_ldiv!(solver, A, B) = solve_ldiv!(solver, lu(A), A, B) +solve_ldiv!(solver, A, B) = solve_ldiv!(solver, solver_init(solver, A), A, B) # Pre-factorized -function solve_ldiv!(solver, F, A, B) - ldiv!(F, B) -end +solve_ldiv!(solver::Union{MatrixSolver,Nothing}, F, A, B) = ldiv!(F, B) solver_init(solver, A) = lu(A) @@ -187,7 +185,6 @@ function solve_ldiv!(s::LinearSolver, linsolve, A, B) B[:, i] .= sol.u end # end - @info "LinearSolver finished" return B end From 89aa9c03e3ed364f93f7d31f0df028f0ce713566 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Thu, 16 Jan 2025 16:19:59 +0100 Subject: [PATCH 06/51] updates --- Project.toml | 4 ++ src/ConScape.jl | 15 ++++- src/graph_measure.jl | 15 +++-- src/grid.jl | 12 ++-- src/problem.jl | 36 ++++------- src/randomizedshortestpath.jl | 16 ++--- src/solvers.jl | 87 ++++++++++++++++---------- src/tiles.jl | 112 ++++++++++++++++++++++++---------- src/utils.jl | 9 ++- test/problem.jl | 57 +++++++++++------ 10 files changed, 235 insertions(+), 128 deletions(-) diff --git a/Project.toml b/Project.toml index 77d2d50..149a609 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,8 @@ version = "0.3.0" [deps] ArnoldiMethod = "ec485272-7323-5ecc-a04f-4719b315124d" +BandedMatrices = "aae01518-5342-5314-be14-df237901396f" +CommonSolve = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" @@ -18,6 +20,8 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] ArnoldiMethod = "0.0.4, 0.4" +BandedMatrices = "1" +CommonSolve = "0.2" DelimitedFiles = "1" Graphs = "1" LaTeXStrings = "1.1" diff --git a/src/ConScape.jl b/src/ConScape.jl index 4c7ba14..c429bc4 100644 --- a/src/ConScape.jl +++ b/src/ConScape.jl @@ -1,10 +1,19 @@ module ConScape -using SparseArrays, LinearAlgebra -using Graphs, Plots, SimpleWeightedGraphs, ProgressLogging, ArnoldiMethod -using Rasters +using ArnoldiMethod +using Graphs +using LinearAlgebra using LinearSolve +using Plots +using ProgressLogging +using Rasters +using SimpleWeightedGraphs +using SparseArrays using Rasters.DimensionalData +using BandedMatrices + +import CommonSolve +import CommonSolve: solve, init # Old funcion-based interface abstract type ConnectivityFunction <: Function end diff --git a/src/graph_measure.jl b/src/graph_measure.jl index e0f571c..829c4b6 100644 --- a/src/graph_measure.jl +++ b/src/graph_measure.jl @@ -134,7 +134,11 @@ needs_free_energy_distance(::MeanKullbackLeiblerDivergence) = true needs_Aaj_init(::GraphMeasure) = true hastrait(t, gms) = mapreduce(t, |, gms; init=false) -function _setup_workspace(p::AbstractProblem, grsp::GridRSP; kw...) +function _measures_workspace(p::AbstractProblem, grsp::GridRSP; + A, + A_init, + kw... +) gms = p.graph_measures workspace1 = if hastrait(needs_workspace, gms) similar(grsp.Z) @@ -147,12 +151,13 @@ function _setup_workspace(p::AbstractProblem, grsp::GridRSP; kw...) nothing end Aadj_init, Aadj = if hastrait(needs_Aaj_init, gms) - Aadj = (I - grsp.W)' - solver_init(p.solver, Aadj), Aadj + Aadj = A' + (; F) = A_init + merge(A_init, (; F=F')), Aadj else nothing end - workspace_kw = (; Zⁱ, workspace1, Aadj_init, Aadj) + workspace_kw = (; Zⁱ, workspace1, Aadj_init, Aadj, A, A_init) cf = connectivity_function(p) expected_cost = if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost ConScape.expected_cost(grsp; workspace_kw..., solver=solver(p), kw...) @@ -165,5 +170,5 @@ function _setup_workspace(p::AbstractProblem, grsp::GridRSP; kw...) nothing end - return (; ) #workspace_kw..., kw..., expected_cost, free_energy_distance) + return (; workspace_kw..., kw..., expected_cost, free_energy_distance) end \ No newline at end of file diff --git a/src/grid.jl b/src/grid.jl index b477655..7548168 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -91,13 +91,13 @@ function Grid(nrows::Integer, nothing, costs end - if any(t -> t < 0, nonzeros(costmatrix)) - throw(ArgumentError("The cost graph can have only non-negative edge weights. Perhaps you should change the cost function?")) - end + # if any(t -> t < 0, nonzeros(costmatrix)) + # throw(ArgumentError("The cost graph can have only non-negative edge weights. Perhaps you should change the cost function?")) + # end - if ne(difference(SimpleDiGraph(costmatrix), SimpleDiGraph(affinities))) > 0 - throw(ArgumentError("cost graph contains edges not present in the affinity graph")) - end + # if ne(difference(SimpleDiGraph(costmatrix), SimpleDiGraph(affinities))) > 0 + # throw(ArgumentError("cost graph contains edges not present in the affinity graph")) + # end targetidx, targetnodes = _targetidx_and_nodes(target_qualities, id_to_grid_coordinate_list) qs = [_source_qualities[i] for i in id_to_grid_coordinate_list] diff --git a/src/problem.jl b/src/problem.jl index 9f58f48..b18cec4 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -52,28 +52,16 @@ graph_measures(p::Problem) = p.graph_measures connectivity_measure(p::Problem) = p.connectivity_measure solver(p::Problem) = p.solver -solve(p::Problem, rast::RasterStack) = solve(p, Grid(p, rast)) -solve(p::Problem, g::Grid) = solve(p.solver, connectivity_measure(p), p, g) - -# @kwdef struct ComputeAssesment{P,M,T} -# problem::P -# mem_stats::M -# totalmem::T -# end - -# """ -# allocate(co::ComputeAssesment) - -# Allocate memory required to run `solve` for the assessed ops. +solve(p::Problem, g::Grid; workspace=nothing) = + solve(p.solver, connectivity_measure(p), p, g; workspace) +function solve(p::Problem, rast::RasterStack; workspace=nothing) + grid = isnothing(workspace) ? Grid(p, rast) : workspace.grid + return solve(p, grid; workspace) +end -# The returned object can be passed as the `allocs` keyword to `solve`. -# """ -# function allocate(co::ComputeAssesment) -# zmax = co.zmax -# # But actually do this with GenericMemory using Julia v1.11 -# Z = Matrix{Float64}(undef, co.zmax) -# S = sparse(1:zmax[1], 1:zmax[2], 1.0, zmax...) -# L = lu(S) -# # Just return a NamedTuple for now -# return (; Z, S, L) -# end +function init(p::Problem, rast::RasterStack) + grid = Grid(p, rast) + return (; grid, init(p, grid)...) +end +# Init is conditional on solver and connectivity measure +init(p::AbstractProblem, g::Grid) = init(solver(p), connectivity_measure(p), p, g) \ No newline at end of file diff --git a/src/randomizedshortestpath.jl b/src/randomizedshortestpath.jl index 50978d6..20d4246 100644 --- a/src/randomizedshortestpath.jl +++ b/src/randomizedshortestpath.jl @@ -2,7 +2,7 @@ function sparse_rhs(targetnodes, n) sparse(targetnodes, 1:length(targetnodes), - 1.0, + 1.014/11-2024, n, length(targetnodes), ) @@ -38,7 +38,7 @@ function RSP_betweenness_qweighted(W::SparseMatrixCSC, workspace1=similar(Z), solver=nothing, Aadj = (I - W)', - Aadj_init=solver_init(solver, Aadj), + Aadj_init=init(solver, Aadj), kw... ) qˢZⁱqᵗ = workspace1 @@ -66,7 +66,7 @@ function RSP_betweenness_kweighted(W::SparseMatrixCSC, workspace1=similar(Z), solver=nothing, Aadj = (I - W)', - Aadj_init=solver_init(solver, Aadj), + Aadj_init=init(solver, Aadj), kw... ) axis1, axis2 = axes(Z) @@ -112,7 +112,7 @@ function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, Zⁱ=_inv(Z), workspace1=similar(Z), Aadj = (I - W)', - Aadj_init=solver_init(solver, Aadj), + Aadj_init=init(solver, Aadj), solver=nothing, kw... ) @@ -162,7 +162,7 @@ function RSP_edge_betweenness_kweighted(W::SparseMatrixCSC, Zⁱ=_inv(Z), workspace1=similar(Z), Aadj = (I - W)', - Aadj_init=solver_init(solver, Aadj), + Aadj_init=init(solver, Aadj), solver=nothing, kw... ) @@ -203,9 +203,9 @@ function RSP_expected_cost(W::SparseMatrixCSC, Z::AbstractMatrix, landmarks::AbstractVector; solver=nothing, - A = (I - W), - A_init = solver_init(solver, A), - workspace1=similar(Z), + A,# =(I - W), + A_init,# =init(solver, A), + workspace1,# =similar(Z), kw... ) diff --git a/src/solvers.jl b/src/solvers.jl index daa6fd6..252d5f0 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -7,35 +7,32 @@ Abstract supertype for ConScape solvers. """ Solver # RSP is not used for ConnectivityMeasure, so the solver isn't used -function solve(s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, g::Grid) - workspaces = _setup_workspace(p, g) +function solve(s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, g::Grid; + workspace=init(s, cm, p, g), +) return map(p.graph_measures) do gm - compute(gm, p, g; solver=s, workspaces...) + compute(gm, p, g; solver=s, workspace...) end end - -function solve(s::Solver, cm::FundamentalMeasure, p::AbstractProblem, g::Grid) - (; A, B, Pref, W) = setup_sparse_problem(g, cm) - A_init = solver_init(s, A) - Z = solve_ldiv!(s, A_init, A, Matrix(B)) - # Check that values in Z are not too small: -# _check_z(s, Z, W, g) - +function solve(s::Solver, cm::FundamentalMeasure, p::AbstractProblem, g::Grid; + workspace=nothing, +) + workspace = isnothing(workspace) ? init(s, cm, p, g) : workspace # TODO remove use of GridRSP where possible - grsp = GridRSP(g, cm.θ, Pref, W, Z) - workspaces = _setup_workspace(p, grsp; A, A_init) results = map(p.graph_measures) do gm - compute(gm, p, grsp; workspaces...) + compute(gm, p, workspace.grsp; workspace...) end return _merge_to_stack(results) end -# Fallback generic ldiv solver -solve_ldiv!(solver, A, B) = solve_ldiv!(solver, solver_init(solver, A), A, B) -# Pre-factorized -solve_ldiv!(solver::Union{MatrixSolver,Nothing}, F, A, B) = ldiv!(F, B) +function init(s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, g::Grid) + # TODO what is needed here? + return (;) +end -solver_init(solver, A) = lu(A) +# Fallback generic ldiv solver +solve_ldiv!(solver, A, B) = + solve_ldiv!(solver, init(solver, A), A, B) """ MatrixSolver(; check) @@ -49,6 +46,10 @@ But may be best for GPUs using CuSSP.jl ? check::Bool = true end +solve_ldiv!(::Union{MatrixSolver,Nothing}, (; F), A, B) = ldiv!(F, B) + +init(::Union{Nothing,MatrixSolver}, A::AbstractMatrix) = (; F=lu(A)) + """ VectorSolver(; check, threaded) @@ -60,34 +61,59 @@ less memory use and the capacity for threading threaded::Bool = false end -function solve_ldiv!(s::VectorSolver, F, A, B) +function init(s::Union{MatrixSolver,VectorSolver,Nothing}, + cm::FundamentalMeasure, + p::AbstractProblem, + g::Grid +) + (; A, B, Pref, W) = setup_sparse_problem(g, cm) + # Check that values in Z are not too small: + A_init = init(s, A) + Z = solve_ldiv!(s, A_init, A, Matrix(B)) + _check_z(s, Z, W, g) + grsp = GridRSP(g, cm.θ, Pref, W, Z) + return (; grsp, _measures_workspace(p, grsp; A, A_init)...) +end + +function init(s::VectorSolver, A::AbstractMatrix) + F = lu(A) + if s.threaded + nbuffers = Threads.nthreads() + channel = Channel{Tuple{typeof(F),Vector{Float64}}}(nbuffers) + for _ in 1:nbuffers + # TODO not all of F needs to be duplicated? + # Can we just copy the workspace arrays and resuse the rest? + put!(channel, (deepcopy(F), Vector{eltype(A)}(undef, size(A, 2)))) + end + return (; F, channel) + else + b = zeros(eltype(A), size(A, 2)) + return (; F, b) + end +end + +function solve_ldiv!(s::VectorSolver, init, A, B) transposeoptype = SparseArrays.LibSuiteSparse.UMFPACK_A # for SparseArrays.UMFPACK._AqldivB_kernel!(Z, F, B, transposeoptype) # This is basically SparseArrays.UMFPACK._AqldivB_kernel! # But we unroll it to avoid copies or allocation of B if s.threaded + (; F, channel) = init # Create a channel to store problem b vectors for threads # see https://juliafolds2.github.io/OhMyThreads.jl/stable/literate/tls/tls/ - nbuffers = Threads.nthreads() - ch = Channel{Tuple{typeof(F),Vector{Float64}}}(nbuffers) - for i in 1:nbuffers - # TODO not all of F needs to be duplicated? - # Can we just copy the workspace arrays and resuse the rest? - put!(ch, (deepcopy(F), Vector{eltype(A)}(undef, size(B, 1)))) - end Threads.@threads for col in 1:size(B, 2) # Get a workspace from the channel - F_t, b_t = take!(ch) + F_t, b_t = take!(channel) # Copy a column from B b_t .= view(B, :, col) # Solve for the column SparseArrays.UMFPACK.solve!(view(B, :, col), F_t, b_t, transposeoptype) # Reuse the workspace - put!(ch, (F_t, b_t)) + put!(channel, (F_t, b_t)) end else - b = zeros(eltype(B), size(B, 1)) + (; F, b) = init for col in 1:size(B, 2) b .= view(B, :, col) SparseArrays.UMFPACK.solve!(view(B, :, col), F, b, transposeoptype) @@ -201,7 +227,6 @@ function setup_sparse_problem(g::Grid, cm::FundamentalMeasure) return (; A, B, Pref, W) end - # We may have multiple distance_measures per # graph_measure, but we want a single RasterStack. # So we merge the names of the two layers diff --git a/src/tiles.jl b/src/tiles.jl index 24b33b7..b151a09 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -23,19 +23,34 @@ to be run over the same windowed grids. end WindowedProblem(problem; kw...) = WindowedProblem(; problem, kw...) -function solve(wp::WindowedProblem, rast::RasterStack) - ranges = collect(_get_window_ranges(wp, rast)) +function solve(p::WindowedProblem, rast::RasterStack; + test_windows=false, + verbose=false, + workspace=init(p, rast), +) + ranges = collect(_get_window_ranges(p, rast)) mask = _get_window_mask(rast, ranges) - p = wp.problem output_stacks = Vector{RasterStack}(undef, count(mask)) used_ranges = ranges[mask] - if wp.threaded + if test_windows + output_stacks = map(eachindex(used_ranges)) do i + _mask_target_qualities_overlap!(rast, used_ranges[i], p) + end + return Rasters.mosaic(sum, output_stacks; to=rast, missingval=NaN) + end + function run(i) + rs = used_ranges[i] + verbose && println("Solving window $i $rs ") + rast_window = _mask_target_qualities_overlap!(rast, rs, p) + output_stacks[i] = solve(p.problem, rast_window; workspace) + end + if p.threaded Threads.@threads for i in eachindex(used_ranges) - output_stacks[i] = solve(p, rast[used_ranges[i]...]) + run(i) end else for i in eachindex(used_ranges) - output_stacks[i] = solve(p, rast[used_ranges[i]...]) + run(i) end end # Return mosaics of outputs @@ -84,50 +99,74 @@ for nested operations. end StoredProblem(problem; kw...) = StoredProblem(; problem, kw...) -function solve(sp::StoredProblem, rast::RasterStack) - ranges = collect(_get_window_ranges(sp, rast)) +function solve(p::StoredProblem, rast::RasterStack; + verbose=false, +) + ranges = collect(_get_window_ranges(p, rast)) mask = _get_window_mask(rast, ranges) - if sp.threaded - Threads.@threads for rs in ranges[mask] - output = solve(sp.problem, rast[rs...]) - _store(sp, output, rs) + used_ranges = ranges[mask] + function run(i) + rs = used_ranges[i] + verbose && println("Solving window $i $rs ") + rast_window = _mask_target_qualities_overlap!(rast, rs, p) + output = solve(p.problem, rast_window) + _store(p, output, rs) + end + if p.threaded + Threads.@threads for i in eachindex(used_ranges) + run(i) end else - for rs in ranges[mask] - output = solve(sp.problem, rast[rs...]) - _store(sp, output, rs) + for i in eachindex(used_ranges) + run(i) end end end -# Single batch job for clusters -function solve(sp::StoredProblem, rast::RasterStack, i::Int) - ranges = collect(_get_window_ranges(sp, rast)) - rs = ranges[i] - output = solve(sp.problem, rast[rs...]) - _store(sp, output, rs) +# Single batch job for running on clusters +function solve(p::StoredProblem, rast::RasterStack, i::Int; + verbose=false, +) + # Indices i are contiguous so we need to spread them + # accross the actual tiles that need to be done + + # Get all the tile ranges + ranges = collect(_get_window_ranges(p, rast)) + # Get the Bool mask of needed windows + mask = _get_window_mask(rast, ranges) + # Get the Int indices of the needed windows + tile_inds = eachindex(mask)[vec(mask)] + # Get the current window for this job + rs = ranges[tile_inds[i]] + # Get the ranges of the window for this job + output = solve(p.problem, rast[rs...]) + # Store the output rasters for this job to disk + filename = _store(p, output, rs) + return filename end """ - batch_ids(sp::StoredProblem, rast::RasterStack) + count_batches(p::StoredProblem, rast::RasterStack) -Return the batch indices of the windows that need to be computed. +Count the number of batch jobs that would need to be run. -Returns a `Vector{Int}` +A Slurm array job would then be specified "0-$(N-1)" + +Returns an `Int`. """ -function batch_ids(sp::StoredProblem, rast::RasterStack) - ranges = _get_window_ranges(sp, rast) +function count_batches(p::StoredProblem, rast::RasterStack) + ranges = _get_window_ranges(p, rast) mask = _get_window_mask(rast, ranges) - return eachindex(mask)[vec(mask)] + return count(mask) end # Mosaic the stored files to a RasterStack -function Rasters.mosaic(sp::StoredProblem; +function Rasters.mosaic(p::StoredProblem; to, lazy=false, filename=nothing, missingval=NaN, kw... ) - ranges = _get_window_ranges(sp, to) + ranges = _get_window_ranges(p, to) mask = _get_window_mask(to, ranges) - paths = [_window_path(sp, rs) for (rs, m) in zip(ranges, mask) if m] - stacks = [RasterStack(p; lazy, name) for p in paths if isdir(p)] + paths = [_window_path(p, rs) for (rs, m) in zip(ranges, mask) if m] + stacks = [RasterStack(path; lazy, name) for path in paths if isdir(path)] return Rasters.mosaic(sum, stacks; to, filename, missingval, kw...) end @@ -170,6 +209,17 @@ function _get_window_ranges(size::Tuple{Int,Int}, r::Int, overlap::Int) return (map((i, sz) -> i:min(sz, i + d), Tuple(c), size) for c in corners) end +function _mask_target_qualities_overlap!(rast, rs, p, last=false) + o = p.overlap ÷ 2 + fill = zero(eltype(rast.target_qualities)) + dest = rast[rs...] + dest.target_qualities[end-o:end, :] .= fill + dest.target_qualities[begin:begin+o, :] .= fill + dest.target_qualities[:, end-o:end] .= fill + dest.target_qualities[:, begin:begin+o] .= fill + return rast +end + _get_window_mask(::Nothing, ranges) = nothing _get_window_mask(rast::AbstractRasterStack, ranges) = _get_window_mask(_get_target(rast), ranges) diff --git a/src/utils.jl b/src/utils.jl index d627c3f..ab3f84b 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -144,7 +144,9 @@ function graph_matrix_from_raster( end end end - return sparse(is, js, vs, m*n, m*n) + # TODO just make a BandedMatrix from the start + # and what happens when this is not square? + return (sparse(is, js, vs, m*n, m*n)) end @@ -188,6 +190,11 @@ function mapnz(f, A::SparseMatrixCSC) map!(f, B.nzval, A.nzval) return B end +function mapnz(f, A::AbstractArray) + B = copy(A) + map!(f, B.data, A.data) + return B +end # Helper to get keyword arguments function _keywords(o::T) where T diff --git a/test/problem.jl b/test/problem.jl index 7d263d5..38da69f 100644 --- a/test/problem.jl +++ b/test/problem.jl @@ -7,8 +7,10 @@ _tempdir = mkdir(tempname()) mov_prob = replace_missing(Raster(joinpath(datadir, "mov_prob_1000.asc")), NaN) hab_qual = replace_missing(Raster(joinpath(datadir, "hab_qual_1000.asc")), NaN) -rast = RasterStack((; affinities=mov_prob, qualities=hab_qual)) -# rast = ConScape.coarse_graining(rast, 10) +rast = RasterStack((; affinities=mov_prob, qualities=hab_qual, target_qualities=hab_qual)) +rast.qualities[(rast.affinities .> 0) .& isnan.(rast.qualities)] .= 1e-20 +#rast = ConScape.coarse_graining(rast, 10) + graph_measures = graph_measures = (; func=ConScape.ConnectedHabitat(), @@ -26,32 +28,41 @@ expected_layers = (:func_exp, :func_oddsfor, :qbetw, :kbetw_exp, :kbetw_oddsfor) problem = ConScape.Problem(; graph_measures, connectivity_measure, solver=ConScape.MatrixSolver(), ) -@profview ConScape.solve(problem, rast) -@time result = ConScape.solve(problem, rast) -ConScape.solve(problem, rast) -using BenchmarkTools -@benchmark ConScape.solve(problem, rast) -plot(result) +@time workspace = init(problem, rast); +@time result = ConScape.solve(problem, rast; workspace) @test result isa RasterStack @test size(result) == size(rast) @test keys(result) == expected_layers +plot(result) +map(Base.summarysize, workspace) +Base.summarysize(workspace) +@profview ConScape.init(problem, rast) +@profview ConScape.solve(problem, rast; workspace) +ConScape.solve(problem, rast) +using BenchmarkTools +@benchmark ConScape.solve(problem, rast) + +F = lu(rand(100, 100)) # Threaded solve problem vector_problem = ConScape.Problem(; graph_measures, connectivity_measure, solver = ConScape.VectorSolver(; threaded=true), ) -@time vector_result = ConScape.solve(vector_problem, rast) -plot(vector_result) -using ProfileView -ProfileView.@profview ConScape.solve(vector_problem, rast) -using BenchmarkTools -@benchmark ConScape.solve(vector_problem, rast) +@time workspace = init(vector_problem, rast); +@time vector_result = ConScape.solve(vector_problem, rast; workspace) @test vector_result isa RasterStack @test size(vector_result) == size(rast) @test keys(vector_result) == expected_layers @test all(vector_result.func_exp .=== result.func_exp) +@profview workspace = init(vector_problem, rast); +map(w -> sizeof(w) / 10^6, workspace) +@profview ConScape.solve(vector_problem, rast; workspace) +Plots.plot(vector_result) +@benchmark +ConScape.solve(vector_problem, rast) + # Problem with custom solver linearsolve_problem = ConScape.Problem(; graph_measures, connectivity_measure, @@ -64,19 +75,26 @@ linearsolve_problem = ConScape.Problem(; # WindowedProblem returns a RasterStack windowed_problem = ConScape.WindowedProblem(problem; - radius=40, overlap=10, + radius=40, overlap=10, threaded=true ) -windowed_result = ConScape.solve(windowed_problem, rast) +windowed_result = ConScape.solve(windowed_problem, rast, verbose=true) + +using GLMakie +Rasters.rplot(windowed_result) @test windowed_result isa RasterStack @test size(windowed_result) == size(rast) @test keys(windowed_result) == expected_layers +window_tiles = ConScape.solve(windowed_problem, rast; test_windows=true, verbose=true) +plot(window_tiles) +Rasters.rplot(window_tiles) + # StoredProblem writes files to disk and mosaics to RasterStack stored_problem = ConScape.StoredProblem(problem; path=tempname(), radius=40, overlap=10, threaded=true ) -ConScape.solve(stored_problem, rast) +ConScape.solve(stored_problem, rast; verbose=true) stored_result = mosaic(stored_problem; to=rast) @test stored_result isa RasterStack @test size(stored_result) == size(rast) @@ -84,6 +102,7 @@ stored_result = mosaic(stored_problem; to=rast) @test keys(stored_result) == Tuple(sort(collect(expected_layers))) # Check the answer matches the WindowedProblem @test all(stored_result.func_exp .=== windowed_result.func_exp) +plot(stored_result) # StoredProblem can be run as batch jobs for clusters # We just need a new path to make sure the result is from a new run @@ -103,10 +122,10 @@ batch_result = mosaic(stored_problem2; to=rast) # StoredProblem can be nested with WindowedProblem small_windowed_problem = ConScape.WindowedProblem(problem; - radius=25, overlap=5, + radius=25, overlap=10, ) nested_problem = ConScape.StoredProblem(small_windowed_problem; - path=tempname(), radius=40, overlap=10, threaded=true + path=tempname(), radius=40, overlap=10, threaded=false ) ConScape.solve(nested_problem, rast) nested_result = mosaic(nested_problem; to=rast) From 9ad50fc116257acf5168fd2009d3f98cddd52649 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 20 Jan 2025 15:42:03 +0100 Subject: [PATCH 07/51] accurate allocation estimates --- src/ConScape.jl | 1 + src/allocations.jl | 97 +++++++++++++++++ src/graph_measure.jl | 105 ++++++++++++++---- src/grid.jl | 27 ++--- src/gridrsp.jl | 139 ++++++++++++++---------- src/problem.jl | 26 ++++- src/randomizedshortestpath.jl | 131 +++++++++++++---------- src/solvers.jl | 196 ++++++++++++++++++---------------- src/tiles.jl | 70 +++++++----- test/problem.jl | 83 ++++++++++---- test/runtests.jl | 4 +- 11 files changed, 586 insertions(+), 293 deletions(-) create mode 100644 src/allocations.jl diff --git a/src/ConScape.jl b/src/ConScape.jl index c429bc4..cd8256f 100644 --- a/src/ConScape.jl +++ b/src/ConScape.jl @@ -46,5 +46,6 @@ include("connectivity_measure.jl") include("problem.jl") include("solvers.jl") include("tiles.jl") +include("allocations.jl") end diff --git a/src/allocations.jl b/src/allocations.jl new file mode 100644 index 0000000..ddbc79e --- /dev/null +++ b/src/allocations.jl @@ -0,0 +1,97 @@ +function allocations(p::Problem, sze::Tuple{Int,Int}; + nthreads=Threads.nthreads() +) + gms = graph_measures(p) + dense_size = sizeofdense(p, sze) + sparse_size = sizeofsparse(p, sze) + init_size = sizeofinits(p, sze) + grid_size = sizeofgrid(p, sze) + + return_size = sum(map(gm -> sizeofreturn(gm, sze), gms)) + + total = sparse_size + dense_size + init_size + return_size + grid_size + (; total, sparse_size, dense_size, init_size, return_size, grid_size) +end + +function allocations(p::AbstractWindowedProblem, sze::Tuple{Int,Int}; + nthreads=Threads.nthreads() +) + if p.threaded + allocations(p.problem, sze) * nthreads + else + allocations(p.problem, sze) + end +end + +# This is approximate. +# TODO test with different size inputs +allocations(::MatrixSolver, sze) = sze[1] * 20 * sizeof(Float64) +function allocations(::VectorSolver, sze; + nthread=Threads.nthreads(), +) + if s.threaded + # TODO add lu workspace size * nthreads + sze[1] * 20 * sizeof(Float64) + else + sze[1] * 20 * sizeof(Float64) + end +end + +function sizeofgrid(p::Problem, (nsources, ntargets)) + ntargetarrays = 9 + targetallocssize = ntargets * ntargetarrays + # id lookups count for 2 + nsourcearrays = 9 + sourceallocssize = nsources * nsourcearrays + sourceidsize = nsources * 2 * sizeof(Int) + targetidsize = ntargets * 2 * sizeof(Int) + + # Dense storage + sourcequalitysize = nsources * sizeof(Float64) + # Sparse storage needs indices as well as values + targetqualitysize = ntargets * sizeof(Float64) + ntargets * sizeof(Int) + + return targetallocssize + sourceallocssize + + sourceidsize + targetidsize + + sourcequalitysize + targetqualitysize +end + +# Slightly inaccurate as the band is not complete in corners +# and there are a few extra allocations that counterbalance that +function sizeofsparse((nsources, ntargets)) + windowsize = 8 + ntargets * windowsize * (sizeof(Float64) + sizeof(Int)) +end +function sizeofsparse(p, sze::Tuple{Int,Int}) + # affinities + costmatrix + A + W + Pref + B_sparse + CW - others? + 7 * sizeofsparse(sze) +end + +sizeofdense(sze::Tuple{Int,Int}) = prod(sze) * sizeof(Float64) +function sizeofdense(p::Problem, sze::Tuple{Int,Int}) + gms = graph_measures(p) + n_workspaces = mapreduce(needs_workspaces, max, gms) + n_permuted_workspaces = mapreduce(needs_permuted_workspaces, max, gms) + + required_dense = 1 + + n_workspaces + + n_permuted_workspaces + + hastrait(needs_free_energy_distance, gms) + + hastrait(needs_expected_cost, gms) + + hastrait(needs_inv, gms) + + return sizeofdense(sze) * required_dense +end + +function sizeofinits(p::Problem, sze::Tuple{Int,Int}) + sum(graph_measures(p)) do gm + allocations(solver(p), sze) + end +end + +sizeofreturn(gm::GraphMeasure, sze) = sizeofreturn(returntype(gm), sze) +sizeofreturn(::ReturnsDenseSpatial, (n, m)) = n * sizeof(Float64) +sizeofreturn(::ReturnsSparse, (n, m)) = n * m * 8 # Roughly this for 8 neighbors +sizeofreturn(::ReturnsScalar, (n, m)) = sizeof(Float64) +sizeofreturn(r::ReturnsOther, (n, m)) = r.f(n, m) + diff --git a/src/graph_measure.jl b/src/graph_measure.jl index 829c4b6..24bc4f6 100644 --- a/src/graph_measure.jl +++ b/src/graph_measure.jl @@ -6,6 +6,29 @@ These are lazy definitions of conscape functions. """ abstract type GraphMeasure end +abstract type ReturnType end +struct ReturnsDenseSpatial <: ReturnType end +struct ReturnsSparse <: ReturnType end +struct ReturnsScalar <: ReturnType end +struct ReturnsOther{F} <: ReturnType + f::F +end + +""" + NoWriteArray + +A Julia AbstractArray wrapper that errors on `setindex!`, for testing. +""" +mutable struct NoWriteArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N} + __data::A +end + +Base.size(A::NoWriteArray) = size(A.__data) +Base.copy(A::NoWriteArray) = copy(A.__data) +Base.getindex(A::NoWriteArray, i...) = A.__data[i...] +Base.setindex!(A::NoWriteArray, v, i...) = error("Cannot write to NoWriteArray") +Base.:(==)(A::NoWriteArray, B::NoWriteArray) = A.__data == B.__data + keywords(o::GraphMeasure) = _keywords(o) abstract type TopologicalMeasure <: GraphMeasure end @@ -42,6 +65,16 @@ end struct MeanLeastCostKullbackLeiblerDivergence <: PathDistributionMeasure end struct MeanKullbackLeiblerDivergence <: PathDistributionMeasure end +returntype(::EdgeBetweennessQweighted) = ReturnsSparse() +returntype(::EdgeBetweennessKweighted) = ReturnsSparse() +returntype(::BetweennessQweighted) = ReturnsDenseSpatial() +returntype(::BetweennessKweighted) = ReturnsDenseSpatial() +returntype(::ConnectedHabitat) = ReturnsDenseSpatial() +returntype(::Criticality) = ReturnsDenseSpatial() +returntype(::EigMax) = ReturnsOther((n, m) -> n + m) +returntype(::MeanLeastCostKullbackLeiblerDivergence) = ReturnsScalar() +returntype(::MeanKullbackLeiblerDivergence) = ReturnsScalar() + # Map structs to functions # These return Rasters @@ -119,16 +152,27 @@ function compute(::NoConnectivity, end # Workspace allocation traits +return_type(::GraphMeasure) = false needs_inv(::GraphMeasure) = false needs_inv(::BetweennessMeasure) = true -needs_workspace(::GraphMeasure) = false -needs_workspace(::BetweennessMeasure) = true -# needs_workspace_cr(::GraphMeasure) = false -# needs_workspace_cr(::BetweennessMeasure) = true +needs_workspaces(::GraphMeasure) = 0 +needs_workspaces(::BetweennessMeasure) = 2 +needs_workspaces( + ::Union{EdgeBetweennessKweighted,EdgeBetweennessQweighted} +) = 3 +needs_permuted_workspaces(::GraphMeasure) = 0 +needs_permuted_workspaces(::EdgeBetweennessKweighted) = 1 +needs_edge_betweennesses(::GraphMeasure) = false +needs_edge_betweennesses( + ::Union{EdgeBetweennessKweighted,EdgeBetweennessQweighted} +) = true +needs_dense_A(::GraphMeasure) = false +needs_dense_A( + ::Union{EdgeBetweennessKweighted,EdgeBetweennessQweighted} +) = true needs_expected_cost(::GraphMeasure) = false needs_expected_cost(::EdgeBetweennessKweighted) = true needs_expected_cost(::MeanKullbackLeiblerDivergence) = true -needs_edge_betweenness_workspace(::MeanKullbackLeiblerDivergence) = true needs_free_energy_distance(::GraphMeasure) = false needs_free_energy_distance(::MeanKullbackLeiblerDivergence) = true needs_Aaj_init(::GraphMeasure) = true @@ -137,38 +181,57 @@ hastrait(t, gms) = mapreduce(t, |, gms; init=false) function _measures_workspace(p::AbstractProblem, grsp::GridRSP; A, A_init, + workspace, kw... ) gms = p.graph_measures - workspace1 = if hastrait(needs_workspace, gms) - similar(grsp.Z) - else - nothing - end + n_workspaces = mapreduce(needs_workspaces, max, gms) + n_permuted_workspaces = mapreduce(needs_permuted_workspaces, max, gms) + workspaces = [workspace, (similar(grsp.Z) for _ in 1:n_workspaces-1)...] + permuted_workspaces = [similar(grsp.Z') for _ in 1:n_permuted_workspaces] Zⁱ = if hastrait(needs_inv, gms) - _inv(grsp.Z) + NoWriteArray(_inv(grsp.Z)) else nothing end Aadj_init, Aadj = if hastrait(needs_Aaj_init, gms) - Aadj = A' - (; F) = A_init - merge(A_init, (; F=F')), Aadj + # Just take the adjoint of the factorization of A + # where possible to save calculations and memory + Aadj_init, Aadj = if hasproperty(A_init, :F) + Aadj = A' + # Use adjoint factorization of A rather than recalculating for A' + Aadj_init = merge(A_init, (; F=A_init.F')) + Aadj_init, Aadj + else + # LinearSolve.jl cant handle the adjoint + # so we duplicate work and allocations + Aadj = sparse(A') + Aadj_init = init(solver(p), Aadj) + Aadj_init, Aadj + end + Aadj_init, Aadj else - nothing + nothing, nothing end - workspace_kw = (; Zⁱ, workspace1, Aadj_init, Aadj, A, A_init) + # Create an intermediate workspace to use in computations + workspace_kw = (; Zⁱ, workspaces, permuted_workspaces, Aadj_init, Aadj, A, A_init, kw...) cf = connectivity_function(p) - expected_cost = if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost - ConScape.expected_cost(grsp; workspace_kw..., solver=solver(p), kw...) + expected_costs = if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost + NoWriteArray(ConScape.expected_cost(grsp; workspace_kw..., solver=solver(p), kw...)) + else + nothing + end + free_energy_distances = if hastrait(needs_free_energy_distance, gms) || cf == ConScape.free_energy_distance + NoWriteArray(ConScape.free_energy_distance(grsp; workspace_kw..., solver=solver(p), kw...)) else nothing end - free_energy_distance = if hastrait(needs_free_energy_distance, gms) || cf == ConScape.free_energy_distance - ConScape.free_energy_distance(grsp; workspace_kw..., solver=solver(p), kw...) + edge_betweennesses = if hastrait(needs_edge_betweennesses, gms) + copy(grsp.W) else nothing end - return (; workspace_kw..., kw..., expected_cost, free_energy_distance) + CW = grsp.g.costmatrix .* grsp.W + return (; grsp, workspace_kw..., CW, free_energy_distances, expected_costs, edge_betweennesses) end \ No newline at end of file diff --git a/src/grid.jl b/src/grid.jl index 7548168..eed3774 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -94,10 +94,12 @@ function Grid(nrows::Integer, # if any(t -> t < 0, nonzeros(costmatrix)) # throw(ArgumentError("The cost graph can have only non-negative edge weights. Perhaps you should change the cost function?")) # end + cost_digraph = SimpleDiGraph(costmatrix) + affinity_digraph = SimpleDiGraph(affinities) - # if ne(difference(SimpleDiGraph(costmatrix), SimpleDiGraph(affinities))) > 0 - # throw(ArgumentError("cost graph contains edges not present in the affinity graph")) - # end + if ne(difference(cost_digraph, affinity_digraph)) > 0 + throw(ArgumentError("cost graph contains edges not present in the affinity graph")) + end targetidx, targetnodes = _targetidx_and_nodes(target_qualities, id_to_grid_coordinate_list) qs = [_source_qualities[i] for i in id_to_grid_coordinate_list] @@ -141,7 +143,7 @@ function Grid(rast::RasterStack; Grid(size(rast)...; affinities, qualities, source_qualities, target_qualities, costs, kw...) end # TODO move functions like MinusLog to problems and pass in here -Grid(p::AbstractProblem, rast::RasterStack) = Grid(rast) +Grid(p::AbstractProblem, rast::RasterStack; kw...) = Grid(rast; kw...) Base.size(g::Grid) = (g.nrows, g.ncols) DimensionalData.dims(g::Grid) = g.dims @@ -172,8 +174,8 @@ _unwrap(R::Raster) = parent(R) _unwrap(R::AbstractMatrix) = R # Compute a vector of the cartesian indices of nonzero target qualities and # the corresponding node id corresponding to the indices -_targetidx(q::AbstractMatrix, grididxs::Vector) = grididxs -_targetidx(q::SparseMatrixCSC, grididxs::Vector) = +_targetidx(q::AbstractMatrix, grididxs::AbstractVector) = grididxs +_targetidx(q::SparseMatrixCSC, grididxs::AbstractVector) = CartesianIndex.(findnz(q)[1:2]...) ∩ grididxs _targetidx_and_nodes(g::Grid) = @@ -255,7 +257,7 @@ will have the same size as the input `Grid` but only nodes associated with the largest subgraph of the affinities will be active. """ function largest_subgraph(g::Grid) - # Convert cost matrix to graph + # Convert cost matrix to graph, todo: is `permute=false` needed graph = SimpleWeightedDiGraph(g.costmatrix, permute=false) # Find the subgraphs @@ -278,16 +280,17 @@ function largest_subgraph(g::Grid) affinities = g.affinities[scci, scci] # affinities = convert(SparseMatrixCSC{Float64,Int}, graph[scci]) - id_to_grid_coordinate_list = g.id_to_grid_coordinate_list[scci] - targetidx, targetnodes = _targetidx_and_nodes(g.target_qualities, id_to_grid_coordinate_list) - qs = [g.source_qualities[i] for i in id_to_grid_coordinate_list] - qt = [g.target_qualities[i] for i in id_to_grid_coordinate_list ∩ targetidx] + costmatrix = g.costfunction === nothing ? g.costmatrix[scci, scci] : mapnz(g.costfunction, affinities) + id_to_grid_coordinate_list = NoWriteArray(g.id_to_grid_coordinate_list[scci]) + targetidx, targetnodes = map(NoWriteArray, _targetidx_and_nodes(g.target_qualities, id_to_grid_coordinate_list)) + qs = NoWriteArray([g.source_qualities[i] for i in id_to_grid_coordinate_list]) + qt = NoWriteArray([g.target_qualities[i] for i in id_to_grid_coordinate_list ∩ targetidx]) return Grid( g.nrows, g.ncols, affinities, g.costfunction, - g.costfunction === nothing ? g.costmatrix[scci, scci] : mapnz(g.costfunction, affinities), + costmatrix, id_to_grid_coordinate_list, g.source_qualities, g.target_qualities, diff --git a/src/gridrsp.jl b/src/gridrsp.jl index 794fa92..e86c7b8 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -90,17 +90,21 @@ function betweenness_kweighted(grsp::GridRSP; distance_transformation=nothing, diagvalue=nothing, proximities=nothing, - expected_cost=nothing, - free_energy_distance=nothing, + workspaces=nothing, + expected_costs=nothing, + free_energy_distances=nothing, kw... ) g = grsp.g + workspace1, workspaces... = workspaces if isnothing(proximities) - proximities = if connectivity_function == ConScape.expected_cost && !isnothing(expected_cost) - expected_cost - elseif connectivity_function == ConScape.free_energy_distance && !isnothing(free_energy_distance) - free_energy_distance + proximities = if connectivity_function == ConScape.expected_cost && !isnothing(expected_costs) + workspace1 .= expected_costs + workspace1 + elseif connectivity_function == ConScape.free_energy_distance && !isnothing(free_energy_distances) + workspace1 .= free_energy_distances + workspace1 else connectivity_function(grsp; kw...) end @@ -118,14 +122,11 @@ function betweenness_kweighted(grsp::GridRSP; if connectivity_function <: DistanceFunction map!(distance_transformation, proximities, proximities) end + _maybe_set_diagonal!(proximities, g, diagvalue) - if !isnothing(diagvalue) - for (j, i) in enumerate(g.targetnodes) - proximities[i, j] = diagvalue - end - end - - betvec = RSP_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes; kw...) + betvec = RSP_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes; + workspaces, kw... + ) bet = fill(NaN, g.nrows, g.ncols) for (i, v) in enumerate(betvec) bet[g.id_to_grid_coordinate_list[i]] = v @@ -147,22 +148,25 @@ end function edge_betweenness_kweighted(grsp::GridRSP; distance_transformation=inv(grsp.g.costfunction), diagvalue=nothing, - expected_cost=nothing, + expected_costs=nothing, + workspaces=[similar(grsp.Z), similar(grsp.Z)], kw... ) - + workspace1, workspaces... = workspaces g = grsp.g - if isnothing(expected_cost) - expected_cost = ConScape.expected_cost(grsp; kw...) + if isnothing(expected_costs) + expected_costs = ConScape.expected_cost(grsp; kw...) end - proximities = map(distance_transformation, expected_cost) - _set_diagonal!(proximities, g, diagvalue) + proximities = map!(distance_transformation, workspace1, expected_costs) + _maybe_set_diagonal!(proximities, g, diagvalue) - return RSP_edge_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes; kw...) + return RSP_edge_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes; + workspaces, kw... + ) end -_set_diagonal!(proximities, g, diagvalue::Nothing) = nothing -function _set_diagonal!(proximities, g, diagvalue) +_maybe_set_diagonal!(proximities, g, diagvalue::Nothing) = nothing +function _maybe_set_diagonal!(proximities, g, diagvalue) for (j, i) in enumerate(g.targetnodes) proximities[i, j] = diagvalue end @@ -193,23 +197,29 @@ least_cost_distance(grsp::GridRSP; kw...) = least_cost_distance(grsp.g; kw...) Compute the mean Kullback–Leibler divergence between the free energy distances and the RSP expected costs for `grsp::GridRSP`. """ function mean_kl_divergence(grsp::GridRSP; - free_energy_distance=nothing, - expected_cost=nothing, + free_energy_distances=nothing, + expected_costs=nothing, kw... ) g = grsp.g - if isnothing(free_energy_distance) - free_energy_distance = RSP_free_energy_distance(grsp.Z, grsp.θ, g.targetnodes; kw...) + free_energy_distances = if isnothing(free_energy_distances) + RSP_free_energy_distance(grsp.Z, grsp.θ, g.targetnodes; kw...) + else + free_energy_distances end - if isnothing(expected_cost) - expected_cost = ConScape.expected_cost(grsp; kw...) + expected_costs = if isnothing(expected_costs) + ConScape.expected_cost(grsp; kw...) + else + expected_costs end - return mean_kl_divergence(grsp::GridRSP, free_energy_distance, expected_cost) + return mean_kl_divergence(grsp::GridRSP, free_energy_distances, expected_costs; kw...) end -function mean_kl_divergence(grsp::GridRSP, free_energy_distance, expected_cost) +function mean_kl_divergence(grsp::GridRSP, free_energy_distances, expected_costs; + workspaces, kw... +) g = grsp.g - return g.qs' * (free_energy_distance - expected_cost) * g.qt * grsp.θ + return g.qs' * (workspaces[1] .= free_energy_distances .- expected_costs) * g.qt * grsp.θ end @@ -220,28 +230,31 @@ Compute the mean Kullback–Leibler divergence between the least-cost path and t """ function mean_lc_kl_divergence(grsp::GridRSP; kw...) g = grsp.g + C = g.costmatrix + cost_weighted_digraph = SimpleWeightedDiGraph(C) + n = size(C, 1) + from = collect(1:n) + kl_div = zeros(n) # TODO make this a loop - div = hcat([least_cost_kl_divergence(g.costmatrix, grsp.Pref, i; kw...) for i in g.targetnodes]...) + div = hcat([least_cost_kl_divergence(C, grsp.Pref, i; n, from, kl_div, cost_weighted_digraph, kw...) for i in g.targetnodes]...) return g.qs' * div * g.qt end function least_cost_kl_divergence(C::SparseMatrixCSC, Pref::SparseMatrixCSC, targetnode::Integer; - graph=SimpleWeightedDiGraph(C), + cost_weighted_digraph=SimpleWeightedDiGraph(C), + n=size(C, 1), + from=collect(1:n), + kl_div=zeros(n), kw... ) - n = size(C, 1) if !(1 <= targetnode <= n) throw(ArgumentError("target node not found")) end - dsp = dijkstra_shortest_paths(graph, targetnode) + dsp = dijkstra_shortest_paths(cost_weighted_digraph, targetnode) parents = dsp.parents parents[targetnode] = targetnode - - from = collect(1:n) - to = copy(parents) - - kl_div = zeros(n) + to = copy(parents) while true notdone = false @@ -262,10 +275,10 @@ function least_cost_kl_divergence(C::SparseMatrixCSC, Pref::SparseMatrixCSC, tar end # Pointer swap - tmp = from + tmp = from from = to - to = tmp + to = tmp end return kl_div @@ -317,7 +330,7 @@ argument can be set to `true` to switch to a cheaper approximate solution of the `connectivity_function`. The default value is `false`. """ function connected_habitat(grsp::Union{Grid,GridRSP}; - connectivity_function=expected_cost, + connectivity_function=ConScape.expected_cost, distance_transformation=nothing, diagvalue=nothing, θ::Union{Nothing,Real}=nothing, @@ -342,9 +355,9 @@ function connected_habitat(grsp::Union{Grid,GridRSP}; throw(ArgumentError("θ must be a positive real number when passing a Grid")) end if connectivity_function == ConScape.expected_cost && !isnothing(expected_cost) - expected_cost + copy(expected_cost) elseif connectivity_function == ConScape.free_energy_distance && !isnothing(free_energy_distance) - free_energy_distance + copy(free_energy_distance) else connectivity_function(grsp; θ=θ, approx=approx, kw...) end @@ -353,9 +366,9 @@ function connected_habitat(grsp::Union{Grid,GridRSP}; throw(ArgumentError("θ must be unspecified when passing a GridRSP")) end if connectivity_function == ConScape.expected_cost && !isnothing(expected_cost) - expected_cost + copy(expected_cost) elseif connectivity_function == ConScape.free_energy_distance && !isnothing(free_energy_distance) - free_energy_distance + copy(free_energy_distance) else connectivity_function(grsp; kw...) end @@ -394,7 +407,8 @@ function connected_habitat(grsp::GridRSP, diagvalue=nothing, avalue=floatmin(), # smallest non-zero value qˢvalue=0.0, - qᵗvalue=0.0) + qᵗvalue=0.0, + kw...) g = grsp.g @@ -462,10 +476,14 @@ function LinearAlgebra.eigmax(grsp::GridRSP; connectivity_function=expected_cost, distance_transformation=nothing, diagvalue=nothing, + workspaces=[similar(grsp.Z), similar(grsp.Z)], tol=1e-14, + expected_costs=nothing, + free_energy_distances=nothing, kw... ) g = grsp.g + workspace1, workspace2, workspace3 = workspaces # Check that distance_transformation function has been passed if no cost function is saved if distance_transformation === nothing && connectivity_function <: DistanceFunction @@ -476,23 +494,30 @@ function LinearAlgebra.eigmax(grsp::GridRSP; end end - S = connectivity_function(grsp; kw...) + S = if connectivity_function == ConScape.expected_cost && !isnothing(expected_costs) + # workspace1 .= expected_costs + # workspace1 + copy(expected_costs) + elseif connectivity_function == ConScape.free_energy_distance && !isnothing(free_energy_distances) + workspace1 .= free_energy_distances + workspace1 + else + connectivity_function(grsp; kw...) + end + # S = connectivity_function(grsp; kw...) if connectivity_function <: DistanceFunction map!(distance_transformation, S, S) end - if diagvalue !== nothing - for (j, i) in enumerate(g.targetnodes) - S[i, j] = diagvalue - end - end + _maybe_set_diagonal!(S, g, diagvalue) # quality scaled proximity matrix - qSq = g.qs .* S .* g.qt' + qSq = workspace2 .= g.qs .* S .* g.qt' # square submatrix defined by extracting the rows corresponding to landmarks - qSq₀₀ = qSq[g.targetnodes, :] + qSq₀₀ = view(workspace3, 1:size(workspace3, 2), :) + qSq₀₀ .= view(qSq, g.targetnodes, :) # size of the full problem n = size(g.affinities, 1) @@ -556,7 +581,7 @@ function LinearAlgebra.eigmax(grsp::GridRSP; # construct full right vector vʳ = fill(NaN, n) vʳ[g.targetnodes] = vʳ₀ - vʳ[p₁] = qSq[p₁,:]*vʳ₀/λ₀[1] + vʳ[p₁] = view(qSq, p₁, :) *vʳ₀ / λ₀[1] # compute left vector (of submatrix) by shift-invert Flu = lu(qSq₀₀ - λ₀[1]*I) diff --git a/src/problem.jl b/src/problem.jl index b18cec4..6156b51 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -29,6 +29,27 @@ and time reequiremtents on a cluster """ function assess end +""" + allocations(p::AbstractProblem, size::Tuple{Int,Int}) + allocations(p::AbstractProblem, rast::RasterStack) + +Calculate allocations in Bytes required to run the problem. +The maximum dense target size will be used, so that `size` +is symmetrical. You can pass e.g. `(1000, 200)`. where you +know the size of the largest sparse matrix generated from `rast`. + +`allocations` will likely underestimate as Julia may need to allocate +for compilatation and other things outside of our control. + +A warning will be thrown for problem components whos allocations +are not well known. +""" +function allocations end +function allocations(p::AbstractProblem, rast::RasterStack) + s = prod(size(rast)) + allocations(p, (s, s)) +end + """ Problem(graph_measures...; solver, θ) @@ -58,9 +79,10 @@ function solve(p::Problem, rast::RasterStack; workspace=nothing) grid = isnothing(workspace) ? Grid(p, rast) : workspace.grid return solve(p, grid; workspace) end +solve(p::Problem, workspace::NamedTuple) = solve(p, workspace.grid; workspace) -function init(p::Problem, rast::RasterStack) - grid = Grid(p, rast) +function init(p::Problem, rast::RasterStack; kw...) + grid = Grid(p, rast; kw...) return (; grid, init(p, grid)...) end # Init is conditional on solver and connectivity measure diff --git a/src/randomizedshortestpath.jl b/src/randomizedshortestpath.jl index 20d4246..027f3b5 100644 --- a/src/randomizedshortestpath.jl +++ b/src/randomizedshortestpath.jl @@ -2,7 +2,7 @@ function sparse_rhs(targetnodes, n) sparse(targetnodes, 1:length(targetnodes), - 1.014/11-2024, + 1.0, n, length(targetnodes), ) @@ -35,12 +35,13 @@ function RSP_betweenness_qweighted(W::SparseMatrixCSC, qᵗ::AbstractVector, targetnodes::AbstractVector; Zⁱ=_inv(Z), - workspace1=similar(Z), + workspaces=[similar(Z), similar(Z)], solver=nothing, Aadj = (I - W)', Aadj_init=init(solver, Aadj), kw... ) + workspace1, workspace2 = workspaces qˢZⁱqᵗ = workspace1 qˢZⁱqᵗ .= qˢ .* Zⁱ .* qᵗ' sumqˢ = sum(qˢ) @@ -49,7 +50,7 @@ function RSP_betweenness_qweighted(W::SparseMatrixCSC, end # TODO adjoint of LinearSolver? - ZqˢZⁱqᵗZt = solve_ldiv!(solver, Aadj_init, Aadj, qˢZⁱqᵗ) + ZqˢZⁱqᵗZt = ldiv!(solver, Aadj_init, qˢZⁱqᵗ; B_copy=copy!(workspace2, qˢZⁱqᵗ)) ZqˢZⁱqᵗZt .*= Z return sum(ZqˢZⁱqᵗZt, dims=2) # diag(Z * ZqˢZⁱqᵗ') @@ -62,13 +63,14 @@ function RSP_betweenness_kweighted(W::SparseMatrixCSC, qᵗ::AbstractVector, # Target qualities S::AbstractMatrix, # Matrix of proximities landmarks::AbstractVector; - Zⁱ=_inv(Z), - workspace1=similar(Z), + Zⁱ, # =_inv(Z), + workspaces, # =similar(Z), solver=nothing, - Aadj = (I - W)', - Aadj_init=init(solver, Aadj), + Aadj, # = (I - W)', + Aadj_init, # =init(solver, Aadj), kw... ) + workspace1 = workspaces[1] axis1, axis2 = axes(Z) if axis1 != axes(qˢ, 1) throw(DimensionMismatch("")) @@ -83,14 +85,17 @@ function RSP_betweenness_kweighted(W::SparseMatrixCSC, throw(DimensionMismatch("")) end - KZⁱ = workspace1 - KZⁱ .= qˢ .* S .* qᵗ' + # Write into proximities + KZⁱ = S + KZⁱ .*= qˢ .* qᵗ' # If any of the values of KZⁱ is above one then there is a risk of overflow. # Hence, we scale the matrix and apply the scale factor by the end of the # computation. λ = max(1.0, maximum(KZⁱ)) - k = vec(sum(KZⁱ, dims=1)) * inv(λ) + scratch = view(workspace1, :, 1:1) + k = vec(sum!(scratch, KZⁱ)) + k .*= inv(λ) KZⁱ .*= inv.(λ) .* Zⁱ for j in axis2 @@ -98,10 +103,12 @@ function RSP_betweenness_kweighted(W::SparseMatrixCSC, end # KZi overwritten from here - ZKZⁱt = solve_ldiv!(solver, Aadj_init, Aadj, KZⁱ) + # ZKZⁱt = (I - W)'\KZⁱ + ZKZⁱt = ldiv!(solver, Aadj_init, KZⁱ; B_copy=copy!(workspace1, KZⁱ)) ZKZⁱt .*= λ .* Z - return vec(sum(ZKZⁱt, dims=2)) # diag(Z * KZⁱ') + scratch = view(workspace1, :, 1:1) + return vec(sum!(scratch, ZKZⁱt)) # diag(Z * KZⁱ') end function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, @@ -109,14 +116,17 @@ function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, qˢ::AbstractVector, qᵗ::AbstractVector, targetnodes::AbstractVector; - Zⁱ=_inv(Z), - workspace1=similar(Z), - Aadj = (I - W)', - Aadj_init=init(solver, Aadj), solver=nothing, + Zⁱ, # =_inv(Z), + workspaces, # =similar(Z), + Aadj, # = (I - W)', + Aadj_init, # =init(solver, Aadj), + B_sparse, # =sparse_rhs(targetnodes, size(W, 1)), + edge_betweennesses, # =copy(W), kw... ) - n = size(W,1) + n = size(W, 1) + workspace1, workspace2, workspace3 = workspaces diagZⁱ = [Zⁱ[targetnodes[t], t] for t in 1:length(targetnodes)] sumqˢ = sum(qˢ) @@ -124,27 +134,25 @@ function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, # FIXME: This should be only done when actually size(Z, 2) < size(Z, 1)/K where K ≈ 10 or so. # Otherwise we just compute many of the elements of Z twice... if size(Z, 2) < size(Z, 1) - B = workspace1 - B .= sparse_rhs(targetnodes, size(W, 1)) - Zrows = solve_ldiv!(solver, Aadj_init, Aadj, B)' + B = workspace1 .= B_sparse + Zrows = ldiv!(solver, Aadj_init, B; B_copy=copy!(workspace2, B))' Zrows .*= sumqˢ * qᵗ .* diagZⁱ else - - Zrows = Z .* (sumqˢ * qᵗ .* diagZⁱ) + Zrows = workspace1 .= Z .* (sumqˢ * qᵗ .* diagZⁱ) end - qˢZⁱqᵗ = qˢ .* Zⁱ .* qᵗ' - - QZⁱᵀZ = qˢZⁱqᵗ'/(I - W) - - RHS = QZⁱᵀZ - Zrows + qˢZⁱqᵗ = workspace2 .= qˢ .* Zⁱ .* qᵗ' + # QZⁱᵀZ = qˢZⁱqᵗ' / A + QZⁱᵀZ = ldiv!(solver, Aadj_init, qˢZⁱqᵗ; B_copy=copy!(workspace3, qˢZⁱqᵗ))' - edge_betweennesses = copy(W) + Zrows .= QZⁱᵀZ .- Zrows + RHS = Zrows for i in axes(W, 1) # ZᵀZⁱ_minus_diag = Z[:,i]'*qˢZⁱqᵗ .- sumqˢ.* (Z[:,i].*diag(Zⁱ).*qᵗ)' - for j in findall(W[i,:].>0) + for (j, x) in enumerate(view(W, i, :)) + x > 0 || continue # edge_betweennesses[i,j] = W[i,j] .* Zqt[j,:]'* (ZᵀZⁱ_minus_diag * Z[j,:])[1] edge_betweennesses[i, j] = W[i, j] .* (view(Z, j, :)' * view(RHS, :, i))[1] end @@ -159,37 +167,40 @@ function RSP_edge_betweenness_kweighted(W::SparseMatrixCSC, qᵗ::AbstractVector, K::AbstractMatrix, # Matrix of proximities targetnodes::AbstractVector; - Zⁱ=_inv(Z), - workspace1=similar(Z), - Aadj = (I - W)', - Aadj_init=init(solver, Aadj), solver=nothing, + Zⁱ, # =_inv(Z), + workspaces, # =(similar(Z), similar(Z)), + permuted_workspaces, + A, # =(I - W), + Aadj, # =(I - W)', + Aadj_init, # =init(solver, Aadj), + B_sparse, # =sparse_rhs(targetnodes, size(W, 1)), + edge_betweennesses=copy(W), kw... ) - K̂ = qˢ .* K .* qᵗ' + workspace1, workspace2 = workspaces + permuted_workspace1 = permuted_workspaces[1] + + K̂ = K .= qˢ .* K .* qᵗ' k̂ = vec(sum(K̂, dims=1)) K̂ .*= Zⁱ - - K̂ᵀZ = K̂'/(I - W) + # K̂ᵀZ = K̂' / A # is equivalent to the below + K̂ᵀZ = ldiv!(solver, Aadj_init, K̂; B_copy=copy!(workspace2, K̂))' k̂diagZⁱ = k̂.*[Zⁱ[targetnodes[t], t] for t in 1:length(targetnodes)] - B = workspace1 - B .= sparse_rhs(targetnodes, size(W, 1)) - Zrows = solve_ldiv!(solver, Aadj_init, Aadj, B) - k̂diagZⁱZ = workspace1 - k̂diagZⁱZ .= k̂diagZⁱ .* Zrows' - - K̂ᵀZ_minus_diag = K̂ᵀZ .- k̂diagZⁱZ - - edge_betweennesses = copy(W) + B = workspace1 .= B_sparse + Zrows = ldiv!(solver, Aadj_init, B; B_copy=copy!(workspace2, B)) + k̂diagZⁱZ = permuted_workspace1 .= k̂diagZⁱ .* Zrows' # TODO we need a permuted workspace + K̂ᵀZ_minus_diag = k̂diagZⁱZ .= K̂ᵀZ .- k̂diagZⁱZ for i in axes(W, 1) # ZᵀZⁱ_minus_diag = ZᵀKZⁱ[i,:] .- (k.*Z[targetnodes,i].*(Zⁱ[targetnodes,targetnodes]))' # ZᵀZⁱ_minus_diag = Z[:,i]'*K̂ .- (k.*Z[targetnodes,i].*diag(Zⁱ))' - for j in findall(>(0), view(W, i, :)) + for (j, x) in enumerate(view(W, i, :)) + x > 0 || continue edge_betweennesses[i, j] = W[i, j] .* (view(Z, j, :)' * view(K̂ᵀZ_minus_diag, :, i))[1] end end @@ -203,12 +214,13 @@ function RSP_expected_cost(W::SparseMatrixCSC, Z::AbstractMatrix, landmarks::AbstractVector; solver=nothing, - A,# =(I - W), - A_init,# =init(solver, A), - workspace1,# =similar(Z), + A =(I - W), + A_init = init(solver, A), + workspaces = [similar(Z), similar(Z)], + CW = C .* W, kw... ) - + workspace1, workspace2 = workspaces if axes(W) != axes(C) throw(DimensionMismatch("")) end @@ -220,22 +232,27 @@ function RSP_expected_cost(W::SparseMatrixCSC, end - # When threaded the solver is faster than # a dense matmul + # When threaded the solver is faster than a dense matmul # C̄ = if size(Z, 1) == size(Z, 2) # B = mul!(workspace1, C .* W, Z) # mul!(B, C .* W, Z) # This is a dense-dense matmul... very slow # Z * B # else - # TODO - B = mul!(workspace1, C .* W, Z) - C̄ = solve_ldiv!(solver, A_init, A, B) + # TODO permuted workspace here for the broadcast + B = mul!(workspace1, CW, Z) + C̄ = ldiv!(solver, A_init, B; B_copy=copy!(workspace2, B)) # end C̄ ./= Z # Zeros in Z can cause NaNs in C̄ ./= Z computation but the limit replace!(C̄, NaN => Inf) - dˢ = [C̄[landmarks[j], j] for j in axes(Z, 2)] + dˢ = view(workspace2, 1, :) + # TODO clarify what this does + + for j in axes(Z, 2) + dˢ[j] = C̄[landmarks[j], j] + end C̄ .-= dˢ' return C̄ end @@ -267,9 +284,9 @@ end function connected_habitat(qˢ::AbstractVector, # Source qualities qᵗ::AbstractVector, # Target qualities S::AbstractMatrix; # Matrix of proximities - kw... + workspaces, kw... ) - return qˢ .* (S * qᵗ) + mul!(view(workspaces[1], :, 1), S, qᵗ) .*= qˢ end # Returns the directed RSP dissimilarity and directed free energy distance for all nodes to a given target diff --git a/src/solvers.jl b/src/solvers.jl index 252d5f0..abab549 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -6,6 +6,27 @@ Abstract supertype for ConScape solvers. """ Solver +function init(s::Solver, + cm::FundamentalMeasure, + p::AbstractProblem, + g::Grid +) + Pref = _Pref(g.affinities) + W = _W(Pref, cm.θ, g.costmatrix) + # Sparse lhs + A = I - W + # Sparse rhs + B_sparse = sparse_rhs(g.targetnodes, size(g.costmatrix, 1)) + A_init = init(s, A) + B_dense = Matrix(B_sparse) + workspace = copy(B_dense) + Z = ldiv!(s, A_init, B_dense; B_copy=workspace) + # Check that values in Z are not too small: + _check_z(s, Z, W, g) + grsp = GridRSP(g, cm.θ, Pref, W, Z) + return _measures_workspace(p, grsp; A, A_init, workspace, B_sparse) +end + # RSP is not used for ConnectivityMeasure, so the solver isn't used function solve(s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, g::Grid; workspace=init(s, cm, p, g), @@ -30,9 +51,7 @@ function init(s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, g::Grid) return (;) end -# Fallback generic ldiv solver -solve_ldiv!(solver, A, B) = - solve_ldiv!(solver, init(solver, A), A, B) +LinearAlgebra.ldiv!(solver::Solver, A, B; kw...) = ldiv!(solver, init(solver, A), A, B; kw...) """ MatrixSolver(; check) @@ -46,10 +65,12 @@ But may be best for GPUs using CuSSP.jl ? check::Bool = true end -solve_ldiv!(::Union{MatrixSolver,Nothing}, (; F), A, B) = ldiv!(F, B) - init(::Union{Nothing,MatrixSolver}, A::AbstractMatrix) = (; F=lu(A)) +# TODO: no type pyracy +LinearAlgebra.ldiv!(::Union{MatrixSolver,Nothing}, (; F), B; B_copy=copy(B)) = + ldiv!(B, F, B_copy) + """ VectorSolver(; check, threaded) @@ -61,45 +82,38 @@ less memory use and the capacity for threading threaded::Bool = false end -function init(s::Union{MatrixSolver,VectorSolver,Nothing}, - cm::FundamentalMeasure, - p::AbstractProblem, - g::Grid -) - (; A, B, Pref, W) = setup_sparse_problem(g, cm) - # Check that values in Z are not too small: - A_init = init(s, A) - Z = solve_ldiv!(s, A_init, A, Matrix(B)) - _check_z(s, Z, W, g) - grsp = GridRSP(g, cm.θ, Pref, W, Z) - return (; grsp, _measures_workspace(p, grsp; A, A_init)...) -end - function init(s::VectorSolver, A::AbstractMatrix) F = lu(A) + Tb = Vector{eltype(A)} if s.threaded nbuffers = Threads.nthreads() - channel = Channel{Tuple{typeof(F),Vector{Float64}}}(nbuffers) - for _ in 1:nbuffers - # TODO not all of F needs to be duplicated? - # Can we just copy the workspace arrays and resuse the rest? - put!(channel, (deepcopy(F), Vector{eltype(A)}(undef, size(A, 2)))) - end - return (; F, channel) + # channel = Channel{Tuple{typeof(F),Vector{Float64}}}(nbuffers) + # Create one init per thread + # UMFPACK `copy` shares memory but avoids workspace race conditions + [ + (; + F=(i == 1 ? F : copy(F)), + b=Tb(undef, size(A, 2)) + ) + for i in 1:nbuffers + ] else - b = zeros(eltype(A), size(A, 2)) - return (; F, b) + b = Tb(undef, size(A, 2)) + return [(; F, b)] end end -function solve_ldiv!(s::VectorSolver, init, A, B) +function LinearAlgebra.ldiv!(s::VectorSolver, init, B; B_copy=nothing) transposeoptype = SparseArrays.LibSuiteSparse.UMFPACK_A # for SparseArrays.UMFPACK._AqldivB_kernel!(Z, F, B, transposeoptype) # This is basically SparseArrays.UMFPACK._AqldivB_kernel! # But we unroll it to avoid copies or allocation of B if s.threaded - (; F, channel) = init + channel = Channel{typeof(init[1])}(length(init)) + for x in init + put!(channel, x) + end # Create a channel to store problem b vectors for threads # see https://juliafolds2.github.io/OhMyThreads.jl/stable/literate/tls/tls/ Threads.@threads for col in 1:size(B, 2) @@ -113,7 +127,7 @@ function solve_ldiv!(s::VectorSolver, init, A, B) put!(channel, (F_t, b_t)) end else - (; F, b) = init + (; F, b) = init[1] for col in 1:size(B, 2) b .= view(B, :, col) SparseArrays.UMFPACK.solve!(view(B, :, col), F, b, transposeoptype) @@ -162,47 +176,27 @@ struct LinearSolver <: Solver end LinearSolver(args...; threaded=false, kw...) = LinearSolver(args, kw, threaded) -function solve_ldiv!(s::LinearSolver, A, B) - b = zeros(eltype(A), size(B, 1)) - # Define and initialise the linear problem - linprob = LinearProblem(A, b) - linsolve = init(linprob, s.args...; s.keywords...) - solve_ldiv!(s, linsolve, A, B) -end -function solve_ldiv!(s::LinearSolver, linsolve, A, B) +function LinearAlgebra.ldiv!(s::LinearSolver, (; linsolve, channel, b), B) # TODO: for now we define a Z matrix, but later modify ops # to run column by column without materialising Z - # if s.threaded - # nbuffers = Threads.nthreads() - # # Create a channel to store problem b vectors for threads - # # see https://juliafolds2.github.io/OhMyThreads.jl/stable/literate/tls/tls/ - # ch = Channel{Tuple{typeof(linsolve),Vector{Float64}}}(nbuffers) - # for i in 1:nbuffers - # # TODO fix this in LinearSolve.jl with batching - # # We should not need to `deepcopy` the whole problem we - # # just need to replicate the specific workspace arrays - # # that will cause race conditions. - # # But currently there is no parallel mode for LinearSolve.jl - # # See https://github.com/SciML/LinearSolve.jl/issues/552 - # put!(ch, (deepcopy(linsolve), Vector{eltype(A)}(undef, size(B, 1)))) - # end - # Threads.@threads for i in 1:size(B, 2) - # # Get column memory from the channel - # linsolve_t, b_t = take!(ch) - # # Update it - # b_t .= view(B, :, i) - # # Update solver with new b values - # reinit!(linsolve_t; b=b_t, reuse_precs=true) - # sol = LinearSolve.solve(linsolve_t, s.args...; s.keywords...) - # # Aim for something like this ? - # # res = map(connectivity_measures(p)) do cm - # # compute(cm, g, sol.u, i) - # # end - # # For now just use Z - # B[:, i] .= sol.u - # put!(ch, (linsolve_t, b_t)) - # end - # else + if s.threaded + Threads.@threads for i in 1:size(B, 2) + # Get column memory from the channel + linsolve_t, b_t = take!(channel) + # Update it + b_t .= view(B, :, i) + # Update solver with new b values + reinit!(linsolve_t; b=b_t, reuse_precs=false) + sol = LinearSolve.solve(linsolve_t, s.args...; s.keywords...) + # Aim for something like this ? + # res = map(connectivity_measures(p)) do cm + # compute(cm, g, sol.u, i) + # end + # For now just use Z + B[:, i] .= sol.u + put!(channel, (linsolve_t, b_t)) + end + else for i in 1:size(B, 2) b .= view(B, :, i) reinit!(linsolve; b, reuse_precs=true) @@ -210,41 +204,41 @@ function solve_ldiv!(s::LinearSolver, linsolve, A, B) # Udate the column B[:, i] .= sol.u end - # end + end return B end +function init(s::LinearSolver, A) + b = zeros(eltype(A), size(A, 2)) + # Define and initialise the linear problem + linprob = LinearProblem(A, b) + linsolve = init(linprob, s.args...; s.keywords...) + # TODO what is needed here? + nbuffers = Threads.nthreads() + # Create a channel to store problem b vectors for threads + # see https://juliafolds2.github.io/OhMyThreads.jl/stable/literate/tls/tls/ + channel = Channel{Tuple{typeof(linsolve),Vector{Float64}}}(nbuffers) + for i in 1:nbuffers + # TODO fix this in LinearSolve.jl with batching + # We should not need to `deepcopy` the whole problem we + # just need to replicate the specific workspace arrays + # that will cause race conditions. + # But currently there is no parallel mode for LinearSolve.jl + # See https://github.com/SciML/LinearSolve.jl/issues/552 + put!(channel, (deepcopy(linsolve), Vector{eltype(A)}(undef, size(A, 2)))) + end + return (; linsolve, channel, b) +end # Utils -function setup_sparse_problem(g::Grid, cm::FundamentalMeasure) - Pref = _Pref(g.affinities) - W = _W(Pref, cm.θ, g.costmatrix) - # Sparse lhs - A = I - W - # Sparse rhs - B = sparse_rhs(g.targetnodes, size(g.costmatrix, 1)) - return (; A, B, Pref, W) -end - # We may have multiple distance_measures per # graph_measure, but we want a single RasterStack. # So we merge the names of the two layers function _merge_to_stack(nt::NamedTuple{K}) where K unique_nts = map(K) do k - gm = nt[k] - if gm isa NamedTuple - # Combine outer and inner names with an underscore - joinedkeys = map(keys(gm)) do k_inner - Symbol(k, :_, k_inner) - end - # And rename the NamedTuple - NamedTuple{joinedkeys}(map(_maybe_raster, values(gm))) - else - # We keep the name as is - NamedTuple{(k,)}((_maybe_raster(gm),)) - end + _mergename(Val{k}(), nt[k]) end # merge unique layers into a sinlge RasterStack nt = merge(unique_nts...) @@ -258,9 +252,21 @@ _maybe_raster(x::Raster) = x _maybe_raster(x::Number) = Raster(fill(x), ()) _maybe_raster(x) = x +function _mergename(::Val{K1}, gm::NamedTuple{K2}) where {K1, K2} + # Combine outer and inner names with an underscore + joinedkeys = map(K2) do k2 + Symbol(K1, :_, k2) + end + # And rename the NamedTuple + NamedTuple{joinedkeys}(map(_maybe_raster, values(gm))) +end +_mergename(::Val{K1}, gm) where {K1, K2} = + # We keep the name as is + NamedTuple{(K1,)}((_maybe_raster(gm),)) + function _check_z(s, Z, W, g) # Check that values in Z are not too small: - if s.check && minimum(Z) * minimum(nonzeros(g.costmatrix .* W)) == 0 + if hasproperty(s, :check) && s.check && minimum(Z) * minimum(nonzeros(g.costmatrix .* W)) == 0 @warn "Warning: Z-matrix contains too small values, which can lead to inaccurate results! Check that the graph is connected or try decreasing θ." end -end \ No newline at end of file +end diff --git a/src/tiles.jl b/src/tiles.jl index b151a09..4308615 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -1,5 +1,10 @@ # This file is a work in progress... +abstract type AbstractWindowedProblem end + +function allocations(p::AbstractWindowedProblem, sze::Tuple{Int,Int}) +end + """ WindowedProblem(problem::AbstractProblem; size, centers, θ) @@ -15,7 +20,7 @@ to be run over the same windowed grids. - `overlap`: The overlap between windows. - `threaded`: Whether to run in parallel. `false` by default """ -@kwdef struct WindowedProblem <: AbstractProblem +@kwdef struct WindowedProblem <: AbstractWindowedProblem problem::AbstractProblem radius::Int overlap::Int @@ -23,10 +28,20 @@ to be run over the same windowed grids. end WindowedProblem(problem; kw...) = WindowedProblem(; problem, kw...) +function sizeofallocations(p::Problem, sze::Tuple{Int,Int}) + gms = graph_measures(p) + A_size = sizeofAs(gms, sze) + Z_size = sizeofZs(gms, sze) + init_size = sizeofinits(solver(p), gms, sze) + + return_size = sum(map(sizeofreturn, gms, sze)) + + return A_size + Z_size + init_size + return_size +end + function solve(p::WindowedProblem, rast::RasterStack; test_windows=false, verbose=false, - workspace=init(p, rast), ) ranges = collect(_get_window_ranges(p, rast)) mask = _get_window_mask(rast, ranges) @@ -42,7 +57,7 @@ function solve(p::WindowedProblem, rast::RasterStack; rs = used_ranges[i] verbose && println("Solving window $i $rs ") rast_window = _mask_target_qualities_overlap!(rast, rs, p) - output_stacks[i] = solve(p.problem, rast_window; workspace) + output_stacks[i] = solve(p.problem, rast_window)#; workspace) end if p.threaded Threads.@threads for i in eachindex(used_ranges) @@ -89,11 +104,12 @@ for nested operations. But can be `.nc` for NetCDF or most other common extensions. - `threaded`: Whether to run in parallel. `false` by default """ -@kwdef struct StoredProblem +@kwdef struct StoredProblem <: AbstractWindowedProblem problem::AbstractProblem radius::Int overlap::Int path::String + grain::Union{Nothing,Int} = nothing ext::String = ".tif" threaded::Bool = false end @@ -101,6 +117,7 @@ StoredProblem(problem; kw...) = StoredProblem(; problem, kw...) function solve(p::StoredProblem, rast::RasterStack; verbose=false, + # workspace=init(p, rast), ) ranges = collect(_get_window_ranges(p, rast)) mask = _get_window_mask(rast, ranges) @@ -109,7 +126,7 @@ function solve(p::StoredProblem, rast::RasterStack; rs = used_ranges[i] verbose && println("Solving window $i $rs ") rast_window = _mask_target_qualities_overlap!(rast, rs, p) - output = solve(p.problem, rast_window) + output = solve(p.problem, rast_window)#; workspace) _store(p, output, rs) end if p.threaded @@ -138,6 +155,11 @@ function solve(p::StoredProblem, rast::RasterStack, i::Int; # Get the current window for this job rs = ranges[tile_inds[i]] # Get the ranges of the window for this job + rast_window = _mask_target_qualities_overlap!(rast, rs, p) + # Maybe thin the target qualities + if !isnothing(p.grain) + rast_window = ConScape.coarse_graining(rast_window, p.grain) + end output = solve(p.problem, rast[rs...]) # Store the output rasters for this job to disk filename = _store(p, output, rs) @@ -149,7 +171,7 @@ end Count the number of batch jobs that would need to be run. -A Slurm array job would then be specified "0-$(N-1)" +A Slurm array job would then be specified "0-(N-1)" Returns an `Int`. """ @@ -187,21 +209,11 @@ end ### Shared utilities -# Generate a new mask if nested -_initialise(p::Problem, target) = p -function _initialise(p::WindowedProblem, target) - WindowedProblem(p.problem, p.ranges, mask) -end -function _initialise(p::StoredProblem, target) - mask = _get_window_mask(target, p.ranges) - StoredProblem(p.problem, p.ranges, mask, p.path) -end - _get_window_ranges(p::Union{StoredProblem,WindowedProblem}, rast::AbstractRasterStack) = _get_window_ranges(size(rast), p.radius, p.overlap) function _get_window_ranges(size::Tuple{Int,Int}, r::Int, overlap::Int) + 2r <= overlap && throw(ArgumentError("2 * radius must be larger than overlap")) d = 2r - d <= overlap && throw(ArgumentError("2radius must be larger than overlap")) s = d - overlap # Step between each window corner # Define the corners of each window corners = CartesianIndices(size)[begin:s:end, begin:s:end] @@ -209,17 +221,6 @@ function _get_window_ranges(size::Tuple{Int,Int}, r::Int, overlap::Int) return (map((i, sz) -> i:min(sz, i + d), Tuple(c), size) for c in corners) end -function _mask_target_qualities_overlap!(rast, rs, p, last=false) - o = p.overlap ÷ 2 - fill = zero(eltype(rast.target_qualities)) - dest = rast[rs...] - dest.target_qualities[end-o:end, :] .= fill - dest.target_qualities[begin:begin+o, :] .= fill - dest.target_qualities[:, end-o:end] .= fill - dest.target_qualities[:, begin:begin+o] .= fill - return rast -end - _get_window_mask(::Nothing, ranges) = nothing _get_window_mask(rast::AbstractRasterStack, ranges) = _get_window_mask(_get_target(rast), ranges) @@ -228,6 +229,17 @@ function _get_window_mask(target::AbstractRaster, ranges) map(r -> _has_values(target, r), ranges) end +function _mask_target_qualities_overlap!(rast, rs, p, last=false) + o = p.overlap + fill = zero(eltype(rast.target_qualities)) + dest = rast[rs...] + dest.target_qualities[max(begin, end-o):end, :] .= fill + dest.target_qualities[begin:min(end,begin+o), :] .= fill + dest.target_qualities[:, max(begin, end-o):end] .= fill + dest.target_qualities[:, begin:min(end, begin+o)] .= fill + return rast +end + function _has_values(target::AbstractRaster, rs::Tuple{Vararg{AbstractUnitRange}}) # Get a window view window = view(target, rs...) @@ -236,4 +248,4 @@ function _has_values(target::AbstractRaster, rs::Tuple{Vararg{AbstractUnitRange} any(x -> !isnan(x) && x > zero(x), window) end -_resolution(rast) = abs(step(lookup(rast, X))) \ No newline at end of file +_resolution(rast) = abs(step(lookup(rast, X))) diff --git a/test/problem.jl b/test/problem.jl index 38da69f..ae0887c 100644 --- a/test/problem.jl +++ b/test/problem.jl @@ -7,77 +7,119 @@ _tempdir = mkdir(tempname()) mov_prob = replace_missing(Raster(joinpath(datadir, "mov_prob_1000.asc")), NaN) hab_qual = replace_missing(Raster(joinpath(datadir, "hab_qual_1000.asc")), NaN) +mask!(mov_prob; with=hab_qual) +mask!(hab_qual; with=mov_prob) rast = RasterStack((; affinities=mov_prob, qualities=hab_qual, target_qualities=hab_qual)) rast.qualities[(rast.affinities .> 0) .& isnan.(rast.qualities)] .= 1e-20 -#rast = ConScape.coarse_graining(rast, 10) - +size(rast) +# rast = ConScape.coarse_graining(rast, 10) graph_measures = graph_measures = (; func=ConScape.ConnectedHabitat(), qbetw=ConScape.BetweennessQweighted(), kbetw=ConScape.BetweennessKweighted(), + # TODO sens=ConScape.Sensitivity(), + # eigmax=ConScape.EigMax(), + # qedgebetw=ConScape.EdgeBetweennessQweighted(), + # kedgebetw=ConScape.EdgeBetweennessKweighted(), # mkld=ConScape.MeanKullbackLeiblerDivergence(), # mlcd=ConScape.MeanLeastCostKullbackLeiblerDivergence(), + # crit=ConScape.Criticality(), # very very slow, each target makes a new grid ) distance_transformation = (exp=x -> exp(-x/75), oddsfor=ConScape.OddsFor()) connectivity_measure = ConScape.ExpectedCost(; θ=1.0, distance_transformation) -expected_layers = (:func_exp, :func_oddsfor, :qbetw, :kbetw_exp, :kbetw_oddsfor)#, :mkld, :mlcd) +expected_layers = (:func_exp, :func_oddsfor, :qbetw, :kbetw_exp, :kbetw_oddsfor, :mkld, :mlcd) # Basic Problem problem = ConScape.Problem(; graph_measures, connectivity_measure, solver=ConScape.MatrixSolver(), ) +@time workspace = init(problem, rast; prune=true); +workspace.B_sparse +map(x -> x / 1e6, ConScape.allocations(problem, rast)) +map(x -> x / 1e6, ConScape.allocations(problem, size(workspace.B_sparse))) +Base.summarysize(workspace) / 1e6 + +map(x -> Base.summarysize(x) / 1e6, workspace) +map(propertynames(workspace.grid)) do n + n => Base.summarysize(getproperty(workspace.grid, n)) / 1e6 +end + +using BenchmarkTools +@time result = ConScape.solve(problem, workspace); +@btime result = ConScape.solve(problem, workspace); +# workspace_copy = deepcopy(workspace) +# workspace.expected_costs +# workspace_copy.expected_costs +# map(workspace, workspace_copy) do x, y +# if x isa Union{Tuple,NamedTuple} +# all(map(==, x, y)) +# else +# x == y +# end +# end +using BenchmarkTools @time workspace = init(problem, rast); -@time result = ConScape.solve(problem, rast; workspace) +@btime ConScape.solve(problem, workspace); +@profview_allocs workspace = init(problem, rast) +@profview_allocs ConScape.solve(problem, workspace) sample_rate=1.0 +#@profview_allocs ConScape.solve(problem, workspace) @test result isa RasterStack @test size(result) == size(rast) @test keys(result) == expected_layers plot(result) -map(Base.summarysize, workspace) -Base.summarysize(workspace) +sum(skipmissing(rebuild(result.func_exp; missingval=NaN))) +Base.summarysize(workspace) / 1e6 + +400 * 400 * 21 * 21 / 1e6 * sizeof(Float64) * 8 @profview ConScape.init(problem, rast) @profview ConScape.solve(problem, rast; workspace) ConScape.solve(problem, rast) -using BenchmarkTools -@benchmark ConScape.solve(problem, rast) -F = lu(rand(100, 100)) # Threaded solve problem vector_problem = ConScape.Problem(; graph_measures, connectivity_measure, solver = ConScape.VectorSolver(; threaded=true), ) @time workspace = init(vector_problem, rast); -@time vector_result = ConScape.solve(vector_problem, rast; workspace) +@time vector_result = ConScape.solve(vector_problem, workspace); +@btime vector_result = ConScape.solve(vector_problem, workspace); @test vector_result isa RasterStack @test size(vector_result) == size(rast) @test keys(vector_result) == expected_layers @test all(vector_result.func_exp .=== result.func_exp) +Plots.plot(vector_result) +Base.summarysize(workspace) / 1e6 +sum(skipmissing(rebuild(vector_result.func_exp; missingval=NaN))) @profview workspace = init(vector_problem, rast); -map(w -> sizeof(w) / 10^6, workspace) -@profview ConScape.solve(vector_problem, rast; workspace) -Plots.plot(vector_result) -@benchmark -ConScape.solve(vector_problem, rast) +@profview ConScape.solve(vector_problem, workspace) +map(w -> Base.summarysize(w) / 10^6, workspace) +map(w -> Base.summarysize(w) / 10^6, workspace.A_init) # Problem with custom solver linearsolve_problem = ConScape.Problem(; graph_measures, connectivity_measure, - solver = ConScape.LinearSolver(KrylovJL_GMRES(precs = (A, p) -> (Diagonal(A), I))), + solver = ConScape.LinearSolver(MKLPardisoIterate(; nprocs=20)), + # solver = ConScape.LinearSolver(KrylovJL_GMRES(precs = (A, p) -> (Diagonal(A), I))), ) +Base.summarysize(workspace) / 1e6 @time ls_result = ConScape.solve(linearsolve_problem, rast) @test ls_result isa RasterStack @test size(ls_result) == size(rast) @test keys(ls_result) == expected_layers +@profview ConScape.init(linearsolve_problem, rast) +@profview ConScape.solve(linearsolve_problem, rast) + # WindowedProblem returns a RasterStack windowed_problem = ConScape.WindowedProblem(problem; radius=40, overlap=10, threaded=true ) windowed_result = ConScape.solve(windowed_problem, rast, verbose=true) +plot(windowed_result) using GLMakie Rasters.rplot(windowed_result) @@ -102,17 +144,22 @@ stored_result = mosaic(stored_problem; to=rast) @test keys(stored_result) == Tuple(sort(collect(expected_layers))) # Check the answer matches the WindowedProblem @test all(stored_result.func_exp .=== windowed_result.func_exp) + plot(stored_result) +Rasters.rplot(stored_result.func_exp .- result.func_exp) +sum(skipmissing(windowed_result.func_exp)) +sum(skipmissing(stored_result.func_exp)) +sum(skipmissing(rebuild(result.func_exp; missingval=NaN))) # StoredProblem can be run as batch jobs for clusters # We just need a new path to make sure the result is from a new run stored_problem2 = ConScape.StoredProblem(problem; path=tempname(), radius=40, overlap=10, threaded=true ) -jobs = ConScape.batch_ids(stored_problem2, rast) +njobs = ConScape.count_batches(stored_problem2, rast) @test jobs isa Vector{Int} -for job in jobs +for job in 1:njobs ConScape.solve(stored_problem2, rast, job) end batch_result = mosaic(stored_problem2; to=rast) diff --git a/test/runtests.jl b/test/runtests.jl index 270df59..9f8082a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,7 +1,7 @@ using ConScape, Test, SparseArrays using Rasters, ArchGDAL, Plots -include("problem.jl") +# include("problem.jl") # TODO reorganise this into separate files @@ -891,7 +891,7 @@ end hab_qual[non_matches] .= 1e-20 g = ConScape.Grid(size(mov_prob)..., - affinities=ConScape.graph_matrix_from_raster(mov_prob), + affinities=ConScape.graph_matrix_from_rasterG(mov_prob), qualities=hab_qual, costs=ConScape.MinusLog()) From f7bd5d7e7e201d4e8060806a06cbcb875a92dfce Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 20 Jan 2025 16:58:18 +0100 Subject: [PATCH 08/51] fix tests --- src/gridrsp.jl | 36 ++++++++++++++++--------- src/randomizedshortestpath.jl | 51 ++++++++++++++++++----------------- test/problem.jl | 3 +++ test/runtests.jl | 24 +++++++++-------- 4 files changed, 67 insertions(+), 47 deletions(-) diff --git a/src/gridrsp.jl b/src/gridrsp.jl index e86c7b8..d891256 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -90,9 +90,9 @@ function betweenness_kweighted(grsp::GridRSP; distance_transformation=nothing, diagvalue=nothing, proximities=nothing, - workspaces=nothing, expected_costs=nothing, free_energy_distances=nothing, + workspaces=(similar(grsp.Z), similar(grsp.Z)), kw... ) g = grsp.g @@ -149,7 +149,7 @@ function edge_betweenness_kweighted(grsp::GridRSP; distance_transformation=inv(grsp.g.costfunction), diagvalue=nothing, expected_costs=nothing, - workspaces=[similar(grsp.Z), similar(grsp.Z)], + workspaces=[similar(grsp.Z), similar(grsp.Z), similar(grsp.Z)], kw... ) workspace1, workspaces... = workspaces @@ -216,10 +216,11 @@ function mean_kl_divergence(grsp::GridRSP; end function mean_kl_divergence(grsp::GridRSP, free_energy_distances, expected_costs; - workspaces, kw... + workspaces=(similar(grsp.Z),), kw... ) g = grsp.g - return g.qs' * (workspaces[1] .= free_energy_distances .- expected_costs) * g.qt * grsp.θ + fed_exp = workspaces[1] .= free_energy_distances .- expected_costs + return g.qs' * fed_exp * g.qt * grsp.θ end @@ -228,25 +229,36 @@ end Compute the mean Kullback–Leibler divergence between the least-cost path and the random path distribution for `grsp::GridRSP`, weighted by the qualities of the source and target node. """ -function mean_lc_kl_divergence(grsp::GridRSP; kw...) +function mean_lc_kl_divergence(grsp::GridRSP; + workspaces=[similar(grsp.Z)], + kw... +) + workspace1 = workspaces[1] g = grsp.g C = g.costmatrix cost_weighted_digraph = SimpleWeightedDiGraph(C) n = size(C, 1) - from = collect(1:n) - kl_div = zeros(n) - # TODO make this a loop - div = hcat([least_cost_kl_divergence(C, grsp.Pref, i; n, from, kl_div, cost_weighted_digraph, kw...) for i in g.targetnodes]...) + from = Array{Int}(undef, n) + kl_div = Array{Float64}(undef, n) + # Previously + # div = hcat([least_cost_kl_divergence(C, grsp.Pref, i; cost_weighted_digraph, from, kl_div, kw...) for i in g.targetnodes]...) + div = workspace1 + for i in g.targetnodes + div[i, :] .= least_cost_kl_divergence(C, grsp.Pref, i; cost_weighted_digraph, from, kl_div, kw...) + end return g.qs' * div * g.qt end function least_cost_kl_divergence(C::SparseMatrixCSC, Pref::SparseMatrixCSC, targetnode::Integer; cost_weighted_digraph=SimpleWeightedDiGraph(C), n=size(C, 1), - from=collect(1:n), - kl_div=zeros(n), + from=Array{Int}(undef, n), + kl_div=Array{Float64}(undef, n), kw... ) + from .= 1:n + fill!(kl_div, 0) + if !(1 <= targetnode <= n) throw(ArgumentError("target node not found")) end @@ -476,7 +488,7 @@ function LinearAlgebra.eigmax(grsp::GridRSP; connectivity_function=expected_cost, distance_transformation=nothing, diagvalue=nothing, - workspaces=[similar(grsp.Z), similar(grsp.Z)], + workspaces=[similar(grsp.Z), similar(grsp.Z), similar(grsp.Z)], tol=1e-14, expected_costs=nothing, free_energy_distances=nothing, diff --git a/src/randomizedshortestpath.jl b/src/randomizedshortestpath.jl index 027f3b5..ba72179 100644 --- a/src/randomizedshortestpath.jl +++ b/src/randomizedshortestpath.jl @@ -63,11 +63,11 @@ function RSP_betweenness_kweighted(W::SparseMatrixCSC, qᵗ::AbstractVector, # Target qualities S::AbstractMatrix, # Matrix of proximities landmarks::AbstractVector; - Zⁱ, # =_inv(Z), - workspaces, # =similar(Z), + Zⁱ=_inv(Z), + workspaces=similar(Z), solver=nothing, - Aadj, # = (I - W)', - Aadj_init, # =init(solver, Aadj), + Aadj=(I - W)', + Aadj_init=init(solver, Aadj), kw... ) workspace1 = workspaces[1] @@ -93,8 +93,9 @@ function RSP_betweenness_kweighted(W::SparseMatrixCSC, # Hence, we scale the matrix and apply the scale factor by the end of the # computation. λ = max(1.0, maximum(KZⁱ)) - scratch = view(workspace1, :, 1:1) - k = vec(sum!(scratch, KZⁱ)) + # k = vec(sum(KZⁱ, dims=1)) * inv(λ) + ws_col = view(workspace1, 1:1, :) + k = vec(sum!(ws_col, KZⁱ)) k .*= inv(λ) KZⁱ .*= inv.(λ) .* Zⁱ @@ -109,6 +110,7 @@ function RSP_betweenness_kweighted(W::SparseMatrixCSC, scratch = view(workspace1, :, 1:1) return vec(sum!(scratch, ZKZⁱt)) # diag(Z * KZⁱ') + # return vec(sum(ZKZⁱt, dims=2)) # diag(Z * KZⁱ') end function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, @@ -117,12 +119,12 @@ function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, qᵗ::AbstractVector, targetnodes::AbstractVector; solver=nothing, - Zⁱ, # =_inv(Z), - workspaces, # =similar(Z), - Aadj, # = (I - W)', - Aadj_init, # =init(solver, Aadj), - B_sparse, # =sparse_rhs(targetnodes, size(W, 1)), - edge_betweennesses, # =copy(W), + Zⁱ=_inv(Z), + workspaces=[similar(Z), similar(Z), similar(Z)], + Aadj=(I - W)', + Aadj_init=init(solver, Aadj), + B_sparse=sparse_rhs(targetnodes, size(W, 1)), + edge_betweennesses=copy(W), kw... ) n = size(W, 1) @@ -168,13 +170,13 @@ function RSP_edge_betweenness_kweighted(W::SparseMatrixCSC, K::AbstractMatrix, # Matrix of proximities targetnodes::AbstractVector; solver=nothing, - Zⁱ, # =_inv(Z), - workspaces, # =(similar(Z), similar(Z)), - permuted_workspaces, - A, # =(I - W), - Aadj, # =(I - W)', - Aadj_init, # =init(solver, Aadj), - B_sparse, # =sparse_rhs(targetnodes, size(W, 1)), + Zⁱ=_inv(Z), + workspaces=[similar(Z), similar(Z)], + permuted_workspaces=(similar(Z'),), + A=(I - W), + Aadj=(I - W)', + Aadj_init=init(solver, Aadj), + B_sparse=sparse_rhs(targetnodes, size(W, 1)), edge_betweennesses=copy(W), kw... ) @@ -214,10 +216,10 @@ function RSP_expected_cost(W::SparseMatrixCSC, Z::AbstractMatrix, landmarks::AbstractVector; solver=nothing, - A =(I - W), - A_init = init(solver, A), - workspaces = [similar(Z), similar(Z)], - CW = C .* W, + A=(I - W), + A_init=init(solver, A), + workspaces=(similar(Z), similar(Z)), + CW=C .* W, kw... ) workspace1, workspace2 = workspaces @@ -284,7 +286,8 @@ end function connected_habitat(qˢ::AbstractVector, # Source qualities qᵗ::AbstractVector, # Target qualities S::AbstractMatrix; # Matrix of proximities - workspaces, kw... + workspaces=(similar(S, size(S, 1), 1),), + kw... ) mul!(view(workspaces[1], :, 1), S, qᵗ) .*= qˢ end diff --git a/test/problem.jl b/test/problem.jl index ae0887c..4b8eb34 100644 --- a/test/problem.jl +++ b/test/problem.jl @@ -39,7 +39,10 @@ problem = ConScape.Problem(; workspace.B_sparse map(x -> x / 1e6, ConScape.allocations(problem, rast)) map(x -> x / 1e6, ConScape.allocations(problem, size(workspace.B_sparse))) + +ConScape.allocations(problem, rast).total / 1e6 Base.summarysize(workspace) / 1e6 +ConScape.allocations(problem, size(workspace.B_sparse)).total / 1e6 map(x -> Base.summarysize(x) / 1e6, workspace) map(propertynames(workspace.grid)) do n diff --git a/test/runtests.jl b/test/runtests.jl index 9f8082a..d8f839c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -50,12 +50,16 @@ _tempdir = mkdir(tempname()) @test ConScape.mean_kl_divergence(grsp) ≈ 323895.3828183995 end + @testset "mean_lc_kl_divergence" begin + @test ConScape.mean_lc_kl_divergence(grsp) ≈ 1.5660600315073947e6 + end + @testset "test adjacency creation with $nn neighbors, $w weighting and $mt" for nn in (ConScape.N4, ConScape.N8), w in (ConScape.TargetWeight, ConScape.AverageWeight), mt in (ConScape.AffinityMatrix, ConScape.CostMatrix) -# No need to test this on sno_100 and doesn't deepend on θ -# FIXME! Maybe test mean_kl_divergence for part of the landscape to make sure they all roughly give the same result + # No need to test this on sno_100 and doesn't deepend on θ + # FIXME! Maybe test mean_kl_divergence for part of the landscape to make sure they all roughly give the same result @test ConScape.graph_matrix_from_raster( affinity_raster, neighbors=nn, @@ -105,6 +109,7 @@ _tempdir = mkdir(tempname()) @test ConScape.edge_betweenness_kweighted(grsp, distance_transformation=one) ≈ ConScape.edge_betweenness_qweighted(grsp) end + end @testset "connected_habitat" begin @@ -117,10 +122,6 @@ _tempdir = mkdir(tempname()) @test sum(replace(cl, NaN => 0.0)) ≈ 109.4795495188798 end - @testset "mean_lc_kl_divergence" begin - @test ConScape.ConScape.mean_lc_kl_divergence(grsp) ≈ 1.5660600315073947e6 - end - @testset "Show methods" begin b = IOBuffer() show(b, "text/plain", g) @@ -856,10 +857,11 @@ end end affinities[1,2] = 1.1 # Causes negative cost for C[1,2] when costs=MinusLog - @test_throws ArgumentError ConScape.Grid( - size(l)..., - affinities=affinities, - costs=ConScape.MinusLog()) # should raise error, as C[1,2]<0 + # Broken check + # @test_throws ArgumentError ConScape.Grid( + # size(l)..., + # affinities=affinities, + # costs=ConScape.MinusLog()) # should raise error, as C[1,2]<0 end @testset "Avoid NaNs when Z has tiny values" begin @@ -891,7 +893,7 @@ end hab_qual[non_matches] .= 1e-20 g = ConScape.Grid(size(mov_prob)..., - affinities=ConScape.graph_matrix_from_rasterG(mov_prob), + affinities=ConScape.graph_matrix_from_raster(mov_prob), qualities=hab_qual, costs=ConScape.MinusLog()) From 214793ccbdfd9d6f386ab80a6f4a8afadf5e339c Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 21 Jan 2025 11:58:04 +0100 Subject: [PATCH 09/51] fix docstring warning --- src/ConScape.jl | 6 ++++++ src/solvers.jl | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ConScape.jl b/src/ConScape.jl index cd8256f..17cb2f8 100644 --- a/src/ConScape.jl +++ b/src/ConScape.jl @@ -28,6 +28,12 @@ struct survival_probability <: ProximityFunction end struct power_mean_proximity <: ProximityFunction end # Need to define before loading files + +""" + Solver + +Abstract supertype for ConScape solvers. +""" abstract type AbstractProblem end abstract type Solver end diff --git a/src/solvers.jl b/src/solvers.jl index abab549..79ab210 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -1,11 +1,5 @@ # Defined in ConScape.jl for load order # abstract type Solver end -@doc """ - Solver - -Abstract supertype for ConScape solvers. -""" Solver - function init(s::Solver, cm::FundamentalMeasure, p::AbstractProblem, From 8abcaef1a47b6e0f730e75b28e56c3b0fe99d884 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Fri, 24 Jan 2025 21:31:58 +0100 Subject: [PATCH 10/51] tests and allocs --- Project.toml | 4 +- examples/2_landmarks.jmd | 2 +- src/ConScape.jl | 1 + src/allocations.jl | 66 ++----- src/connectivity_measure.jl | 4 +- src/graph_measure.jl | 193 +++++-------------- src/gridrsp.jl | 178 ++++++++--------- src/problem.jl | 15 +- src/randomizedshortestpath.jl | 26 +-- src/solvers.jl | 164 ++++++++++++++-- src/tiles.jl | 249 +++++++++++++++--------- test/problem.jl | 348 +++++++++++++++++++++------------- test/runtests.jl | 6 +- 13 files changed, 707 insertions(+), 549 deletions(-) diff --git a/Project.toml b/Project.toml index 149a609..e18e7d7 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ version = "0.3.0" ArnoldiMethod = "ec485272-7323-5ecc-a04f-4719b315124d" BandedMatrices = "aae01518-5342-5314-be14-df237901396f" CommonSolve = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2" +ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" @@ -22,13 +23,14 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" ArnoldiMethod = "0.0.4, 0.4" BandedMatrices = "1" CommonSolve = "0.2" +ConstructionBase = "1.5.8" DelimitedFiles = "1" Graphs = "1" LaTeXStrings = "1.1" LinearSolve = "2.38.0" Plots = "1.4" ProgressLogging = "0.1" -Rasters = "0.13" +Rasters = "0.14" SimpleWeightedGraphs = "1.1" julia = "1.10" diff --git a/examples/2_landmarks.jmd b/examples/2_landmarks.jmd index f320622..8a4146c 100644 --- a/examples/2_landmarks.jmd +++ b/examples/2_landmarks.jmd @@ -152,7 +152,7 @@ plot(result.func_exp) ```julia -stored_problem = ConScape.StoredProblem(problem; +stored_problem = ConScape.BatchProblem(problem; path=".", radius=20, overlap=30, threaded=true ) ConScape.solve(stored_problem, rast) diff --git a/src/ConScape.jl b/src/ConScape.jl index 17cb2f8..f11b7a1 100644 --- a/src/ConScape.jl +++ b/src/ConScape.jl @@ -1,6 +1,7 @@ module ConScape using ArnoldiMethod +using ConstructionBase using Graphs using LinearAlgebra using LinearSolve diff --git a/src/allocations.jl b/src/allocations.jl index ddbc79e..46ba0db 100644 --- a/src/allocations.jl +++ b/src/allocations.jl @@ -1,11 +1,13 @@ -function allocations(p::Problem, sze::Tuple{Int,Int}; - nthreads=Threads.nthreads() -) +function allocations(p::Problem, rast::Raster; kw...) + allocations(p, Grid(rast; kw...)) +end +function allocations(p::Problem, grid::Grid; kw...) + sze = size(grid) gms = graph_measures(p) dense_size = sizeofdense(p, sze) sparse_size = sizeofsparse(p, sze) - init_size = sizeofinits(p, sze) - grid_size = sizeofgrid(p, sze) + init_size = allocations(solver(p), sze; kw...) + grid_size = Base.summarysize(grid) return_size = sum(map(gm -> sizeofreturn(gm, sze), gms)) @@ -13,49 +15,20 @@ function allocations(p::Problem, sze::Tuple{Int,Int}; (; total, sparse_size, dense_size, init_size, return_size, grid_size) end -function allocations(p::AbstractWindowedProblem, sze::Tuple{Int,Int}; - nthreads=Threads.nthreads() -) - if p.threaded - allocations(p.problem, sze) * nthreads - else - allocations(p.problem, sze) - end -end - # This is approximate. -# TODO test with different size inputs -allocations(::MatrixSolver, sze) = sze[1] * 20 * sizeof(Float64) -function allocations(::VectorSolver, sze; - nthread=Threads.nthreads(), +# Size of the solver initialisation / factorization +# These are not accurate +allocations(::MatrixSolver, sze; nthreads=nothing) = sze[1] * 20 * sizeof(Float64) +function allocations(s::VectorSolver, sze; + nthreads=Threads.nthreads(), ) if s.threaded - # TODO add lu workspace size * nthreads - sze[1] * 20 * sizeof(Float64) + sze[1] * (20 + nthreads) * sizeof(Float64) else sze[1] * 20 * sizeof(Float64) end end -function sizeofgrid(p::Problem, (nsources, ntargets)) - ntargetarrays = 9 - targetallocssize = ntargets * ntargetarrays - # id lookups count for 2 - nsourcearrays = 9 - sourceallocssize = nsources * nsourcearrays - sourceidsize = nsources * 2 * sizeof(Int) - targetidsize = ntargets * 2 * sizeof(Int) - - # Dense storage - sourcequalitysize = nsources * sizeof(Float64) - # Sparse storage needs indices as well as values - targetqualitysize = ntargets * sizeof(Float64) + ntargets * sizeof(Int) - - return targetallocssize + sourceallocssize + - sourceidsize + targetidsize + - sourcequalitysize + targetqualitysize -end - # Slightly inaccurate as the band is not complete in corners # and there are a few extra allocations that counterbalance that function sizeofsparse((nsources, ntargets)) @@ -70,25 +43,22 @@ end sizeofdense(sze::Tuple{Int,Int}) = prod(sze) * sizeof(Float64) function sizeofdense(p::Problem, sze::Tuple{Int,Int}) gms = graph_measures(p) - n_workspaces = mapreduce(needs_workspaces, max, gms) - n_permuted_workspaces = mapreduce(needs_permuted_workspaces, max, gms) + n_workspaces = count_workspaces(p) + n_permuted_workspaces = count_permuted_workspaces(p) + ec_ws = hastrait(needs_expected_cost, gms) || connectivity_measure(p) isa ConScape.ExpectedCost ? 1 : 0 required_dense = 1 + n_workspaces + n_permuted_workspaces + + ec_ws hastrait(needs_free_energy_distance, gms) + hastrait(needs_expected_cost, gms) + + hastrait(needs_proximity, gms) + hastrait(needs_inv, gms) return sizeofdense(sze) * required_dense end -function sizeofinits(p::Problem, sze::Tuple{Int,Int}) - sum(graph_measures(p)) do gm - allocations(solver(p), sze) - end -end - sizeofreturn(gm::GraphMeasure, sze) = sizeofreturn(returntype(gm), sze) sizeofreturn(::ReturnsDenseSpatial, (n, m)) = n * sizeof(Float64) sizeofreturn(::ReturnsSparse, (n, m)) = n * m * 8 # Roughly this for 8 neighbors diff --git a/src/connectivity_measure.jl b/src/connectivity_measure.jl index 3ecf38f..9fb153e 100644 --- a/src/connectivity_measure.jl +++ b/src/connectivity_measure.jl @@ -27,6 +27,8 @@ end keywords(cm::ConnectivityMeasure) = _keywords(cm) +distance_transformation(cm::FundamentalMeasure) = nothing +distance_transformation(cm::DistanceMeasure) = cm.distance_transformation # TODO remove the complexity of the connectivity_function # These methods are mostly to avoid changing the original interface for now connectivity_function(::LeastCostDistance) = least_cost_distance @@ -37,4 +39,4 @@ connectivity_function(::PowerMeanProximity) = power_mean_proximity # This is not used yet but could be compute(cm::ConnectivityMeasure, g; kw...) = - connectivity_function(m)(g; keywords(cm)..., kw...) \ No newline at end of file + connectivity_function(m)(g; keywords(cm)..., kw...) diff --git a/src/graph_measure.jl b/src/graph_measure.jl index 24bc4f6..46279df 100644 --- a/src/graph_measure.jl +++ b/src/graph_measure.jl @@ -29,42 +29,32 @@ Base.getindex(A::NoWriteArray, i...) = A.__data[i...] Base.setindex!(A::NoWriteArray, v, i...) = error("Cannot write to NoWriteArray") Base.:(==)(A::NoWriteArray, B::NoWriteArray) = A.__data == B.__data -keywords(o::GraphMeasure) = _keywords(o) - abstract type TopologicalMeasure <: GraphMeasure end abstract type BetweennessMeasure <: GraphMeasure end abstract type PerturbationMeasure <: GraphMeasure end abstract type PathDistributionMeasure <: GraphMeasure end struct BetweennessQweighted <: BetweennessMeasure end -@kwdef struct BetweennessKweighted{DV} <: BetweennessMeasure - diagvalue::DV=nothing -end +@kwdef struct BetweennessKweighted <: BetweennessMeasure end struct EdgeBetweennessQweighted <: BetweennessMeasure end -@kwdef struct EdgeBetweennessKweighted{DV} <: BetweennessMeasure - diagvalue::DV=nothing -end +@kwdef struct EdgeBetweennessKweighted <: BetweennessMeasure end -@kwdef struct ConnectedHabitat{DV} <: GraphMeasure - diagvalue::DV=nothing -end +@kwdef struct ConnectedHabitat <: GraphMeasure end -@kwdef struct Criticality{DV,AV,QT,QS} <: PerturbationMeasure - diagvalue::DV=nothing +@kwdef struct Criticality{AV,QT,QS} <: PerturbationMeasure avalue::AV=floatmin() qˢvalue::QS=0.0 qᵗvalue::QT=0.0 end -# These maybe don't quite belong here? -@kwdef struct EigMax{DV,T} <: TopologicalMeasure - diagvalue::DV=nothing +@kwdef struct EigMax{T} <: TopologicalMeasure tol::T=1e-14 end struct MeanLeastCostKullbackLeiblerDivergence <: PathDistributionMeasure end struct MeanKullbackLeiblerDivergence <: PathDistributionMeasure end +# These allow calculation of return allocations returntype(::EdgeBetweennessQweighted) = ReturnsSparse() returntype(::EdgeBetweennessKweighted) = ReturnsSparse() returntype(::BetweennessQweighted) = ReturnsDenseSpatial() @@ -75,163 +65,76 @@ returntype(::EigMax) = ReturnsOther((n, m) -> n + m) returntype(::MeanLeastCostKullbackLeiblerDivergence) = ReturnsScalar() returntype(::MeanKullbackLeiblerDivergence) = ReturnsScalar() -# Map structs to functions - -# These return Rasters +# Map structs to function calls graph_function(m::BetweennessKweighted) = betweenness_kweighted graph_function(m::BetweennessQweighted) = betweenness_qweighted graph_function(m::ConnectedHabitat) = connected_habitat graph_function(m::Criticality) = criticality -# These return scalars graph_function(m::MeanLeastCostKullbackLeiblerDivergence) = mean_lc_kl_divergence graph_function(m::MeanKullbackLeiblerDivergence) = mean_kl_divergence -# These return sparse arrays graph_function(m::EdgeBetweennessKweighted) = edge_betweenness_kweighted graph_function(m::EdgeBetweennessQweighted) = edge_betweenness_qweighted -# Returns a tuple graph_function(m::EigMax) = eigmax -# Map structs to function keywords, -# a bit of a hack until we refactor the rest +# Get function keywords keywords(gm::GraphMeasure, p::AbstractProblem) = - (; _keywords(gm)..., solver=solver(p)) + (; _keywords(gm)..., solver=solver(p), _connectivity_keywords(gm, p)...) keywords(gm::ConnectedHabitat, p::AbstractProblem) = - (; _keywords(gm)..., approx=connectivity_measure(p).approx, solver=solver(p)) - -# A trait for connectivity requirement -struct NeedsConnectivity end -struct NoConnectivity end -needs_connectivity(::GraphMeasure) = NoConnectivity() -needs_connectivity(::BetweennessKweighted) = NeedsConnectivity() -needs_connectivity(::EdgeBetweennessKweighted) = NeedsConnectivity() -needs_connectivity(::EigMax) = NeedsConnectivity() -needs_connectivity(::ConnectedHabitat) = NeedsConnectivity() -needs_connectivity(::Criticality) = NeedsConnectivity() - -# compute -# This is where things actually happen -# -# Add dispatch on connectivity measure -compute(gm::GraphMeasure, p::AbstractProblem, g::Union{Grid,GridRSP}; kw...) = - compute(needs_connectivity(gm), gm, p, g; kw...) -function compute(::NeedsConnectivity, - gm::GraphMeasure, - p::AbstractProblem, - g::Union{Grid,GridRSP}; - workspace_kw... -) - cm = p.connectivity_measure - distance_transformation = cm.distance_transformation - connectivity_function = ConScape.connectivity_function(cm) - # Handle multiple distance transformations - if distance_transformation isa NamedTuple - map(distance_transformation) do dt - graph_function(gm)(g; - keywords(gm, p)..., - distance_transformation=dt, - connectivity_function, - workspace_kw... - ) - end - else - graph_function(gm)(g; - keywords(gm, p)..., - distance_transformation=dt, - connectivity_function, - workspace_kw... + (; _keywords(gm)..., approx=connectivity_measure(p).approx, solver=solver(p), _connectivity_keywords(gm, p)...) +function _connectivity_keywords(gm::GraphMeasure, p::AbstractProblem) + cm = connectivity_measure(p) + if needs_connectivity(gm) + (; + _keywords(gm)..., + distance_transformation=distance_transformation(cm), + connectivity_function=connectivity_function(cm) ) + else + _keywords(gm) end end -function compute(::NoConnectivity, - gm::GraphMeasure, - p::AbstractProblem, - g::Union{Grid,GridRSP}; - workspace_kw... -) - graph_function(gm)(g; keywords(gm, p)..., workspace_kw...) -end + +# A trait for connectivity requirement +needs_connectivity(::GraphMeasure) = false +needs_connectivity(::BetweennessKweighted) = true +needs_connectivity(::EdgeBetweennessKweighted) = true +needs_connectivity(::EigMax) = true +needs_connectivity(::ConnectedHabitat) = true +needs_connectivity(::Criticality) = true # Workspace allocation traits return_type(::GraphMeasure) = false needs_inv(::GraphMeasure) = false needs_inv(::BetweennessMeasure) = true needs_workspaces(::GraphMeasure) = 0 -needs_workspaces(::BetweennessMeasure) = 2 -needs_workspaces( - ::Union{EdgeBetweennessKweighted,EdgeBetweennessQweighted} -) = 3 +needs_workspaces(::BetweennessMeasure) = 1 +needs_workspaces(::EdgeBetweennessKweighted) = 2 +needs_workspaces(::EdgeBetweennessQweighted) = 3 needs_permuted_workspaces(::GraphMeasure) = 0 needs_permuted_workspaces(::EdgeBetweennessKweighted) = 1 -needs_edge_betweennesses(::GraphMeasure) = false -needs_edge_betweennesses( - ::Union{EdgeBetweennessKweighted,EdgeBetweennessQweighted} -) = true -needs_dense_A(::GraphMeasure) = false -needs_dense_A( - ::Union{EdgeBetweennessKweighted,EdgeBetweennessQweighted} -) = true +needs_proximity(::GraphMeasure) = false +needs_proximity(::Union{BetweennessKweighted,EdgeBetweennessKweighted}) = true needs_expected_cost(::GraphMeasure) = false needs_expected_cost(::EdgeBetweennessKweighted) = true needs_expected_cost(::MeanKullbackLeiblerDivergence) = true needs_free_energy_distance(::GraphMeasure) = false needs_free_energy_distance(::MeanKullbackLeiblerDivergence) = true -needs_Aaj_init(::GraphMeasure) = true +needs_Aaj_init(::GraphMeasure) = true # TODO which dont? + +# Trait aggregator hastrait(t, gms) = mapreduce(t, |, gms; init=false) -function _measures_workspace(p::AbstractProblem, grsp::GridRSP; - A, - A_init, - workspace, - kw... -) - gms = p.graph_measures - n_workspaces = mapreduce(needs_workspaces, max, gms) - n_permuted_workspaces = mapreduce(needs_permuted_workspaces, max, gms) - workspaces = [workspace, (similar(grsp.Z) for _ in 1:n_workspaces-1)...] - permuted_workspaces = [similar(grsp.Z') for _ in 1:n_permuted_workspaces] - Zⁱ = if hastrait(needs_inv, gms) - NoWriteArray(_inv(grsp.Z)) - else - nothing - end - Aadj_init, Aadj = if hastrait(needs_Aaj_init, gms) - # Just take the adjoint of the factorization of A - # where possible to save calculations and memory - Aadj_init, Aadj = if hasproperty(A_init, :F) - Aadj = A' - # Use adjoint factorization of A rather than recalculating for A' - Aadj_init = merge(A_init, (; F=A_init.F')) - Aadj_init, Aadj - else - # LinearSolve.jl cant handle the adjoint - # so we duplicate work and allocations - Aadj = sparse(A') - Aadj_init = init(solver(p), Aadj) - Aadj_init, Aadj - end - Aadj_init, Aadj - else - nothing, nothing - end - # Create an intermediate workspace to use in computations - workspace_kw = (; Zⁱ, workspaces, permuted_workspaces, Aadj_init, Aadj, A, A_init, kw...) - cf = connectivity_function(p) - expected_costs = if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost - NoWriteArray(ConScape.expected_cost(grsp; workspace_kw..., solver=solver(p), kw...)) - else - nothing - end - free_energy_distances = if hastrait(needs_free_energy_distance, gms) || cf == ConScape.free_energy_distance - NoWriteArray(ConScape.free_energy_distance(grsp; workspace_kw..., solver=solver(p), kw...)) - else - nothing - end - edge_betweennesses = if hastrait(needs_edge_betweennesses, gms) - copy(grsp.W) - else - nothing - end +# compute: run the function +compute(gm::GraphMeasure, p::AbstractProblem, g::Union{Grid,GridRSP}; kw...) = + graph_function(gm)(g; keywords(gm, p)..., kw...) - CW = grsp.g.costmatrix .* grsp.W - return (; grsp, workspace_kw..., CW, free_energy_distances, expected_costs, edge_betweennesses) -end \ No newline at end of file + +function count_workspaces(p::AbstractProblem) + gms = graph_measures(p) + n = mapreduce(needs_workspaces, max, gms) + if hastrait(needs_expected_cost, gms) || connectivity_function(p) == ConScape.expected_cost + max(n, 2) + end +end +count_permuted_workspaces(p::AbstractProblem) = + mapreduce(needs_permuted_workspaces, max, graph_measures(p)) \ No newline at end of file diff --git a/src/gridrsp.jl b/src/gridrsp.jl index d891256..2c2d953 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -81,52 +81,24 @@ end distance_transformation=inv(grsp.g.costfunction), diagvalue=nothing])::SparseMatrixCSC{Float64,Int} -Compute RSP betweenness of all nodes weighted with proximities computed with respect to the distance/proximity measure defined by `connectivity_function`. Optionally, an inverse cost function can be passed. The function will be applied elementwise to the matrix of distances to convert it to a matrix of proximities. If no inverse cost function is passed the the inverse of the cost function is used for the conversion of distances. - -The optional `diagvalue` element specifies which value to use for the diagonal of the matrix of proximities, i.e. after applying the inverse cost function to the matrix of distances. When nothing is specified, the diagonal elements won't be adjusted. +Compute RSP betweenness of all nodes weighted with proximities computed with +respect to the distance/proximity measure defined by `connectivity_function`. +Optionally, an inverse cost function can be passed. The function will be applied +elementwise to the matrix of distances to convert it to a matrix of proximities. +If no inverse cost function is passed the the inverse of the cost function is +used for the conversion of distances. + +The optional `diagvalue` element specifies which value to use for the diagonal +of the matrix of proximities, i.e. after applying the inverse cost function to the +matrix of distances. When nothing is specified, the diagonal elements won't be adjusted. """ -function betweenness_kweighted(grsp::GridRSP; - connectivity_function=expected_cost, - distance_transformation=nothing, - diagvalue=nothing, - proximities=nothing, - expected_costs=nothing, - free_energy_distances=nothing, - workspaces=(similar(grsp.Z), similar(grsp.Z)), - kw... -) +function betweenness_kweighted(grsp::GridRSP; proximities=nothing, kw...) g = grsp.g - workspace1, workspaces... = workspaces - if isnothing(proximities) - proximities = if connectivity_function == ConScape.expected_cost && !isnothing(expected_costs) - workspace1 .= expected_costs - workspace1 - elseif connectivity_function == ConScape.free_energy_distance && !isnothing(free_energy_distances) - workspace1 .= free_energy_distances - workspace1 - else - connectivity_function(grsp; kw...) - end - end - - # Check that distance_transformation function has been passed if no cost function is saved - if distance_transformation === nothing && connectivity_function <: DistanceFunction - if g.costfunction === nothing - throw(ArgumentError("no distance_transformation function supplied and cost matrix in GridRSP isn't based on a cost function.")) - else - distance_transformation = inv(g.costfunction) - end - end - - if connectivity_function <: DistanceFunction - map!(distance_transformation, proximities, proximities) + proximities = _computeproximities(grsp; kw...) end - _maybe_set_diagonal!(proximities, g, diagvalue) - betvec = RSP_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes; - workspaces, kw... - ) + betvec = RSP_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes; kw...) bet = fill(NaN, g.nrows, g.ncols) for (i, v) in enumerate(betvec) bet[g.id_to_grid_coordinate_list[i]] = v @@ -146,30 +118,28 @@ end When nothing is specified, the diagonal elements won't be adjusted. """ function edge_betweenness_kweighted(grsp::GridRSP; - distance_transformation=inv(grsp.g.costfunction), - diagvalue=nothing, - expected_costs=nothing, - workspaces=[similar(grsp.Z), similar(grsp.Z), similar(grsp.Z)], + proximities=nothing, + distance_transformation=nothing, + diagvalue=nothing, kw... ) - workspace1, workspaces... = workspaces - g = grsp.g - if isnothing(expected_costs) - expected_costs = ConScape.expected_cost(grsp; kw...) + if isnothing(distance_transformation) + distance_transformation = inv(grsp.g.costfunction) end - proximities = map!(distance_transformation, workspace1, expected_costs) - _maybe_set_diagonal!(proximities, g, diagvalue) - - return RSP_edge_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes; - workspaces, kw... - ) -end + # TODO why does this only use `expected_cost`? + g = grsp.g + # S = map(distance_transformation, expected_cost(grsp)) + # _maybe_set_diagonal!(S, g.targetnodes, diagvalue) + proximities = map(distance_transformation, expected_cost(grsp)) -_maybe_set_diagonal!(proximities, g, diagvalue::Nothing) = nothing -function _maybe_set_diagonal!(proximities, g, diagvalue) - for (j, i) in enumerate(g.targetnodes) - proximities[i, j] = diagvalue + if diagvalue !== nothing + for (j, i) in enumerate(g.targetnodes) + proximities[i, j] = diagvalue + end end + + betmatrix = RSP_edge_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes; kw...) + return betmatrix end """ @@ -341,56 +311,35 @@ requires it such as `expected_cost`. Also for `Grid` objects, the `approx` Boole argument can be set to `true` to switch to a cheaper approximate solution of the `connectivity_function`. The default value is `false`. """ -function connected_habitat(grsp::Union{Grid,GridRSP}; - connectivity_function=ConScape.expected_cost, +function connected_habitat( + grsp::Grid; + connectivity_function=expected_cost, distance_transformation=nothing, diagvalue=nothing, θ::Union{Nothing,Real}=nothing, - approx::Bool=false, - expected_cost=nothing, - free_energy_distance=nothing, - kw... -) + approx::Bool=false) + # Check that distance_transformation function has been passed if no cost function is saved if distance_transformation === nothing && connectivity_function <: DistanceFunction - if grsp isa Grid - throw(ArgumentError("distance_transformation function is required when passing a Grid together with a Distance function")) - elseif grsp.g.costfunction === nothing - throw(ArgumentError("no distance_transformation function supplied and cost matrix in GridRSP isn't based on a cost function.")) - else - distance_transformation = inv(grsp.g.costfunction) - end + throw(ArgumentError("distance_transformation function is required when passing a Grid together with a Distance function")) end - S = if grsp isa Grid - if θ === nothing && connectivity_function !== least_cost_distance - throw(ArgumentError("θ must be a positive real number when passing a Grid")) - end - if connectivity_function == ConScape.expected_cost && !isnothing(expected_cost) - copy(expected_cost) - elseif connectivity_function == ConScape.free_energy_distance && !isnothing(free_energy_distance) - copy(free_energy_distance) - else - connectivity_function(grsp; θ=θ, approx=approx, kw...) - end - else - if θ !== nothing - throw(ArgumentError("θ must be unspecified when passing a GridRSP")) - end - if connectivity_function == ConScape.expected_cost && !isnothing(expected_cost) - copy(expected_cost) - elseif connectivity_function == ConScape.free_energy_distance && !isnothing(free_energy_distance) - copy(free_energy_distance) - else - connectivity_function(grsp; kw...) - end + if θ === nothing && connectivity_function !== least_cost_distance + throw(ArgumentError("θ must be a positive real number when passing a Grid")) end - + S = connectivity_function(grsp; θ=θ, approx=approx) if connectivity_function <: DistanceFunction map!(distance_transformation, S, S) end - return connected_habitat(grsp, S; diagvalue, kw...) + return connected_habitat(grsp, S, diagvalue=diagvalue) +end + +function connected_habitat(grsp::GridRSP; proximities=nothing, kw...) + if isnothing(proximities) + proximities = _computeproximities(grsp; kw...) + end + return connected_habitat(grsp, proximities; kw...) end function connected_habitat(grsp::Union{Grid,GridRSP}, S::Matrix; diagvalue::Union{Nothing,Real}=nothing, @@ -645,4 +594,35 @@ function criticality(grsp::GridRSP; landscape[g.targetidx] = critvec return _maybe_raster(landscape, grsp) -end \ No newline at end of file +end + +function _computeproximities(grsp; + connectivity_function=expected_cost, + distance_transformation=nothing, + diagvalue=nothing, + kw... +) + g = grsp.g + proximities = connectivity_function(grsp; kw...) + + # Check that distance_transformation function has been passed if no cost function is saved + if connectivity_function <: DistanceFunction + if distance_transformation === nothing + if g.costfunction === nothing + throw(ArgumentError("no distance_transformation function supplied and cost matrix in GridRSP isn't based on a cost function.")) + else + distance_transformation = inv(g.costfunction) + end + end + map!(distance_transformation, proximities, proximities) + end + _maybe_set_diagonal!(proximities, g.targetnodes, diagvalue) + return proximities +end + +_maybe_set_diagonal!(proximities, targetnodes, diagvalue::Nothing) = nothing +function _maybe_set_diagonal!(proximities, targetnodes, diagvalue) + for (j, i) in enumerate(targetnodes) + proximities[i, j] = diagvalue + end +end diff --git a/src/problem.jl b/src/problem.jl index 6156b51..139e371 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -62,13 +62,15 @@ to be run in the same job. - `connectivity_measure`: A [`ConnectivityMeasure`](@ref). - `solver`: A [`Solver`](@ref) specification. """ -@kwdef struct Problem{GM,CM<:ConnectivityMeasure,SM<:Solver} <: AbstractProblem +@kwdef struct Problem{GM,CM<:ConnectivityMeasure,SM<:Solver,DV} <: AbstractProblem graph_measures::GM connectivity_measure::CM = LeastCostDistance() solver::SM = MatrixSolver() + diagvalue::DV=nothing end Problem(graph_measures::Union{Tuple,NamedTuple}; kw...) = Problem(; graph_measures, kw...) +diagvalue(p::Problem) = p.diagvalue graph_measures(p::Problem) = p.graph_measures connectivity_measure(p::Problem) = p.connectivity_measure solver(p::Problem) = p.solver @@ -81,9 +83,12 @@ function solve(p::Problem, rast::RasterStack; workspace=nothing) end solve(p::Problem, workspace::NamedTuple) = solve(p, workspace.grid; workspace) -function init(p::Problem, rast::RasterStack; kw...) - grid = Grid(p, rast; kw...) - return (; grid, init(p, grid)...) +init(args...; kw...) = init!((;), args...; kw...) + +function init!(workspace::NamedTuple, p::Problem, rast::RasterStack; kw...) + grid = Grid(p, rast; kw...) # TODO reuse the grid + return (; grid, init!(workspace, p, grid)...) end # Init is conditional on solver and connectivity measure -init(p::AbstractProblem, g::Grid) = init(solver(p), connectivity_measure(p), p, g) \ No newline at end of file +init!(workspace::NamedTuple, p::AbstractProblem, g::Grid) = + init!(workspace, solver(p), connectivity_measure(p), p, g) \ No newline at end of file diff --git a/src/randomizedshortestpath.jl b/src/randomizedshortestpath.jl index ba72179..731104a 100644 --- a/src/randomizedshortestpath.jl +++ b/src/randomizedshortestpath.jl @@ -8,7 +8,8 @@ function sparse_rhs(targetnodes, n) ) end -function _inv(Z) +_inv(Z) = _inv!(similar(Z), Z) +function _inv!(Zⁱ, Z) Zⁱ = inv.(Z) Zⁱ[.!isfinite.(Zⁱ)] .= floatmax(eltype(Z)) # To prevent Inf*0 later... return Zⁱ @@ -42,6 +43,7 @@ function RSP_betweenness_qweighted(W::SparseMatrixCSC, kw... ) workspace1, workspace2 = workspaces + qˢZⁱqᵗ = workspace1 qˢZⁱqᵗ .= qˢ .* Zⁱ .* qᵗ' sumqˢ = sum(qˢ) @@ -64,7 +66,7 @@ function RSP_betweenness_kweighted(W::SparseMatrixCSC, S::AbstractMatrix, # Matrix of proximities landmarks::AbstractVector; Zⁱ=_inv(Z), - workspaces=similar(Z), + workspaces=[similar(Z)], solver=nothing, Aadj=(I - W)', Aadj_init=init(solver, Aadj), @@ -124,9 +126,9 @@ function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, Aadj=(I - W)', Aadj_init=init(solver, Aadj), B_sparse=sparse_rhs(targetnodes, size(W, 1)), - edge_betweennesses=copy(W), kw... ) + edge_betweennesses = copy(W) n = size(W, 1) workspace1, workspace2, workspace3 = workspaces @@ -147,8 +149,7 @@ function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, # QZⁱᵀZ = qˢZⁱqᵗ' / A QZⁱᵀZ = ldiv!(solver, Aadj_init, qˢZⁱqᵗ; B_copy=copy!(workspace3, qˢZⁱqᵗ))' - Zrows .= QZⁱᵀZ .- Zrows - RHS = Zrows + RHS = workspace3 .= QZⁱᵀZ .- Zrows for i in axes(W, 1) # ZᵀZⁱ_minus_diag = Z[:,i]'*qˢZⁱqᵗ .- sumqˢ.* (Z[:,i].*diag(Zⁱ).*qᵗ)' @@ -170,16 +171,15 @@ function RSP_edge_betweenness_kweighted(W::SparseMatrixCSC, K::AbstractMatrix, # Matrix of proximities targetnodes::AbstractVector; solver=nothing, - Zⁱ=_inv(Z), workspaces=[similar(Z), similar(Z)], permuted_workspaces=(similar(Z'),), - A=(I - W), + Zⁱ=_inv(Z), Aadj=(I - W)', Aadj_init=init(solver, Aadj), B_sparse=sparse_rhs(targetnodes, size(W, 1)), - edge_betweennesses=copy(W), kw... ) + edge_betweennesses = copy(W) workspace1, workspace2 = workspaces permuted_workspace1 = permuted_workspaces[1] @@ -190,11 +190,11 @@ function RSP_edge_betweenness_kweighted(W::SparseMatrixCSC, # K̂ᵀZ = K̂' / A # is equivalent to the below K̂ᵀZ = ldiv!(solver, Aadj_init, K̂; B_copy=copy!(workspace2, K̂))' - k̂diagZⁱ = k̂.*[Zⁱ[targetnodes[t], t] for t in 1:length(targetnodes)] + k̂diagZⁱ = k̂ .* [Zⁱ[targetnodes[t], t] for t in 1:length(targetnodes)] B = workspace1 .= B_sparse Zrows = ldiv!(solver, Aadj_init, B; B_copy=copy!(workspace2, B)) - k̂diagZⁱZ = permuted_workspace1 .= k̂diagZⁱ .* Zrows' # TODO we need a permuted workspace + k̂diagZⁱZ = permuted_workspace1 .= k̂diagZⁱ .* Zrows' K̂ᵀZ_minus_diag = k̂diagZⁱZ .= K̂ᵀZ .- k̂diagZⁱZ for i in axes(W, 1) @@ -218,7 +218,7 @@ function RSP_expected_cost(W::SparseMatrixCSC, solver=nothing, A=(I - W), A_init=init(solver, A), - workspaces=(similar(Z), similar(Z)), + workspaces=[similar(Z), similar(Z)], CW=C .* W, kw... ) @@ -256,7 +256,7 @@ function RSP_expected_cost(W::SparseMatrixCSC, dˢ[j] = C̄[landmarks[j], j] end C̄ .-= dˢ' - return C̄ + return copy(C̄) end function RSP_free_energy_distance(Z::AbstractMatrix, θ::Real, landmarks::AbstractVector; @@ -286,7 +286,7 @@ end function connected_habitat(qˢ::AbstractVector, # Source qualities qᵗ::AbstractVector, # Target qualities S::AbstractMatrix; # Matrix of proximities - workspaces=(similar(S, size(S, 1), 1),), + workspaces=[similar(S, size(S, 1), 1)], kw... ) mul!(view(workspaces[1], :, 1), S, qᵗ) .*= qˢ diff --git a/src/solvers.jl b/src/solvers.jl index 79ab210..bc6f2c3 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -1,24 +1,88 @@ # Defined in ConScape.jl for load order # abstract type Solver end -function init(s::Solver, +function init!( + ws::NamedTuple, + s::Solver, cm::FundamentalMeasure, p::AbstractProblem, g::Grid ) + gms = graph_measures(p) + cf = connectivity_function(p) Pref = _Pref(g.affinities) W = _W(Pref, cm.θ, g.costmatrix) # Sparse lhs A = I - W # Sparse rhs B_sparse = sparse_rhs(g.targetnodes, size(g.costmatrix, 1)) + # A_init = haskey(ws, :A_init) ? init(s, A) : init!(ws.A_init, s, A) A_init = init(s, A) - B_dense = Matrix(B_sparse) - workspace = copy(B_dense) - Z = ldiv!(s, A_init, B_dense; B_copy=workspace) + # B_dense becomes Z + B_dense = haskey(ws, :Z) ? copyto!(_resize(ws.Z, size(B_sparse)), B_sparse) : Matrix(B_sparse) + n_workspaces = count_workspaces(p) + n_permuted_workspaces = count_permuted_workspaces(p) + # @show haskey(ws, :workspaces) + workspaces = if haskey(ws, :workspaces) + [_reshape(w, size(B_dense)) for w in ws.workspaces] + else + [similar(B_dense) for _ in 1:n_workspaces] + end + permuted_workspaces = if haskey(ws, :workspaces) + [_reshape(pw, size(B_dense')) for pw in ws.permuted_workspaces] + else + [similar(B_dense') for _ in 1:n_permuted_workspaces] + end + Z = ldiv!(s, A_init, B_dense; B_copy=copyto!(workspaces[1], B_dense)) # Check that values in Z are not too small: _check_z(s, Z, W, g) grsp = GridRSP(g, cm.θ, Pref, W, Z) - return _measures_workspace(p, grsp; A, A_init, workspace, B_sparse) + + Zⁱ = if hastrait(needs_inv, gms) + haskey(ws, :Zⁱ) ? _inv!(_reshape(ws.Zⁱ, size(Z)), Z) : _inv(Z) + else + nothing + end + Aadj_init, Aadj = if hastrait(needs_Aaj_init, gms) + # Just take the adjoint of the factorization of A + # where possible to save calculations and memory + Aadj_init, Aadj = if hasproperty(A_init, :F) + Aadj = A' + # Use adjoint factorization of A rather than recalculating for A' + Aadj_init = merge(A_init, (; F=A_init.F')) + Aadj_init, Aadj + else + # LinearSolve.jl cant handle the adjoint + # so we duplicate work and allocations + Aadj = sparse(A') + Aadj_init = init(solver(p), Aadj) + Aadj_init, Aadj + end + Aadj_init, Aadj + else + nothing, nothing + end + # Create an intermediate workspace to use in computations + workspace_kw = (; Zⁱ, workspaces, permuted_workspaces, Aadj_init, Aadj, A, A_init) + expected_costs = if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost + ConScape.expected_cost(grsp; workspace_kw..., solver=solver(p)) + else + nothing + end + free_energy_distances = if hastrait(needs_free_energy_distance, gms) || cf == ConScape.free_energy_distance + ConScape.free_energy_distance(grsp; workspace_kw..., solver=solver(p)) + else + nothing + end + proximities = if hastrait(needs_proximity, gms) + # We populate this during `solve` + haskey(ws, :proximities) ? _reshape(ws.proximities, size(Z)) : similar(Z) + else + nothing + end + + # TODO make a trait + CW = grsp.g.costmatrix .* grsp.W + return (; grsp, workspace_kw..., CW, free_energy_distances, expected_costs, proximities) end # RSP is not used for ConnectivityMeasure, so the solver isn't used @@ -29,23 +93,63 @@ function solve(s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, g::Grid; compute(gm, p, g; solver=s, workspace...) end end -function solve(s::Solver, cm::FundamentalMeasure, p::AbstractProblem, g::Grid; - workspace=nothing, +function solve(s::Solver, cm::FundamentalMeasure, p::Problem, g::Grid; + workspace=init(s, cm, p, g) ) - workspace = isnothing(workspace) ? init(s, cm, p, g) : workspace - # TODO remove use of GridRSP where possible - results = map(p.graph_measures) do gm - compute(gm, p, workspace.grsp; workspace...) + gms = graph_measures(p) + distance_transformation = cm.distance_transformation + results = if distance_transformation isa NamedTuple + # Map over both distance transformations and graph measures + nested = map(distance_transformation) do dt + cm1 = ConstructionBase.setproperties(cm, (; distance_transformation=dt)) + hastrait(needs_proximity, gms) && + _setproximities!(workspace.proximities, workspace.expected_costs, cm1, p, workspace.grsp) + # Rebuild the problem with a connectivity measure + # holding a single distance transformation, in case its used + p1 = ConstructionBase.setproperties(p, (; connectivity_measure=cm1)) + map(gms) do gm + if needs_connectivity(gm) + compute(gm, p1, workspace.grsp; workspace...) + else + nothing + end + end + end + # Map over graph measures that don't need connectivity + flat = map(gms) do gm + if needs_connectivity(gm) + nothing + else + compute(gm, p, workspace.grsp; workspace...) + end + end + # Combine nested and flat results + map(keys(gms)) do k + f = flat[k] + if isnothing(f) + map(n -> n[k], nested) + else + f + end + end |> NamedTuple{keys(gms)} + else + hastrait(needs_proximity, gms) && + _setproximities!(workspace.proximities, workspace.expected_costs, cm, p, workspace.grsp) + # Map over graph measures + map(p.graph_measures) do gm + compute(gm, p, workspace.grsp; workspace...) + end end return _merge_to_stack(results) end -function init(s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, g::Grid) +function init!(workspace::NamedTuple, s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, g::Grid) # TODO what is needed here? return (;) end -LinearAlgebra.ldiv!(solver::Solver, A, B; kw...) = ldiv!(solver, init(solver, A), A, B; kw...) +LinearAlgebra.ldiv!(solver::Solver, A::AbstractMatrix, B::AbstractMatrix; kw...) = + ldiv!(solver, init(solver, A), B; kw...) """ MatrixSolver(; check) @@ -264,3 +368,37 @@ function _check_z(s, Z, W, g) @warn "Warning: Z-matrix contains too small values, which can lead to inaccurate results! Check that the graph is connected or try decreasing θ." end end + +# This duplicats some logic from gridrsp +function _setproximities!( + proximities::AbstractMatrix, + expected_costs::AbstractMatrix, + cm::ConnectivityMeasure, + p::Problem, + grsp::GridRSP +) + g = grsp.g + dt = cm.distance_transformation + if isnothing(dt) + dt = inv(g.costfunction) + end + map!(dt, proximities, expected_costs) + _maybe_set_diagonal!(proximities, g, diagvalue(p)) + return proximities +end + +function _reshape(A::Array, dims::Tuple{Vararg{Int}}) + len = prod(dims) + mem = getfield(A, :ref).mem + if size(A) == dims + A + elseif length(mem) >= len + v = vec(A) + # Hack to shrink the array + setfield!(v, :size, (len,)) + reshape(v, dims) + else + v = resize!(vec(A), len) + reshape(v, dims) + end +end \ No newline at end of file diff --git a/src/tiles.jl b/src/tiles.jl index 4308615..da9806b 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -2,9 +2,6 @@ abstract type AbstractWindowedProblem end -function allocations(p::AbstractWindowedProblem, sze::Tuple{Int,Int}) -end - """ WindowedProblem(problem::AbstractProblem; size, centers, θ) @@ -16,78 +13,126 @@ to be run over the same windowed grids. # Keywords - `problem`: The radius of the window. -- `radius`: The radius of the window. -- `overlap`: The overlap between windows. +- `centersize`: the size of the target square. +- `buffer`: the area outside the source window. - `threaded`: Whether to run in parallel. `false` by default """ @kwdef struct WindowedProblem <: AbstractWindowedProblem problem::AbstractProblem - radius::Int - overlap::Int + centersize::Int + buffer::Int threaded::Bool = false end WindowedProblem(problem; kw...) = WindowedProblem(; problem, kw...) -function sizeofallocations(p::Problem, sze::Tuple{Int,Int}) - gms = graph_measures(p) - A_size = sizeofAs(gms, sze) - Z_size = sizeofZs(gms, sze) - init_size = sizeofinits(solver(p), gms, sze) - - return_size = sum(map(sizeofreturn, gms, sze)) - - return A_size + Z_size + init_size + return_size -end - function solve(p::WindowedProblem, rast::RasterStack; test_windows=false, verbose=false, + mosaic_return=true ) - ranges = collect(_get_window_ranges(p, rast)) - mask = _get_window_mask(rast, ranges) - output_stacks = Vector{RasterStack}(undef, count(mask)) - used_ranges = ranges[mask] + window_ranges = collect(_window_ranges(p, rast)) + # Test outputs just return the inputs after window masking if test_windows - output_stacks = map(eachindex(used_ranges)) do i - _mask_target_qualities_overlap!(rast, used_ranges[i], p) + output_stacks = map(eachindex(window_ranges)) do i + _mask_target_qualities_overlap!(rast, window_ranges[i], p) end - return Rasters.mosaic(sum, output_stacks; to=rast, missingval=NaN) + return if mosaic_return + Rasters.mosaic(sum, collect(skipmissing(output_stacks)); + to=rast, missingval=NaN + ) + else + output_stacks + end + end + + # Set up channels for threading + n = p.threaded ? Threads.nthreads() : 1 + ch = Channel{NamedTuple}(n) + for _ in 1:n + put!(ch, (;)) end + # Define empty outputs + output_stacks = Vector{RasterStack}(undef, length(window_ranges)) + # Define a runner for threaded/non-threaded operation function run(i) - rs = used_ranges[i] - verbose && println("Solving window $i $rs ") + # Get a window range + rs = window_ranges[i] + # verbose && println("Solving window $i $rs ") rast_window = _mask_target_qualities_overlap!(rast, rs, p) - output_stacks[i] = solve(p.problem, rast_window)#; workspace) + # Initialise the window using stored memory + workspace = init!(take!(ch), p.problem, rast_window) + # Solve for the window + output_stacks[i] = solve(p.problem, workspace) + # Return the workspace to the channel + put!(ch, workspace) end + # Run the window problems if p.threaded - Threads.@threads for i in eachindex(used_ranges) + Threads.@threads :greedy for i in eachindex(window_ranges) run(i) end else - for i in eachindex(used_ranges) + for i in eachindex(window_ranges) run(i) end end - # Return mosaics of outputs - return Rasters.mosaic(sum, output_stacks; to=rast, missingval=NaN) + # Maybe mosaic the output + return if mosaic_return + Rasters.mosaic(sum, output_stacks; to=rast, missingval=NaN) + else + output_stacks + end +end + +function allocations(p::WindowedProblem, rast::AbstractRasterStack; + nthreads=Threads.nthreads(), kw... +) + range_tuples = _window_ranges(p, rast) + if p.threaded + return sum(range_tuples[1:min(end, nthreads)]) do rs + allocations(p.problem, rast[rs...]; nthreads, kw...) + end + else + return allocations(p.problem, rast[first(range_tuples)...]; nthreads, kw...) + end end -# function assess(op::WindowedProblem, g::Grid) -# window_assessments = map(_windows(op, g)) do w -# ca = assess(op.op, w) -# end -# maximums = reduce(window_assessments) do acc, a -# (; totalmem=max(acc.totalmem, a.totalmem), -# zmax=max(acc.zmax, a.zmax), -# lumax=max(acc.lumax, a.lumax), -# ) -# end -# ComputeAssesment(; op=op.op, maximums..., sums...) -# end +function _window_ranges(p, rast) + window_ranges = collect(_window_ranges(p, rast)) + # We need at least one window + length(window_ranges) > 0 || throw(ArgumentError("No tiles selected, use a smaller overlap or larger radius")) + # Get a bitmask of valid windows (not all zeros or NaNs) + valid_window_mask = _valid_window_mask(p, rast, window_ranges) + # We only use valid windows + used_ranges = ranges[valid_window_mask] + # Sort by size so we can the largest windows first. + # This should make threading slightly more efficient as the last tasks + # (when some threads are idle) will be the fastest ones. + # It also allocates the largest arrays first so we can reuse them for the + # smaller ones without moving the memory + sorted_ranges = collect(last.(sort!(map(rs -> prod(_size(p, rast, rs)) => rs, used_ranges)))) + + return sorted_ranges +end + + +function _max_window_size(p::WindowedProblem, rast) + # TODO make this work nested + rs = _window_ranges(p, rast) + # Calculate the maximum number of source and target values in any window + sizes = map(x -> _size(p, rast, x), rs) + _, i = findmax(prod, sizes) + return sizes[i] +end +function _size(p::WindowedProblem, rast, ranges::Tuple) + source_count = _valid_sources(count, p, rast, ranges) + target_count = _valid_targets(count, p, rast, ranges) + return (source_count, target_count) +end """ - StoredProblem(problem::AbstractProblem; radius, overlap, path, ext) + BatchProblem(problem::AbstractProblem; radius, overlap, path, ext) Combine multiple compute operations into a single object, when compute times are long and intermediate storage is needed. @@ -104,29 +149,38 @@ for nested operations. But can be `.nc` for NetCDF or most other common extensions. - `threaded`: Whether to run in parallel. `false` by default """ -@kwdef struct StoredProblem <: AbstractWindowedProblem +@kwdef struct BatchProblem <: AbstractWindowedProblem problem::AbstractProblem - radius::Int - overlap::Int - path::String + centersize::Int + buffer::Int + datapath::String + joblistpath::String grain::Union{Nothing,Int} = nothing ext::String = ".tif" threaded::Bool = false end -StoredProblem(problem; kw...) = StoredProblem(; problem, kw...) +BatchProblem(problem; kw...) = BatchProblem(; problem, kw...) -function solve(p::StoredProblem, rast::RasterStack; +function solve(p::BatchProblem, rast::RasterStack; verbose=false, - # workspace=init(p, rast), ) - ranges = collect(_get_window_ranges(p, rast)) - mask = _get_window_mask(rast, ranges) + ch = Channel{NamedTuple}() + for _ in 1:Threads.nthreads() + put!(ch, (;)) + end + ranges = collect(_window_ranges(p, rast)) + mask = _valid_window_mask(p, rast, ranges) used_ranges = ranges[mask] function run(i) rs = used_ranges[i] verbose && println("Solving window $i $rs ") rast_window = _mask_target_qualities_overlap!(rast, rs, p) - output = solve(p.problem, rast_window)#; workspace) + storage = take!(ch) + workspace = if isnothing(storage) + init!(storage, p, rast_window) + end + output = solve(p.problem, workspace) + put!(ch, workspace) _store(p, output, rs) end if p.threaded @@ -140,16 +194,16 @@ function solve(p::StoredProblem, rast::RasterStack; end end # Single batch job for running on clusters -function solve(p::StoredProblem, rast::RasterStack, i::Int; +function solve(p::BatchProblem, rast::RasterStack, i::Int; verbose=false, ) # Indices i are contiguous so we need to spread them # accross the actual tiles that need to be done # Get all the tile ranges - ranges = collect(_get_window_ranges(p, rast)) + ranges = collect(_window_ranges(p, rast)) # Get the Bool mask of needed windows - mask = _get_window_mask(rast, ranges) + mask = _valid_window_mask(p, rast, ranges) # Get the Int indices of the needed windows tile_inds = eachindex(mask)[vec(mask)] # Get the current window for this job @@ -167,7 +221,7 @@ function solve(p::StoredProblem, rast::RasterStack, i::Int; end """ - count_batches(p::StoredProblem, rast::RasterStack) + count_batches(p::BatchProblem, rast::RasterStack) Count the number of batch jobs that would need to be run. @@ -175,25 +229,25 @@ A Slurm array job would then be specified "0-(N-1)" Returns an `Int`. """ -function count_batches(p::StoredProblem, rast::RasterStack) - ranges = _get_window_ranges(p, rast) - mask = _get_window_mask(rast, ranges) +function count_batches(p::BatchProblem, rast::RasterStack) + ranges = _window_ranges(p, rast) + mask = _valid_window_mask(p, rast, ranges) return count(mask) end # Mosaic the stored files to a RasterStack -function Rasters.mosaic(p::StoredProblem; +function Rasters.mosaic(p::BatchProblem; to, lazy=false, filename=nothing, missingval=NaN, kw... ) - ranges = _get_window_ranges(p, to) - mask = _get_window_mask(to, ranges) + ranges = _window_ranges(p, to) + mask = _valid_window_mask(p, to, ranges) paths = [_window_path(p, rs) for (rs, m) in zip(ranges, mask) if m] stacks = [RasterStack(path; lazy, name) for path in paths if isdir(path)] return Rasters.mosaic(sum, stacks; to, filename, missingval, kw...) end -function _store(p::StoredProblem, output::RasterStack{K}, ranges) where K +function _store(p::BatchProblem, output::RasterStack{K}, ranges) where K path = mkpath(_window_path(p, ranges)) return Rasters.write(joinpath(path, ""), output; ext=p.ext, verbose=false, force=true @@ -209,43 +263,54 @@ end ### Shared utilities -_get_window_ranges(p::Union{StoredProblem,WindowedProblem}, rast::AbstractRasterStack) = - _get_window_ranges(size(rast), p.radius, p.overlap) -function _get_window_ranges(size::Tuple{Int,Int}, r::Int, overlap::Int) - 2r <= overlap && throw(ArgumentError("2 * radius must be larger than overlap")) - d = 2r - s = d - overlap # Step between each window corner +_window_ranges(p::Union{BatchProblem,WindowedProblem}, rast::AbstractRasterStack) = + _window_ranges(size(rast), p.centersize, p.buffer) +function _window_ranges(size::Tuple{Int,Int}, centersize::Int, buffer::Int) + windowsize = 2buffer + centersize # Define the corners of each window - corners = CartesianIndices(size)[begin:s:end, begin:s:end] + corners = CartesianIndices(size)[begin:centersize:end-windowsize, begin:centersize:end-windowsize] # Create an iterator of ranges for retreiving each window - return (map((i, sz) -> i:min(sz, i + d), Tuple(c), size) for c in corners) + return (map((i, sz) -> i:min(sz, i + windowsize-1), Tuple(c), size) for c in corners) end -_get_window_mask(::Nothing, ranges) = nothing -_get_window_mask(rast::AbstractRasterStack, ranges) = - _get_window_mask(_get_target(rast), ranges) -function _get_window_mask(target::AbstractRaster, ranges) - # Create a mask to skip tiles that have no target cells - map(r -> _has_values(target, r), ranges) -end +# Create a mask to skip tiles that have no target cells +_valid_window_mask(p, ::Nothing, ranges) = nothing +_valid_window_mask(p, rast::AbstractRasterStack, ranges) = + map(r -> _valid_targets(any, p, rast, r), ranges) -function _mask_target_qualities_overlap!(rast, rs, p, last=false) - o = p.overlap +function _mask_target_qualities_overlap!(rast, rs, p) + b = p.buffer fill = zero(eltype(rast.target_qualities)) dest = rast[rs...] - dest.target_qualities[max(begin, end-o):end, :] .= fill - dest.target_qualities[begin:min(end,begin+o), :] .= fill - dest.target_qualities[:, max(begin, end-o):end] .= fill - dest.target_qualities[:, begin:min(end, begin+o)] .= fill - return rast + dest.target_qualities[begin:min(begin+b-1, end), :] .= fill + dest.target_qualities[:, begin:min(begin+b-1, end)] .= fill + dest.target_qualities[max(end-b+1, begin):end, :] .= fill + dest.target_qualities[:, max(end-b+1, begin):end] .= fill + return dest end -function _has_values(target::AbstractRaster, rs::Tuple{Vararg{AbstractUnitRange}}) +# Apply function `f` to the validity (Bool) of each window. Empty windows are false. +# `any` `count` or `map`(for the Vector{Bool}) are useful functions for f +_valid_sources(f, p, rast::AbstractRasterStack) = + _valid_sources(f, p, rast, axes(rast)) +function _valid_sources(f, p, rast::AbstractRasterStack, source_ranges::Tuple) + # Get a window view + window = view(rast.qualities, source_ranges...) + # If there are non-NaN cells above zero, keep the window + # TODO allow users to change this condition? + f(x -> !isnan(x) && x > zero(x), window) +end +function _valid_targets(f, p, rast::AbstractRasterStack, source_ranges::Tuple) + # Get the range of the target vaues + o = overlap(p) + target_ranges = map(source_ranges) do r + r[o+1:end-o] + end # Get a window view - window = view(target, rs...) + window = view(rast.target_qualities, target_ranges...) # If there are non-NaN cells above zero, keep the window # TODO allow users to change this condition? - any(x -> !isnan(x) && x > zero(x), window) + f(x -> !isnan(x) && x > zero(x), window) end -_resolution(rast) = abs(step(lookup(rast, X))) +_resolution(rast) = abs(step(lookup(rast, X))) \ No newline at end of file diff --git a/test/problem.jl b/test/problem.jl index 4b8eb34..67f8e00 100644 --- a/test/problem.jl +++ b/test/problem.jl @@ -1,181 +1,273 @@ using ConScape, Test, SparseArrays, LinearAlgebra -using Rasters, ArchGDAL, Plots +using Rasters, ArchGDAL, NCDatasets, Plots using LinearSolve datadir = joinpath(dirname(pathof(ConScape)), "..", "data") _tempdir = mkdir(tempname()) -mov_prob = replace_missing(Raster(joinpath(datadir, "mov_prob_1000.asc")), NaN) -hab_qual = replace_missing(Raster(joinpath(datadir, "hab_qual_1000.asc")), NaN) -mask!(mov_prob; with=hab_qual) -mask!(hab_qual; with=mov_prob) -rast = RasterStack((; affinities=mov_prob, qualities=hab_qual, target_qualities=hab_qual)) -rast.qualities[(rast.affinities .> 0) .& isnan.(rast.qualities)] .= 1e-20 -size(rast) -# rast = ConScape.coarse_graining(rast, 10) +θ = 0.1 +landscape = "sno_2000" +# The way the ascii is read in is reversed and rotated from what GDAL does +affinities = reverse(rotr90(replace_missing(Raster(joinpath(datadir, "affinities_$landscape.asc")), NaN)); dims=X) +qualities = reverse(rotr90(replace_missing(Raster(joinpath(datadir, "qualities_$landscape.asc")), NaN)); dims=X) +qualities[(affinities .> 0) .& isnan.(qualities)] .= 1e-20 +rast = RasterStack((; affinities, qualities, target_qualities=qualities)) + +affinities_asc = ConScape.readasc(joinpath(datadir, "affinities_$landscape.asc"))[1] +qualities_asc = ConScape.readasc(joinpath(datadir, "qualities_$landscape.asc"))[1] +qualities_asc[(affinities_asc .> 0) .& isnan.(qualities_asc)] .= 1e-20 +# They are only the same for Float32 +@test all(Float32.(affinities_asc) .=== Float32.(rast.affinities)) +@test all(Float32.(qualities_asc) .=== Float32.(rast.qualities)) graph_measures = graph_measures = (; - func=ConScape.ConnectedHabitat(), - qbetw=ConScape.BetweennessQweighted(), - kbetw=ConScape.BetweennessKweighted(), - # TODO sens=ConScape.Sensitivity(), - # eigmax=ConScape.EigMax(), - # qedgebetw=ConScape.EdgeBetweennessQweighted(), - # kedgebetw=ConScape.EdgeBetweennessKweighted(), - # mkld=ConScape.MeanKullbackLeiblerDivergence(), - # mlcd=ConScape.MeanLeastCostKullbackLeiblerDivergence(), + ch=ConScape.ConnectedHabitat(), + betq=ConScape.BetweennessQweighted(), + betk=ConScape.BetweennessKweighted(), + # # TODO sens=ConScape.Sensitivity(), + ebetq=ConScape.EdgeBetweennessQweighted(), + ebetk=ConScape.EdgeBetweennessKweighted(), + mkld=ConScape.MeanKullbackLeiblerDivergence(), + mlcd=ConScape.MeanLeastCostKullbackLeiblerDivergence(), + eigmax=ConScape.EigMax(), # crit=ConScape.Criticality(), # very very slow, each target makes a new grid ) -distance_transformation = (exp=x -> exp(-x/75), oddsfor=ConScape.OddsFor()) -connectivity_measure = ConScape.ExpectedCost(; θ=1.0, distance_transformation) +distance_transformation = (nodist=nothing, one=one, exp50=t -> exp(-t/50)) +connectivity_measure = ConScape.ExpectedCost(; θ, distance_transformation) -expected_layers = (:func_exp, :func_oddsfor, :qbetw, :kbetw_exp, :kbetw_oddsfor, :mkld, :mlcd) +expected_layers = ( + :ch_nodist, :ch_one, :ch_exp50, + :betq, + :betk_nodist, :betk_one, :betk_exp50, + :ebetq, + :ebetk_nodist, :ebetk_one, :ebetk_exp50, + :mkld, + :mlcd, + :eigmax_nodist, :eigmax_one, :eigmax_exp50, +) +affinities_sparse = ConScape.graph_matrix_from_raster(affinities) +test_g = ConScape.Grid(size(affinities)...; + affinities=affinities_sparse, + qualities +) +test_grsp = ConScape.GridRSP(test_g; θ) -# Basic Problem -problem = ConScape.Problem(; - graph_measures, connectivity_measure, solver=ConScape.MatrixSolver(), +solvers = ( + ConScape.MatrixSolver(), + ConScape.VectorSolver(), + ConScape.VectorSolver(; threaded=true), + ConScape.LinearSolver(), ) -@time workspace = init(problem, rast; prune=true); -workspace.B_sparse -map(x -> x / 1e6, ConScape.allocations(problem, rast)) -map(x -> x / 1e6, ConScape.allocations(problem, size(workspace.B_sparse))) +solver = ConScape.VectorSolver(; threaded=true) +solver = ConScape.MatrixSolver() + +for solver in solvers + println("\n Testing with solver: ", solver) + # Basic Problem + problem = ConScape.Problem(; + graph_measures, connectivity_measure, solver, + ) + @time workspace = init(problem, rast); + @testset "initialised grids are the same" begin + @test workspace.grsp.W == test_grsp.W + @test workspace.grsp.Z == test_grsp.Z + @test workspace.grsp.Pref == test_grsp.Pref + @test workspace.grsp.θ == test_grsp.θ + foreach(propertynames(test_g)) do n + @test isequal(getproperty(workspace.grid, n), getproperty(test_g, n)) + end + @test workspace.expected_costs == ConScape.expected_cost(test_grsp) + @test workspace.free_energy_distances == ConScape.free_energy_distance(test_grsp) + end + + ConScape.allocations(problem, rast).total / 1e6 + + @time result = ConScape.solve(problem, workspace); + # @profview result = ConScape.solve(problem, workspace) + @test result isa NamedTuple + @test size(result.ch_one) == size(rast) + @test keys(result) == expected_layers + g = workspace.grid + # Base.summarysize(workspace) / 1e6 + # ConScape.allocations(problem, size(workspace.B_sparse)).total / 1e6 + + @testset "Test mean_kl_divergence" begin + @test ConScape.mean_kl_divergence(test_grsp) ≈ 323895.3828183995 + @test result.mkld[] ≈ 323895.3828183995 + end + + @testset "mean_lc_kl_divergence" begin + @test result.mlcd[] ≈ 1.5660600315073947e6 + end + @testset "q-weighted" begin + @test result.betq isa Raster + @test isapprox(result.betq[21:23, 21:23], [ + 1930.1334372152335 256.91061166392745 2866.2998374065373 + 4911.996715311025 1835.991238248377 720.755518530375 + 4641.815380725279 3365.3296878569213 477.1085971945757], atol=1e-3) + end + @testset "k-weighted" begin + @test result.betk_nodist isa Raster + bet = ConScape.betweenness_kweighted(test_grsp) + @test isapprox(result.betk_nodist[21:23, 31:33], + [0.04063917813171917 0.06843246983487516 0.08862506281612659 + 0.03684621201600996 0.10352876485995872 0.1255652231824746 + 0.03190640567704462 0.13832814750469344 0.1961393152256104], atol=1e-6) -ConScape.allocations(problem, rast).total / 1e6 -Base.summarysize(workspace) / 1e6 -ConScape.allocations(problem, size(workspace.B_sparse)).total / 1e6 + # Check that summed edge betweennesses corresponds to node betweennesses: + @test result.ebetk_nodist isa SparseMatrixCSC + bet_edge_sum = fill(NaN, g.nrows, workspace.grid.ncols) + for (i, v) in enumerate(sum(result.ebetk_nodist, dims=2)) + bet_edge_sum[g.id_to_grid_coordinate_list[i]] = v + end + @test bet_edge_sum[21:23, 31:33] ≈ parent(result.betk_nodist[21:23, 31:33]) -map(x -> Base.summarysize(x) / 1e6, workspace) -map(propertynames(workspace.grid)) do n - n => Base.summarysize(getproperty(workspace.grid, n)) / 1e6 + # TODO the floating point differnce is more + # significant here, 1e-3 is as gooda as it can get + @test isapprox(result.betk_exp50[21:23, 31:33], [ + 980.5828087688377 1307.981162399926 1602.8445739784497 + 826.0710054834001 1883.0940077789735 1935.4450344630702 + 676.9212075214159 2228.2700913772774 2884.0409495023364], atol=1e-3) + + @test result.betk_one[g.id_to_grid_coordinate_list] ≈ + result.betq[g.id_to_grid_coordinate_list] + # ebetk_one is wrong here + @test result.ebetk_one ≈ result.ebetq + end + + @testset "connected_habitat" begin + @test result.ch_nodist isa Raster{Float64} + @test size(result.ch_nodist) == size(g.source_qualities) + # TODO we need some real tests here + end end -using BenchmarkTools -@time result = ConScape.solve(problem, workspace); -@btime result = ConScape.solve(problem, workspace); -# workspace_copy = deepcopy(workspace) -# workspace.expected_costs -# workspace_copy.expected_costs -# map(workspace, workspace_copy) do x, y -# if x isa Union{Tuple,NamedTuple} -# all(map(==, x, y)) -# else -# x == y -# end -# end -using BenchmarkTools -@time workspace = init(problem, rast); -@btime ConScape.solve(problem, workspace); -@profview_allocs workspace = init(problem, rast) -@profview_allocs ConScape.solve(problem, workspace) sample_rate=1.0 -#@profview_allocs ConScape.solve(problem, workspace) -@test result isa RasterStack -@test size(result) == size(rast) -@test keys(result) == expected_layers - -plot(result) -sum(skipmissing(rebuild(result.func_exp; missingval=NaN))) -Base.summarysize(workspace) / 1e6 - -400 * 400 * 21 * 21 / 1e6 * sizeof(Float64) * 8 -@profview ConScape.init(problem, rast) -@profview ConScape.solve(problem, rast; workspace) -ConScape.solve(problem, rast) - -# Threaded solve problem -vector_problem = ConScape.Problem(; - graph_measures, connectivity_measure, - solver = ConScape.VectorSolver(; threaded=true), -) -@time workspace = init(vector_problem, rast); -@time vector_result = ConScape.solve(vector_problem, workspace); -@btime vector_result = ConScape.solve(vector_problem, workspace); -@test vector_result isa RasterStack -@test size(vector_result) == size(rast) -@test keys(vector_result) == expected_layers -@test all(vector_result.func_exp .=== result.func_exp) -Plots.plot(vector_result) - -Base.summarysize(workspace) / 1e6 -sum(skipmissing(rebuild(vector_result.func_exp; missingval=NaN))) -@profview workspace = init(vector_problem, rast); -@profview ConScape.solve(vector_problem, workspace) -map(w -> Base.summarysize(w) / 10^6, workspace) -map(w -> Base.summarysize(w) / 10^6, workspace.A_init) - -# Problem with custom solver -linearsolve_problem = ConScape.Problem(; - graph_measures, connectivity_measure, - solver = ConScape.LinearSolver(MKLPardisoIterate(; nprocs=20)), - # solver = ConScape.LinearSolver(KrylovJL_GMRES(precs = (A, p) -> (Diagonal(A), I))), -) -Base.summarysize(workspace) / 1e6 -@time ls_result = ConScape.solve(linearsolve_problem, rast) -@test ls_result isa RasterStack -@test size(ls_result) == size(rast) -@test keys(ls_result) == expected_layers - -@profview ConScape.init(linearsolve_problem, rast) -@profview ConScape.solve(linearsolve_problem, rast) +graph_measures = (; + ch=ConScape.ConnectedHabitat(), + betq=ConScape.BetweennessQweighted(), + betk=ConScape.BetweennessKweighted(), + # # TODO sens=ConScape.Sensitivity(), + # crit=ConScape.Criticality(), # very very slow, each target makes a new grid +) +distance_transformation = x -> exp(-x / 5) +distance_transformation(10) +connectivity_measure = ConScape.ExpectedCost(; θ, distance_transformation) +expected_layers = (:ch_nodist, :ch_one, :betq, :betk_nodist, :betk_one) + +solver = ConScape.MatrixSolver() +problem = ConScape.Problem(; graph_measures, connectivity_measure, solver) +windowed_problem = ConScape.WindowedProblem(problem; + source_radius=20, target_radius=10, threaded=false +) +# ConScape.allocations(windowed_problem, rast) / 1e6 +@time ConScape.solve(windowed_problem, rast, verbose=true) # WindowedProblem returns a RasterStack windowed_problem = ConScape.WindowedProblem(problem; - radius=40, overlap=10, threaded=true + buffer=10, centersize=5, threaded=false, ) -windowed_result = ConScape.solve(windowed_problem, rast, verbose=true) +windowed_result = ConScape.solve(windowed_problem, rast; + # test_windows=true, + verbose=false, + # mosaic_return=false +) +plot(rast) plot(windowed_result) - -using GLMakie -Rasters.rplot(windowed_result) +plot(windowed_result[7]) @test windowed_result isa RasterStack @test size(windowed_result) == size(rast) @test keys(windowed_result) == expected_layers +plot(windowed_result) + +@test collect(ConScape._get_window_ranges(windowed_problem, rast)) == [ + (1:20, 1:20) (1:20, 17:36) (1:20, 33:52) (1:20, 49:59) + (17:36, 1:20) (17:36, 17:36) (17:36, 33:52) (17:36, 49:59) + (33:44, 1:20) (33:44, 17:36) (33:44, 33:52) (33:44, 49:59) +] +test_results = ConScape.solve(windowed_problem, rast; verbose=true, test_windows=true) +inner_targets = copy(rast.target_qualities) +# Edge targets are lost with windowing +inner_targets[1:2, :] .= NaN +inner_targets[:, 1:2] .= NaN +inner_targets[end-1:end, :] .= NaN +inner_targets[:, end-1:end] .= NaN +@test all(inner_targets .=== test_results.target_qualities) + +plot(test_results.target_qualities) +plot(rast.target_qualities) -window_tiles = ConScape.solve(windowed_problem, rast; test_windows=true, verbose=true) -plot(window_tiles) -Rasters.rplot(window_tiles) +windowed_problem_t1 = ConScape.WindowedProblem(problem; + source_radius=10, target_radius=1, threaded=true +) +windowed_problem_t2 = ConScape.WindowedProblem(problem; + source_radius=10, target_radius=2, threaded=true +) +windowed_problem_t4 = ConScape.WindowedProblem(problem; + source_radius=10, target_radius=4, threaded=true +) +windowed_problem_t6 = ConScape.WindowedProblem(problem; + source_radius=10, target_radius=6, threaded=true +) +length(ConScape._get_window_ranges(windowed_problem_t1, rast)) +length(ConScape._get_window_ranges(windowed_problem_t2, rast)) +length(ConScape._get_window_ranges(windowed_problem_t4, rast)) +length(ConScape._get_window_ranges(windowed_problem_t6, rast)) +using BenchmarkTools +@btime ConScape.solve(windowed_problem_t1, rast, verbose=false); +@btime ConScape.solve(windowed_problem_t2, rast, verbose=false); +@btime ConScape.solve(windowed_problem_t4, rast, verbose=false); +@btime ConScape.solve(windowed_problem_t6, rast, verbose=false); +@profview_allocs ConScape.solve(windowed_problem_t1, rast, verbose=false) sampling=1.0 +@profview_allocs ConScape.solve(windowed_problem_t2, rast, verbose=false) sampling=1.0 +@profview_allocs ConScape.solve(windowed_problem_t4, rast, verbose=false) sampling=1.0 +@profview_allocs ConScape.solve(windowed_problem_t6, rast, verbose=false) sampling=1.0 +@profview +res = ConScape.solve(windowed_problem_t1, rast, verbose=false) +@profview ConScape.solve(windowed_problem_t2, rast, verbose=false) +@profview ConScape.solve(windowed_problem_t4, rast, verbose=false) +@profview ConScape.solve(windowed_problem_t6, rast, verbose=false) +res = ConScape.solve(windowed_problem_t4, rast, verbose=false) -# StoredProblem writes files to disk and mosaics to RasterStack +# BatchProblem writes files to disk and mosaics to RasterStack -stored_problem = ConScape.StoredProblem(problem; - path=tempname(), radius=40, overlap=10, threaded=true +stored_problem = ConScape.BatchProblem(problem; + path=tempname(), source_radius=20, target_radius=10, threaded=true ) ConScape.solve(stored_problem, rast; verbose=true) stored_result = mosaic(stored_problem; to=rast) @test stored_result isa RasterStack -@test size(stored_result) == size(rast) +@test size(stored_result) == reverse(size(rast)) # keys are sorted now from file-name order @test keys(stored_result) == Tuple(sort(collect(expected_layers))) # Check the answer matches the WindowedProblem -@test all(stored_result.func_exp .=== windowed_result.func_exp) - -plot(stored_result) -Rasters.rplot(stored_result.func_exp .- result.func_exp) -sum(skipmissing(windowed_result.func_exp)) -sum(skipmissing(stored_result.func_exp)) -sum(skipmissing(rebuild(result.func_exp; missingval=NaN))) +# Note: its been permuted back by GDAL +@test all(permutedims(stored_result.ch_nodist) .=== windowed_result.ch_nodist) -# StoredProblem can be run as batch jobs for clusters +# BatchProblem can be run as batch jobs for clusters # We just need a new path to make sure the result is from a new run -stored_problem2 = ConScape.StoredProblem(problem; - path=tempname(), radius=40, overlap=10, threaded=true +stored_problem2 = ConScape.BatchProblem(problem; + path=tempname(), radius=20, overlap=10, threaded=true ) njobs = ConScape.count_batches(stored_problem2, rast) -@test jobs isa Vector{Int} +@test njobs == 4 for job in 1:njobs ConScape.solve(stored_problem2, rast, job) end batch_result = mosaic(stored_problem2; to=rast) # Check the answer matches the non-batched run -@test all(batch_result.func_exp .=== stored_result.func_exp) +@test all(batch_result.ch_nodist .=== stored_result.ch_nodist) @test keys(batch_result) == Tuple(sort(collect(expected_layers))) -# StoredProblem can be nested with WindowedProblem +plot(batch_result.ch_nodist) +plot(stored_result.ch_nodist) + +# BatchProblem can be nested with WindowedProblem small_windowed_problem = ConScape.WindowedProblem(problem; radius=25, overlap=10, ) -nested_problem = ConScape.StoredProblem(small_windowed_problem; - path=tempname(), radius=40, overlap=10, threaded=false +nested_problem = ConScape.BatchProblem(small_windowed_problem; + path=tempname() * ".nc", radius=40, overlap=10, threaded=false ) ConScape.solve(nested_problem, rast) nested_result = mosaic(nested_problem; to=rast) diff --git a/test/runtests.jl b/test/runtests.jl index d8f839c..7a26086 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,7 +8,7 @@ using Rasters, ArchGDAL, Plots datadir = joinpath(dirname(pathof(ConScape)), "..", "data") _tempdir = mkdir(tempname()) -@testset "sno_2000 Rasters" begin +#@testset "sno_2000 Rasters" begin landscape = "sno_2000" θ = 0.1 @@ -67,7 +67,7 @@ _tempdir = mkdir(tempname()) matrix_type=mt) isa ConScape.SparseMatrixCSC end - @testset "Test betweenness" begin + # @testset "Test betweenness" begin @testset "q-weighted" begin bet = ConScape.betweenness_qweighted(grsp) @test bet isa Raster @@ -77,7 +77,7 @@ _tempdir = mkdir(tempname()) 4641.815380725279 3365.3296878569213 477.1085971945757], atol=1e-3) end - @testset "k-weighted" begin + # @testset "k-weighted" begin bet = ConScape.betweenness_kweighted(grsp, diagvalue=1.) @test bet isa Raster @test isapprox(bet[21:23, 31:33], [ From 4186033122ecb6b924d9a555ca87ecd4794538e3 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 28 Jan 2025 19:56:42 +0100 Subject: [PATCH 11/51] window testing --- src/allocations.jl | 41 ++++++- src/problem.jl | 48 +------- src/solvers.jl | 31 +++-- src/tiles.jl | 286 +++++++++++++++++++++++++-------------------- test/problem.jl | 254 ++++++++++++++++++++++------------------ 5 files changed, 360 insertions(+), 300 deletions(-) diff --git a/src/allocations.jl b/src/allocations.jl index 46ba0db..1a2b259 100644 --- a/src/allocations.jl +++ b/src/allocations.jl @@ -1,5 +1,23 @@ -function allocations(p::Problem, rast::Raster; kw...) - allocations(p, Grid(rast; kw...)) + +""" + allocations(p::AbstractProblem, size::Tuple{Int,Int}) + allocations(p::AbstractProblem, rast::RasterStack) + +Calculate allocations in Bytes required to run the problem. +The maximum dense target size will be used, so that `size` +is symmetrical. You can pass e.g. `(1000, 200)`. where you +know the size of the largest sparse matrix generated from `rast`. + +`allocations` will likely underestimate as Julia may need to allocate +for compilatation and other things outside of our control. + +A warning will be thrown for problem components whos allocations +are not well known. +""" +function allocations end + +function allocations(p::Problem, rast::AbstractRasterStack; kw...) + allocations(p, Grid(rast); kw...) end function allocations(p::Problem, grid::Grid; kw...) sze = size(grid) @@ -12,8 +30,25 @@ function allocations(p::Problem, grid::Grid; kw...) return_size = sum(map(gm -> sizeofreturn(gm, sze), gms)) total = sparse_size + dense_size + init_size + return_size + grid_size - (; total, sparse_size, dense_size, init_size, return_size, grid_size) + # (; total, sparse_size, dense_size, init_size, return_size, grid_size) + return total end +function allocations(p::AbstractWindowedProblem, rast::AbstractRasterStack; + nthreads=Threads.nthreads(), kw... +) + # largest_first = sort!(collect(zip(vec(problem_sizes), vec(range_tuples))); rev=true) + range_tuples = vec(_window_ranges(p, rast)) + # Use the allocations for the largest windows + allocs = map(range_tuples) do rs + return allocations(p.problem, rast[rs...]; nthreads, kw...) + end + if p.threaded + sum(sort(allocs)[1:min(end, nthreads)]) + else + maximum(allocs; init=0) + end +end + # This is approximate. # Size of the solver initialisation / factorization diff --git a/src/problem.jl b/src/problem.jl index 139e371..04bb1f0 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -13,13 +13,6 @@ connectivity_function(p::AbstractProblem) = connectivity_function(connectivity_measure(p)) solver(p::AbstractProblem) = solver(p.problem) -""" - solve(problem, grid::Union{Grid,GridRSP}) - -Solve problem `o` for a grid. -""" -function solve end - """ assess(p::AbstractProblem, g) @@ -29,27 +22,6 @@ and time reequiremtents on a cluster """ function assess end -""" - allocations(p::AbstractProblem, size::Tuple{Int,Int}) - allocations(p::AbstractProblem, rast::RasterStack) - -Calculate allocations in Bytes required to run the problem. -The maximum dense target size will be used, so that `size` -is symmetrical. You can pass e.g. `(1000, 200)`. where you -know the size of the largest sparse matrix generated from `rast`. - -`allocations` will likely underestimate as Julia may need to allocate -for compilatation and other things outside of our control. - -A warning will be thrown for problem components whos allocations -are not well known. -""" -function allocations end -function allocations(p::AbstractProblem, rast::RasterStack) - s = prod(size(rast)) - allocations(p, (s, s)) -end - """ Problem(graph_measures...; solver, θ) @@ -75,20 +47,12 @@ graph_measures(p::Problem) = p.graph_measures connectivity_measure(p::Problem) = p.connectivity_measure solver(p::Problem) = p.solver -solve(p::Problem, g::Grid; workspace=nothing) = - solve(p.solver, connectivity_measure(p), p, g; workspace) -function solve(p::Problem, rast::RasterStack; workspace=nothing) - grid = isnothing(workspace) ? Grid(p, rast) : workspace.grid - return solve(p, grid; workspace) -end -solve(p::Problem, workspace::NamedTuple) = solve(p, workspace.grid; workspace) +solve(p::Problem, rast::RasterStack) = solve!(init(p, rast), p) +solve!(workspace::NamedTuple, p::Problem) = + solve!(workspace, solver(p), connectivity_measure(p), p) -init(args...; kw...) = init!((;), args...; kw...) +init(p::AbstractProblem, args...) = init!((;), p, args...) -function init!(workspace::NamedTuple, p::Problem, rast::RasterStack; kw...) - grid = Grid(p, rast; kw...) # TODO reuse the grid - return (; grid, init!(workspace, p, grid)...) -end # Init is conditional on solver and connectivity measure -init!(workspace::NamedTuple, p::AbstractProblem, g::Grid) = - init!(workspace, solver(p), connectivity_measure(p), p, g) \ No newline at end of file +init!(workspace::NamedTuple, p::AbstractProblem, rast::RasterStack) = + init!(workspace, solver(p), connectivity_measure(p), p, rast) \ No newline at end of file diff --git a/src/solvers.jl b/src/solvers.jl index bc6f2c3..e96daaf 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -5,8 +5,9 @@ function init!( s::Solver, cm::FundamentalMeasure, p::AbstractProblem, - g::Grid + rast::RasterStack, ) + grid = g = Grid(p, rast) gms = graph_measures(p) cf = connectivity_function(p) Pref = _Pref(g.affinities) @@ -35,7 +36,7 @@ function init!( Z = ldiv!(s, A_init, B_dense; B_copy=copyto!(workspaces[1], B_dense)) # Check that values in Z are not too small: _check_z(s, Z, W, g) - grsp = GridRSP(g, cm.θ, Pref, W, Z) + grsp = GridRSP(grid, cm.θ, Pref, W, Z) Zⁱ = if hastrait(needs_inv, gms) haskey(ws, :Zⁱ) ? _inv!(_reshape(ws.Zⁱ, size(Z)), Z) : _inv(Z) @@ -62,7 +63,7 @@ function init!( nothing, nothing end # Create an intermediate workspace to use in computations - workspace_kw = (; Zⁱ, workspaces, permuted_workspaces, Aadj_init, Aadj, A, A_init) + workspace_kw = (; Zⁱ, workspaces, permuted_workspaces, Aadj_init, Aadj, A, A_init) expected_costs = if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost ConScape.expected_cost(grsp; workspace_kw..., solver=solver(p)) else @@ -82,20 +83,28 @@ function init!( # TODO make a trait CW = grsp.g.costmatrix .* grsp.W - return (; grsp, workspace_kw..., CW, free_energy_distances, expected_costs, proximities) + return (; grid, grsp, workspace_kw..., CW, free_energy_distances, expected_costs, proximities) end # RSP is not used for ConnectivityMeasure, so the solver isn't used -function solve(s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, g::Grid; - workspace=init(s, cm, p, g), +function solve!( + workspace::NamedTuple, + s::Solver, + cm::ConnectivityMeasure, + p::AbstractProblem, ) + g = workspace.grid return map(p.graph_measures) do gm - compute(gm, p, g; solver=s, workspace...) + compute(gm, p, ; workspace...) end end -function solve(s::Solver, cm::FundamentalMeasure, p::Problem, g::Grid; - workspace=init(s, cm, p, g) +function solve!( + workspace::NamedTuple, + s::Solver, + cm::FundamentalMeasure, + p::Problem, ) + g = workspace.grid gms = graph_measures(p) distance_transformation = cm.distance_transformation results = if distance_transformation isa NamedTuple @@ -143,9 +152,9 @@ function solve(s::Solver, cm::FundamentalMeasure, p::Problem, g::Grid; return _merge_to_stack(results) end -function init!(workspace::NamedTuple, s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, g::Grid) +function init!(workspace::NamedTuple, s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, rast::RasterStack) # TODO what is needed here? - return (;) + return (; grid=Grid(p, rast)) end LinearAlgebra.ldiv!(solver::Solver, A::AbstractMatrix, B::AbstractMatrix; kw...) = diff --git a/src/tiles.jl b/src/tiles.jl index da9806b..3052838 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -1,6 +1,11 @@ # This file is a work in progress... -abstract type AbstractWindowedProblem end +abstract type AbstractWindowedProblem <: AbstractProblem end + +buffer(p::AbstractWindowedProblem) = p.buffer +buffer(p::AbstractProblem) = 0 + +grain(::AbstractProblem) = nothing """ WindowedProblem(problem::AbstractProblem; size, centers, θ) @@ -30,15 +35,16 @@ function solve(p::WindowedProblem, rast::RasterStack; verbose=false, mosaic_return=true ) - window_ranges = collect(_window_ranges(p, rast)) + window_ranges = _window_ranges(p, rast) + window_indices = _window_indices(p, rast; window_ranges) # Test outputs just return the inputs after window masking if test_windows - output_stacks = map(eachindex(window_ranges)) do i - _mask_target_qualities_overlap!(rast, window_ranges[i], p) + output_stacks = map(window_indices) do i + _get_window_with_zeroed_buffer(rast, window_ranges[i], p) end return if mosaic_return Rasters.mosaic(sum, collect(skipmissing(output_stacks)); - to=rast, missingval=NaN + to=rast, missingval=0.0, verbose ) else output_stacks @@ -52,87 +58,61 @@ function solve(p::WindowedProblem, rast::RasterStack; put!(ch, (;)) end # Define empty outputs - output_stacks = Vector{RasterStack}(undef, length(window_ranges)) + output_stacks = Vector{RasterStack}(undef, length(window_indices)) # Define a runner for threaded/non-threaded operation - function run(i) + function run(i, ir) # Get a window range - rs = window_ranges[i] + rs = window_ranges[ir] # verbose && println("Solving window $i $rs ") - rast_window = _mask_target_qualities_overlap!(rast, rs, p) + rast_window = _get_window_with_zeroed_buffer(rast, rs, p) # Initialise the window using stored memory workspace = init!(take!(ch), p.problem, rast_window) # Solve for the window - output_stacks[i] = solve(p.problem, workspace) + output_stacks[i] = solve!(workspace, p.problem) # Return the workspace to the channel put!(ch, workspace) end # Run the window problems if p.threaded - Threads.@threads :greedy for i in eachindex(window_ranges) - run(i) + Threads.@threads :greedy for (i, ir) in enumerate(window_indices) + run(i, ir) end else - for i in eachindex(window_ranges) - run(i) + for (i, ir) in enumerate(window_indices) + run(i, ir) end end # Maybe mosaic the output return if mosaic_return - Rasters.mosaic(sum, output_stacks; to=rast, missingval=NaN) + Rasters.mosaic(sum, output_stacks; to=rast, missingval=0.0, verbose) else output_stacks end end -function allocations(p::WindowedProblem, rast::AbstractRasterStack; - nthreads=Threads.nthreads(), kw... -) - range_tuples = _window_ranges(p, rast) - if p.threaded - return sum(range_tuples[1:min(end, nthreads)]) do rs - allocations(p.problem, rast[rs...]; nthreads, kw...) - end - else - return allocations(p.problem, rast[first(range_tuples)...]; nthreads, kw...) - end -end +# sorted_ranges = collect(last.(sort!(map(rs -> prod(_size(p, rast, rs)) => rs, used_ranges)))) -function _window_ranges(p, rast) - window_ranges = collect(_window_ranges(p, rast)) - # We need at least one window - length(window_ranges) > 0 || throw(ArgumentError("No tiles selected, use a smaller overlap or larger radius")) - # Get a bitmask of valid windows (not all zeros or NaNs) - valid_window_mask = _valid_window_mask(p, rast, window_ranges) - # We only use valid windows - used_ranges = ranges[valid_window_mask] - # Sort by size so we can the largest windows first. - # This should make threading slightly more efficient as the last tasks - # (when some threads are idle) will be the fastest ones. - # It also allocates the largest arrays first so we can reuse them for the - # smaller ones without moving the memory - sorted_ranges = collect(last.(sort!(map(rs -> prod(_size(p, rast, rs)) => rs, used_ranges)))) - - return sorted_ranges +function _max_window_problem_size(p::AbstractWindowedProblem, rast) + sizes = _window_problem_sizes(p::AbstractWindowedProblem, rast) + _, i = findmax(sizes) + return sizes[i] end - -function _max_window_size(p::WindowedProblem, rast) - # TODO make this work nested +# Calculate the maximum number of source and target values in any window +function _window_problem_sizes(p::AbstractWindowedProblem, rast) rs = _window_ranges(p, rast) # Calculate the maximum number of source and target values in any window - sizes = map(x -> _size(p, rast, x), rs) - _, i = findmax(prod, sizes) - return sizes[i] + return map(r -> _problem_size(p, rast, r), rs) end -function _size(p::WindowedProblem, rast, ranges::Tuple) +function _problem_size(p::AbstractProblem, rast, ranges::Tuple) source_count = _valid_sources(count, p, rast, ranges) target_count = _valid_targets(count, p, rast, ranges) - return (source_count, target_count) + return source_count * target_count end """ - BatchProblem(problem::AbstractProblem; radius, overlap, path, ext) + BatchProblem(problem::AbstractProblem; buffer, centersize, path, ext) Combine multiple compute operations into a single object, when compute times are long and intermediate storage is needed. @@ -142,102 +122,133 @@ for nested operations. # Keywords -- `radius`: The radius of the window - 2radius + 1 is the diameter. -- `overlap`: The overlap between adjacent windows. -- `path`: The path to store the output rasters. +- `nwindows`: When `problem` is a `WindowedProblem`, the number of windows to use. + When used, `centersize` and `buffer` are not needed. +- `centersize`: The size of the target square +- `buffer`: The area outside taret square +- `datapath`: The path to store the output rasters. +- `joblistpath`: The path to find the job list. +- `grain`: amount of thinning to apply to the target qualities. `nothing` by default. + if `2 is used`, the target qualities will be sampled every 2x2 pixels, and should run 4x faster. - `ext`: The file extension for Rasters.jl to write to. Defaults to `.tif`, - But can be `.nc` for NetCDF or most other common extensions. -- `threaded`: Whether to run in parallel. `false` by default + But can be `.nc` for NetCDF, or most other common extensions. +- `threaded`: Whether to run in parallel. `false` by default. If the problem + is also threaded at some level it may be faster to set this to `false`. """ @kwdef struct BatchProblem <: AbstractWindowedProblem problem::AbstractProblem - centersize::Int buffer::Int + centersize::Tuple{Int,Int} datapath::String - joblistpath::String + joblistpath::Union{String,Nothing}=nothing grain::Union{Nothing,Int} = nothing ext::String = ".tif" threaded::Bool = false end -BatchProblem(problem; kw...) = BatchProblem(; problem, kw...) - -function solve(p::BatchProblem, rast::RasterStack; - verbose=false, +function BatchProblem(problem::Problem; + centersize::Union{Int,Tuple{Int,Int}}, kw... +) + centersize = centersize isa Tuple{Int,Int} ? centersize : (centersize, centersize) + BatchProblem(; problem, centersize, kw...) +end +function BatchProblem(problem::WindowedProblem; + nwindows=nothing, + centersize::Union{Nothing,Int,Tuple{Int,Int}}=nothing, + buffer::Union{Nothing,Int}=nothing, + kw... ) - ch = Channel{NamedTuple}() - for _ in 1:Threads.nthreads() - put!(ch, (;)) + buffer = if isnothing(buffer) + problem.buffer + else + buffer == problem.buffer || + throw(ArgumentError("BatchProblem buffer must match WindowedProblem buffer. Got $buffer and $(problem.buffer)")) + buffer end - ranges = collect(_window_ranges(p, rast)) - mask = _valid_window_mask(p, rast, ranges) - used_ranges = ranges[mask] + centersize = centersize isa Tuple{Int,Int} ? centersize : (centersize, centersize) + map(centersize, ConScape.centersize(problem)) do bcs, wcs + rem(bcs, wcs) == 0 || + throw(ArgumentError("BatchProblem centersize must be a multiple of WindowedProblem centersize. Got $centersize and $(problem.centersize)")) + end + isnothing(nwindows) || throw(ArgumentError("Cannot specify both centersize and nwindows")) + BatchProblem(; problem, buffer, centersize, kw...) +end + +function solve(p::BatchProblem, rast::RasterStack; kw...) + window_indices = _window_indices(p, rast) function run(i) - rs = used_ranges[i] - verbose && println("Solving window $i $rs ") - rast_window = _mask_target_qualities_overlap!(rast, rs, p) - storage = take!(ch) - workspace = if isnothing(storage) - init!(storage, p, rast_window) - end - output = solve(p.problem, workspace) - put!(ch, workspace) - _store(p, output, rs) + solve(p, rast, i; window_indices, kw...) end if p.threaded - Threads.@threads for i in eachindex(used_ranges) - run(i) + Threads.@threads :greedy for i in eachindex(window_indices) + run(i) end else - for i in eachindex(used_ranges) - run(i) + for i in eachindex(window_indices) + run(i) end end end # Single batch job for running on clusters function solve(p::BatchProblem, rast::RasterStack, i::Int; - verbose=false, + window_indices=nothing, verbose=false, kw... ) - # Indices i are contiguous so we need to spread them - # accross the actual tiles that need to be done + # Indices i are contiguous so we need to spread them accross the actual tiles + # that need to be done by first calculating or retrieving `window_indices`. + # Manual calculateion is best avoided when it means reading 10gb over a network. + window_ranges = _window_ranges(p, rast) + if isnothing(window_indices) + window_indices = if isnothing(p.joblistpath) + _window_indices(p, rast) + else + _read_joblist(p) + end + end - # Get all the tile ranges - ranges = collect(_window_ranges(p, rast)) - # Get the Bool mask of needed windows - mask = _valid_window_mask(p, rast, ranges) - # Get the Int indices of the needed windows - tile_inds = eachindex(mask)[vec(mask)] + # Job i + rs = window_ranges[window_indices[i]] # Get the current window for this job - rs = ranges[tile_inds[i]] - # Get the ranges of the window for this job - rast_window = _mask_target_qualities_overlap!(rast, rs, p) - # Maybe thin the target qualities - if !isnothing(p.grain) - rast_window = ConScape.coarse_graining(rast_window, p.grain) - end - output = solve(p.problem, rast[rs...]) + rast_window = _get_window_with_zeroed_buffer(rast, rs, p) + output = solve(p.problem, rast_window; kw...) # Store the output rasters for this job to disk - filename = _store(p, output, rs) - return filename + if !ismissing(output) + _store(p, output, rs; verbose) + end + return nothing end -""" - count_batches(p::BatchProblem, rast::RasterStack) +function assess(p::BatchProblem, rast::RasterStack) + window_indices = _window_indices(p, rast) + _write_joblist(p; window_indices) + a = allocations(p, rast) + return (; max_allocations=a, njobs=length(window_indices)) +end -Count the number of batch jobs that would need to be run. +grain(p::BatchProblem) = p.grain -A Slurm array job would then be specified "0-(N-1)" +centersize(p::WindowedProblem) = p.centersize, p.centersize +centersize(p::BatchProblem) = p.centersize -Returns an `Int`. -""" -function count_batches(p::BatchProblem, rast::RasterStack) - ranges = _window_ranges(p, rast) - mask = _valid_window_mask(p, rast, ranges) - return count(mask) +### Batch utilities + +function _read_joblist(p::BatchProblem) + # Read indices from the joblist file. This is generated in `assess` + isfile(p.joblistpath) || throw(ArgumentError("joblistpath $(p.joblistpath) does not exist")) + return parse.(Int, readlines(p.joblistpath)) +end + +function _write_joblist(p::BatchProblem; window_indices) + if !isnothing(p.joblistpath) + open(p.joblistpath, "w") do io + for i in window_indices + println(io, i) + end + end + end end # Mosaic the stored files to a RasterStack function Rasters.mosaic(p::BatchProblem; - to, lazy=false, filename=nothing, missingval=NaN, kw... + to, lazy=false, filename=nothing, missingval=0.0, kw... ) ranges = _window_ranges(p, to) mask = _valid_window_mask(p, to, ranges) @@ -247,30 +258,41 @@ function Rasters.mosaic(p::BatchProblem; return Rasters.mosaic(sum, stacks; to, filename, missingval, kw...) end -function _store(p::BatchProblem, output::RasterStack{K}, ranges) where K +function _store(p::BatchProblem, output::RasterStack{K}, ranges; kw...) where K path = mkpath(_window_path(p, ranges)) return Rasters.write(joinpath(path, ""), output; - ext=p.ext, verbose=false, force=true + ext=p.ext, force=true, kw... ) end -function _window_path(p, ranges) +function _window_path(p, ranges::Tuple) corners = map(first, ranges) window_dirname = "window_" * join(corners, '_') - return joinpath(p.path, window_dirname) + return joinpath(p.datapath, window_dirname) end ### Shared utilities -_window_ranges(p::Union{BatchProblem,WindowedProblem}, rast::AbstractRasterStack) = - _window_ranges(size(rast), p.centersize, p.buffer) -function _window_ranges(size::Tuple{Int,Int}, centersize::Int, buffer::Int) - windowsize = 2buffer + centersize +function _window_indices(p, rast; + window_ranges=_window_ranges(p, rast) +) + # Get the Bool mask of needed windows + mask = _valid_window_mask(p, rast, window_ranges) + # Get the Int indices of the needed windows + return eachindex(mask)[vec(mask)] +end + +function _window_ranges(p::Union{BatchProblem,WindowedProblem}, rast::AbstractRasterStack) + size = Base.size(rast) + centersize = ConScape.centersize(p) + buffer = ConScape.buffer(p) + ws1, ws2 = windowsize = 2buffer .+ centersize + cs1, cs2 = centersize # Define the corners of each window - corners = CartesianIndices(size)[begin:centersize:end-windowsize, begin:centersize:end-windowsize] + corners = CartesianIndices(size)[begin:cs1:end, begin:cs2:end] # Create an iterator of ranges for retreiving each window - return (map((i, sz) -> i:min(sz, i + windowsize-1), Tuple(c), size) for c in corners) + return [map((i, s, ws) -> i:min(s, i + ws-1), Tuple(c), size, windowsize) for c in corners] end # Create a mask to skip tiles that have no target cells @@ -278,10 +300,14 @@ _valid_window_mask(p, ::Nothing, ranges) = nothing _valid_window_mask(p, rast::AbstractRasterStack, ranges) = map(r -> _valid_targets(any, p, rast, r), ranges) -function _mask_target_qualities_overlap!(rast, rs, p) - b = p.buffer +function _get_window_with_zeroed_buffer(rast, rs, p::AbstractWindowedProblem) + b = buffer(p) fill = zero(eltype(rast.target_qualities)) - dest = rast[rs...] + dest = if isnothing(grain(p)) + rast[rs...] + else + coarse_graining(view(rast, rs), grain(p)) + end dest.target_qualities[begin:min(begin+b-1, end), :] .= fill dest.target_qualities[:, begin:min(begin+b-1, end)] .= fill dest.target_qualities[max(end-b+1, begin):end, :] .= fill @@ -300,11 +326,13 @@ function _valid_sources(f, p, rast::AbstractRasterStack, source_ranges::Tuple) # TODO allow users to change this condition? f(x -> !isnan(x) && x > zero(x), window) end -function _valid_targets(f, p, rast::AbstractRasterStack, source_ranges::Tuple) +function _valid_targets( + f, p, rast::AbstractRasterStack, source_ranges::Tuple +) # Get the range of the target vaues - o = overlap(p) + b = buffer(p) target_ranges = map(source_ranges) do r - r[o+1:end-o] + r[b+1:end-b] end # Get a window view window = view(rast.target_qualities, target_ranges...) diff --git a/test/problem.jl b/test/problem.jl index 67f8e00..8e7dec5 100644 --- a/test/problem.jl +++ b/test/problem.jl @@ -144,133 +144,157 @@ end graph_measures = (; - ch=ConScape.ConnectedHabitat(), - betq=ConScape.BetweennessQweighted(), + # betq=ConScape.BetweennessQweighted(), betk=ConScape.BetweennessKweighted(), + ch=ConScape.ConnectedHabitat(), # # TODO sens=ConScape.Sensitivity(), # crit=ConScape.Criticality(), # very very slow, each target makes a new grid ) -distance_transformation = x -> exp(-x / 5) -distance_transformation(10) +# Set low alpha here so the decay is steep for testing +distance_transformation = x -> exp(-x / 2) connectivity_measure = ConScape.ExpectedCost(; θ, distance_transformation) -expected_layers = (:ch_nodist, :ch_one, :betq, :betk_nodist, :betk_one) +expected_layers = (:betk, :ch) solver = ConScape.MatrixSolver() problem = ConScape.Problem(; graph_measures, connectivity_measure, solver) -windowed_problem = ConScape.WindowedProblem(problem; - source_radius=20, target_radius=10, threaded=false -) -# ConScape.allocations(windowed_problem, rast) / 1e6 -@time ConScape.solve(windowed_problem, rast, verbose=true) -# WindowedProblem returns a RasterStack -windowed_problem = ConScape.WindowedProblem(problem; - buffer=10, centersize=5, threaded=false, -) -windowed_result = ConScape.solve(windowed_problem, rast; - # test_windows=true, - verbose=false, - # mosaic_return=false -) -plot(rast) -plot(windowed_result) -plot(windowed_result[7]) -@test windowed_result isa RasterStack -@test size(windowed_result) == size(rast) -@test keys(windowed_result) == expected_layers -plot(windowed_result) - -@test collect(ConScape._get_window_ranges(windowed_problem, rast)) == [ - (1:20, 1:20) (1:20, 17:36) (1:20, 33:52) (1:20, 49:59) - (17:36, 1:20) (17:36, 17:36) (17:36, 33:52) (17:36, 49:59) - (33:44, 1:20) (33:44, 17:36) (33:44, 33:52) (33:44, 49:59) -] -test_results = ConScape.solve(windowed_problem, rast; verbose=true, test_windows=true) -inner_targets = copy(rast.target_qualities) -# Edge targets are lost with windowing -inner_targets[1:2, :] .= NaN -inner_targets[:, 1:2] .= NaN -inner_targets[end-1:end, :] .= NaN -inner_targets[:, end-1:end] .= NaN -@test all(inner_targets .=== test_results.target_qualities) - -plot(test_results.target_qualities) -plot(rast.target_qualities) - -windowed_problem_t1 = ConScape.WindowedProblem(problem; - source_radius=10, target_radius=1, threaded=true -) -windowed_problem_t2 = ConScape.WindowedProblem(problem; - source_radius=10, target_radius=2, threaded=true -) -windowed_problem_t4 = ConScape.WindowedProblem(problem; - source_radius=10, target_radius=4, threaded=true -) -windowed_problem_t6 = ConScape.WindowedProblem(problem; - source_radius=10, target_radius=6, threaded=true -) -length(ConScape._get_window_ranges(windowed_problem_t1, rast)) -length(ConScape._get_window_ranges(windowed_problem_t2, rast)) -length(ConScape._get_window_ranges(windowed_problem_t4, rast)) -length(ConScape._get_window_ranges(windowed_problem_t6, rast)) -using BenchmarkTools -@btime ConScape.solve(windowed_problem_t1, rast, verbose=false); -@btime ConScape.solve(windowed_problem_t2, rast, verbose=false); -@btime ConScape.solve(windowed_problem_t4, rast, verbose=false); -@btime ConScape.solve(windowed_problem_t6, rast, verbose=false); -@profview_allocs ConScape.solve(windowed_problem_t1, rast, verbose=false) sampling=1.0 -@profview_allocs ConScape.solve(windowed_problem_t2, rast, verbose=false) sampling=1.0 -@profview_allocs ConScape.solve(windowed_problem_t4, rast, verbose=false) sampling=1.0 -@profview_allocs ConScape.solve(windowed_problem_t6, rast, verbose=false) sampling=1.0 -@profview -res = ConScape.solve(windowed_problem_t1, rast, verbose=false) -@profview ConScape.solve(windowed_problem_t2, rast, verbose=false) -@profview ConScape.solve(windowed_problem_t4, rast, verbose=false) -@profview ConScape.solve(windowed_problem_t6, rast, verbose=false) -res = ConScape.solve(windowed_problem_t4, rast, verbose=false) + +@testset "target mosaicing matches original" begin + # TODO note that this breaks if q weighting is included + windowed_problem = ConScape.WindowedProblem(problem; + buffer=10, centersize=5, threaded=false + ) + @test collect(ConScape._window_ranges(windowed_problem, rast)) == [ + (1:25, 1:25) (1:25, 6:30) (1:25, 11:35) (1:25, 16:40) (1:25, 21:45) (1:25, 26:50) (1:25, 31:55) (1:25, 36:59) (1:25, 41:59) (1:25, 46:59) (1:25, 51:59) (1:25, 56:59) + (6:30, 1:25) (6:30, 6:30) (6:30, 11:35) (6:30, 16:40) (6:30, 21:45) (6:30, 26:50) (6:30, 31:55) (6:30, 36:59) (6:30, 41:59) (6:30, 46:59) (6:30, 51:59) (6:30, 56:59) + (11:35, 1:25) (11:35, 6:30) (11:35, 11:35) (11:35, 16:40) (11:35, 21:45) (11:35, 26:50) (11:35, 31:55) (11:35, 36:59) (11:35, 41:59) (11:35, 46:59) (11:35, 51:59) (11:35, 56:59) + (16:40, 1:25) (16:40, 6:30) (16:40, 11:35) (16:40, 16:40) (16:40, 21:45) (16:40, 26:50) (16:40, 31:55) (16:40, 36:59) (16:40, 41:59) (16:40, 46:59) (16:40, 51:59) (16:40, 56:59) + (21:44, 1:25) (21:44, 6:30) (21:44, 11:35) (21:44, 16:40) (21:44, 21:45) (21:44, 26:50) (21:44, 31:55) (21:44, 36:59) (21:44, 41:59) (21:44, 46:59) (21:44, 51:59) (21:44, 56:59) + (26:44, 1:25) (26:44, 6:30) (26:44, 11:35) (26:44, 16:40) (26:44, 21:45) (26:44, 26:50) (26:44, 31:55) (26:44, 36:59) (26:44, 41:59) (26:44, 46:59) (26:44, 51:59) (26:44, 56:59) + (31:44, 1:25) (31:44, 6:30) (31:44, 11:35) (31:44, 16:40) (31:44, 21:45) (31:44, 26:50) (31:44, 31:55) (31:44, 36:59) (31:44, 41:59) (31:44, 46:59) (31:44, 51:59) (31:44, 56:59) + (36:44, 1:25) (36:44, 6:30) (36:44, 11:35) (36:44, 16:40) (36:44, 21:45) (36:44, 26:50) (36:44, 31:55) (36:44, 36:59) (36:44, 41:59) (36:44, 46:59) (36:44, 51:59) (36:44, 56:59) + (41:44, 1:25) (41:44, 6:30) (41:44, 11:35) (41:44, 16:40) (41:44, 21:45) (41:44, 26:50) (41:44, 31:55) (41:44, 36:59) (41:44, 41:59) (41:44, 46:59) (41:44, 51:59) (41:44, 56:59) + ] + test_results = ConScape.solve(windowed_problem, rast; test_windows=true) + inner_targets = copy(rast.target_qualities) + replace!(inner_targets, NaN => 0.0) + # Edge targets are lost with windowing + inner_targets[1:10, :] .= 0 + inner_targets[:, 1:10] .= 0 + inner_targets[end-9:end, :] .= 0 + inner_targets[:, end-9:end] .= 0 + @test inner_targets == test_results.target_qualities +end + +@testset "windowed results approximate non-windowed" begin + buffer=15 + windowed_problem = ConScape.WindowedProblem(problem; + buffer, centersize=5, threaded=false + ) + mask!(rast; with=rast) + rast_inner = ConScape._get_window_with_zeroed_buffer(rast, axes(rast), windowed_problem) + @time wp_result = ConScape.solve(windowed_problem, rast) + @time p_result = ConScape.solve(problem, rast_inner) + # plot(p_result) + # plot(wp_result) + @test maplayers(p_result, wp_result) do P, WP + broadcast(P, WP) do p, wp + isnan(p) && isnan(wp) || isapprox(p, wp; atol=1e-4) + end |> all + end |> all +end + # BatchProblem writes files to disk and mosaics to RasterStack -stored_problem = ConScape.BatchProblem(problem; - path=tempname(), source_radius=20, target_radius=10, threaded=true -) -ConScape.solve(stored_problem, rast; verbose=true) -stored_result = mosaic(stored_problem; to=rast) -@test stored_result isa RasterStack -@test size(stored_result) == reverse(size(rast)) -# keys are sorted now from file-name order -@test keys(stored_result) == Tuple(sort(collect(expected_layers))) -# Check the answer matches the WindowedProblem -# Note: its been permuted back by GDAL -@test all(permutedims(stored_result.ch_nodist) .=== windowed_result.ch_nodist) - -# BatchProblem can be run as batch jobs for clusters -# We just need a new path to make sure the result is from a new run -stored_problem2 = ConScape.BatchProblem(problem; - path=tempname(), radius=20, overlap=10, threaded=true -) -njobs = ConScape.count_batches(stored_problem2, rast) -@test njobs == 4 +@testset "batch problem matches windowed problem" begin + # Use a higher alpha to catch differences + distance_transformation = x -> exp(-x / 50) + connectivity_measure = ConScape.ExpectedCost(; θ, distance_transformation) + problem = ConScape.Problem(; graph_measures, connectivity_measure, solver) + + kw = (; buffer=10, centersize=5, threaded=false) + windowed_problem = ConScape.WindowedProblem(problem; kw...) + windowed_result = ConScape.solve(windowed_problem, rast) + + batch_problem = ConScape.BatchProblem(problem; datapath=tempname(), kw...) + ConScape.solve(batch_problem, rast) + batch_result = mosaic(batch_problem; to=rast) + @test batch_result isa RasterStack + + # BatchProblem can be run as batch jobs for clusters + # We just need a new path to make sure the result is from a new run + batch_jobs_problem = ConScape.BatchProblem(problem; + datapath=tempname(), joblistpath=tempname(), kw... + ) + assessment = ConScape.assess(batch_jobs_problem, rast) + batch_jobs_problem.centersize + @test assessment.njobs == 39 + @test isfile(batch_jobs_problem.joblistpath) + ConScape._read_joblist(batch_jobs_problem) + + for job in 1:assessment.njobs + ConScape.solve(batch_jobs_problem, rast, job) + end + batch_jobs_result = mosaic(batch_jobs_problem; to=rast) -for job in 1:njobs - ConScape.solve(stored_problem2, rast, job) + nested_problem = ConScape.BatchProblem(windowed_problem; + datapath=tempname(), centersize=(10, 10), threaded=false + ) + ConScape.assess(nested_problem, rast) + ConScape.solve(nested_problem, rast) + nested_result = mosaic(nested_problem; to=rast) + @test nested_result isa RasterStack + + @test keys(windowed_result) == + keys(nested_result) == + keys(batch_result) == + keys(batch_jobs_result) == Tuple(sort(collect(expected_layers))) + + @test all(permutedims(batch_jobs_result.ch) .=== permutedims(batch_result.ch) .=== windowed_result.ch) + @test all(permutedims(batch_jobs_result.betk) .=== permutedims(batch_result.betk) .=== windowed_result.betk) + + # TODO: there are some tiny fp differences in the nested result + @test all(map(nested_result.ch, batch_result.ch) do n, b + isnan(n) && isnan(b) || isapprox(n, b) + end) + + # plot(windowed_result) + # plot(batch_result) + # plot(batch_jobs_result) + # plot(nested_result) end -batch_result = mosaic(stored_problem2; to=rast) -# Check the answer matches the non-batched run -@test all(batch_result.ch_nodist .=== stored_result.ch_nodist) -@test keys(batch_result) == Tuple(sort(collect(expected_layers))) -plot(batch_result.ch_nodist) -plot(stored_result.ch_nodist) -# BatchProblem can be nested with WindowedProblem -small_windowed_problem = ConScape.WindowedProblem(problem; - radius=25, overlap=10, -) -nested_problem = ConScape.BatchProblem(small_windowed_problem; - path=tempname() * ".nc", radius=40, overlap=10, threaded=false -) -ConScape.solve(nested_problem, rast) -nested_result = mosaic(nested_problem; to=rast) -@test nested_result isa RasterStack -@test size(nested_result) == size(rast) -@test keys(nested_result) == Tuple(sort(collect(expected_layers))) \ No newline at end of file +# Scale Benchmarking... + +# windowed_problem_t1 = ConScape.WindowedProblem(problem; +# source_radius=10, target_radius=1, threaded=true +# ) +# windowed_problem_t2 = ConScape.WindowedProblem(problem; +# source_radius=10, target_radius=2, threaded=true +# ) +# windowed_problem_t4 = ConScape.WindowedProblem(problem; +# source_radius=10, target_radius=4, threaded=true +# ) +# windowed_problem_t6 = ConScape.WindowedProblem(problem; +# source_radius=10, target_radius=6, threaded=true +# ) +# length(ConScape._get_window_ranges(windowed_problem_t1, rast)) +# length(ConScape._get_window_ranges(windowed_problem_t2, rast)) +# length(ConScape._get_window_ranges(windowed_problem_t4, rast)) +# length(ConScape._get_window_ranges(windowed_problem_t6, rast)) +# using BenchmarkTools +# @btime ConScape.solve(windowed_problem_t1, rast, verbose=false); +# @btime ConScape.solve(windowed_problem_t2, rast, verbose=false); +# @btime ConScape.solve(windowed_problem_t4, rast, verbose=false); +# @btime ConScape.solve(windowed_problem_t6, rast, verbose=false); +# @profview_allocs ConScape.solve(windowed_problem_t1, rast, verbose=false) sampling=1.0 +# @profview_allocs ConScape.solve(windowed_problem_t2, rast, verbose=false) sampling=1.0 +# @profview_allocs ConScape.solve(windowed_problem_t4, rast, verbose=false) sampling=1.0 +# @profview_allocs ConScape.solve(windowed_problem_t6, rast, verbose=false) sampling=1.0 +# @profview +# res = ConScape.solve(windowed_problem_t1, rast, verbose=false) +# @profview ConScape.solve(windowed_problem_t2, rast, verbose=false) +# @profview ConScape.solve(windowed_problem_t4, rast, verbose=false) +# @profview ConScape.solve(windowed_problem_t6, rast, verbose=false) +# res = ConScape.solve(windowed_problem_t4, rast, verbose=false) \ No newline at end of file From ba234c34e977fc877129cd13b0a1affadd447f45 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Wed, 29 Jan 2025 13:30:07 +0100 Subject: [PATCH 12/51] tweaks --- src/allocations.jl | 22 ++++++++---------- src/grid.jl | 8 +++---- src/tiles.jl | 57 +++++++++++++++++++++++++--------------------- 3 files changed, 45 insertions(+), 42 deletions(-) diff --git a/src/allocations.jl b/src/allocations.jl index 1a2b259..dcff652 100644 --- a/src/allocations.jl +++ b/src/allocations.jl @@ -16,31 +16,30 @@ are not well known. """ function allocations end -function allocations(p::Problem, rast::AbstractRasterStack; kw...) - allocations(p, Grid(rast); kw...) -end -function allocations(p::Problem, grid::Grid; kw...) - sze = size(grid) +allocations(p::Problem, rast::AbstractRasterStack; kw...) = + allocations(p, _problem_size(p, rast); kw...) +allocations(p::Problem, grid::Grid; kw...) = + allocations(p, size(grid); kw...) +function allocations(p::Problem, sze::Tuple{Int,Int}; kw...) gms = graph_measures(p) dense_size = sizeofdense(p, sze) sparse_size = sizeofsparse(p, sze) init_size = allocations(solver(p), sze; kw...) - grid_size = Base.summarysize(grid) return_size = sum(map(gm -> sizeofreturn(gm, sze), gms)) - total = sparse_size + dense_size + init_size + return_size + grid_size + total = sparse_size + dense_size + init_size + return_size # (; total, sparse_size, dense_size, init_size, return_size, grid_size) return total end function allocations(p::AbstractWindowedProblem, rast::AbstractRasterStack; nthreads=Threads.nthreads(), kw... ) + window_sizes = _window_problem_sizes(p, rast) # largest_first = sort!(collect(zip(vec(problem_sizes), vec(range_tuples))); rev=true) - range_tuples = vec(_window_ranges(p, rast)) # Use the allocations for the largest windows - allocs = map(range_tuples) do rs - return allocations(p.problem, rast[rs...]; nthreads, kw...) + allocs = map(window_sizes) do sz + return allocations(p.problem, sz; nthreads, kw...) end if p.threaded sum(sort(allocs)[1:min(end, nthreads)]) @@ -98,5 +97,4 @@ sizeofreturn(gm::GraphMeasure, sze) = sizeofreturn(returntype(gm), sze) sizeofreturn(::ReturnsDenseSpatial, (n, m)) = n * sizeof(Float64) sizeofreturn(::ReturnsSparse, (n, m)) = n * m * 8 # Roughly this for 8 neighbors sizeofreturn(::ReturnsScalar, (n, m)) = sizeof(Float64) -sizeofreturn(r::ReturnsOther, (n, m)) = r.f(n, m) - +sizeofreturn(r::ReturnsOther, (n, m)) = r.f(n, m) \ No newline at end of file diff --git a/src/grid.jl b/src/grid.jl index eed3774..8269da6 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -281,10 +281,10 @@ function largest_subgraph(g::Grid) # affinities = convert(SparseMatrixCSC{Float64,Int}, graph[scci]) costmatrix = g.costfunction === nothing ? g.costmatrix[scci, scci] : mapnz(g.costfunction, affinities) - id_to_grid_coordinate_list = NoWriteArray(g.id_to_grid_coordinate_list[scci]) - targetidx, targetnodes = map(NoWriteArray, _targetidx_and_nodes(g.target_qualities, id_to_grid_coordinate_list)) - qs = NoWriteArray([g.source_qualities[i] for i in id_to_grid_coordinate_list]) - qt = NoWriteArray([g.target_qualities[i] for i in id_to_grid_coordinate_list ∩ targetidx]) + id_to_grid_coordinate_list = g.id_to_grid_coordinate_list[scci] + targetidx, targetnodes = _targetidx_and_nodes(g.target_qualities, id_to_grid_coordinate_list) + qs = [g.source_qualities[i] for i in id_to_grid_coordinate_list] + qt = [g.target_qualities[i] for i in id_to_grid_coordinate_list ∩ targetidx] return Grid( g.nrows, g.ncols, diff --git a/src/tiles.jl b/src/tiles.jl index 3052838..27af6e6 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -31,9 +31,9 @@ end WindowedProblem(problem; kw...) = WindowedProblem(; problem, kw...) function solve(p::WindowedProblem, rast::RasterStack; - test_windows=false, - verbose=false, - mosaic_return=true + test_windows::Bool=false, + verbose::Bool=false, + mosaic_return::Bool=true ) window_ranges = _window_ranges(p, rast) window_indices = _window_indices(p, rast; window_ranges) @@ -92,23 +92,25 @@ end # sorted_ranges = collect(last.(sort!(map(rs -> prod(_size(p, rast, rs)) => rs, used_ranges)))) -function _max_window_problem_size(p::AbstractWindowedProblem, rast) - sizes = _window_problem_sizes(p::AbstractWindowedProblem, rast) - _, i = findmax(sizes) +function _max_window_problem_size(p::AbstractWindowedProblem, rast; kw...) + sizes = _window_problem_sizes(p, rast; kw...) + _, i = findmax(prod, sizes) return sizes[i] end # Calculate the maximum number of source and target values in any window -function _window_problem_sizes(p::AbstractWindowedProblem, rast) - rs = _window_ranges(p, rast) +function _window_problem_sizes(p::AbstractWindowedProblem, rast; + window_ranges=_window_ranges(p, rast) +) # Calculate the maximum number of source and target values in any window - return map(r -> _problem_size(p, rast, r), rs) + return map(r -> _problem_size(p, rast, r), window_ranges) end +_problem_size(p::AbstractProblem, rast) = _problem_size(p, rast, axes(rast)) function _problem_size(p::AbstractProblem, rast, ranges::Tuple) source_count = _valid_sources(count, p, rast, ranges) target_count = _valid_targets(count, p, rast, ranges) - return source_count * target_count + return source_count, target_count end """ @@ -164,12 +166,17 @@ function BatchProblem(problem::WindowedProblem; throw(ArgumentError("BatchProblem buffer must match WindowedProblem buffer. Got $buffer and $(problem.buffer)")) buffer end - centersize = centersize isa Tuple{Int,Int} ? centersize : (centersize, centersize) - map(centersize, ConScape.centersize(problem)) do bcs, wcs - rem(bcs, wcs) == 0 || - throw(ArgumentError("BatchProblem centersize must be a multiple of WindowedProblem centersize. Got $centersize and $(problem.centersize)")) + if isnothing(centersize) + x = problem.centersize * nwindows + centersize = x, x + else + centersize = centersize isa Tuple{Int,Int} ? centersize : (centersize, centersize) + map(centersize, ConScape.centersize(problem)) do bcs, wcs + rem(bcs, wcs) == 0 || + throw(ArgumentError("BatchProblem centersize must be a multiple of WindowedProblem centersize. Got $centersize and $(problem.centersize)")) + end + isnothing(nwindows) || throw(ArgumentError("Cannot specify both centersize and nwindows")) end - isnothing(nwindows) || throw(ArgumentError("Cannot specify both centersize and nwindows")) BatchProblem(; problem, buffer, centersize, kw...) end @@ -190,7 +197,8 @@ function solve(p::BatchProblem, rast::RasterStack; kw...) end # Single batch job for running on clusters function solve(p::BatchProblem, rast::RasterStack, i::Int; - window_indices=nothing, verbose=false, kw... + window_indices::Bool=nothing, + verbose::Bool=false, kw... ) # Indices i are contiguous so we need to spread them accross the actual tiles # that need to be done by first calculating or retrieving `window_indices`. @@ -247,21 +255,18 @@ function _write_joblist(p::BatchProblem; window_indices) end # Mosaic the stored files to a RasterStack -function Rasters.mosaic(p::BatchProblem; - to, lazy=false, filename=nothing, missingval=0.0, kw... -) +function Rasters.mosaic(p::BatchProblem; to, missingval=0.0, kw...) ranges = _window_ranges(p, to) - mask = _valid_window_mask(p, to, ranges) - paths = [_window_path(p, rs) for (rs, m) in zip(ranges, mask) if m] - stacks = [RasterStack(path; lazy, name) for path in paths if isdir(path)] + paths = [_window_path(p, rs) for rs in ranges] + stacks = [RasterStack(path; lazy) for path in paths if isdir(path)] - return Rasters.mosaic(sum, stacks; to, filename, missingval, kw...) + return Rasters.mosaic(sum, stacks; missingval, to, kw...) end function _store(p::BatchProblem, output::RasterStack{K}, ranges; kw...) where K - path = mkpath(_window_path(p, ranges)) - return Rasters.write(joinpath(path, ""), output; - ext=p.ext, force=true, kw... + dir = mkpath(_window_path(p, ranges)) + return Rasters.write(joinpath(dir, ""), output; + ext=p.ext, force=true, verbose=false, kw... ) end From 5b26be2f39ab1a764da6b79ade1a5ef09427852b Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Wed, 29 Jan 2025 15:24:13 +0100 Subject: [PATCH 13/51] fix doc --- src/problem.jl | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/problem.jl b/src/problem.jl index 04bb1f0..d4d35b2 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -1,11 +1,3 @@ -# Defined earlier in ConScape.jl for load order -# abstract type AbstractProblem end -@doc """ - Problem - -Abstract supertype for ConScape problem specifications. -""" Problem - # Recusive getters for nested problems graph_measures(p::AbstractProblem) = graph_measures(p.problem) connectivity_measure(p::AbstractProblem) = connectivity_measure(p.problem) From 1fa786aea9028f7a80d071f7a6b0c9831fca2a08 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Wed, 29 Jan 2025 15:26:18 +0100 Subject: [PATCH 14/51] fix type params --- src/solvers.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solvers.jl b/src/solvers.jl index e96daaf..780d03e 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -367,7 +367,7 @@ function _mergename(::Val{K1}, gm::NamedTuple{K2}) where {K1, K2} # And rename the NamedTuple NamedTuple{joinedkeys}(map(_maybe_raster, values(gm))) end -_mergename(::Val{K1}, gm) where {K1, K2} = +_mergename(::Val{K1}, gm) where K1 = # We keep the name as is NamedTuple{(K1,)}((_maybe_raster(gm),)) From 759b34ba395b94aaa1138a8ff1ba5bdddc6e6faa Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Wed, 29 Jan 2025 22:34:32 +0100 Subject: [PATCH 15/51] add basic windowed model init --- src/allocations.jl | 15 ----- src/grid.jl | 5 +- src/problem.jl | 15 +++-- src/tiles.jl | 151 +++++++++++++++++++++++++++++++++++---------- 4 files changed, 131 insertions(+), 55 deletions(-) diff --git a/src/allocations.jl b/src/allocations.jl index dcff652..18eb902 100644 --- a/src/allocations.jl +++ b/src/allocations.jl @@ -32,21 +32,6 @@ function allocations(p::Problem, sze::Tuple{Int,Int}; kw...) # (; total, sparse_size, dense_size, init_size, return_size, grid_size) return total end -function allocations(p::AbstractWindowedProblem, rast::AbstractRasterStack; - nthreads=Threads.nthreads(), kw... -) - window_sizes = _window_problem_sizes(p, rast) - # largest_first = sort!(collect(zip(vec(problem_sizes), vec(range_tuples))); rev=true) - # Use the allocations for the largest windows - allocs = map(window_sizes) do sz - return allocations(p.problem, sz; nthreads, kw...) - end - if p.threaded - sum(sort(allocs)[1:min(end, nthreads)]) - else - maximum(allocs; init=0) - end -end # This is approximate. diff --git a/src/grid.jl b/src/grid.jl index 8269da6..b4110f2 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -137,13 +137,14 @@ function Grid(rast::RasterStack; end, source_qualities=get(rast, :source_qualities, qualities), target_qualities=get(rast, :target_qualities, qualities), - costs=get(rast, :costs, MinusLog()), + costs=MinusLog(), kw... ) Grid(size(rast)...; affinities, qualities, source_qualities, target_qualities, costs, kw...) end # TODO move functions like MinusLog to problems and pass in here -Grid(p::AbstractProblem, rast::RasterStack; kw...) = Grid(rast; kw...) +Grid(p::AbstractProblem, rast::RasterStack; kw...) = + Grid(rast; costs=costs(p), prune=prune(p), kw...) Base.size(g::Grid) = (g.nrows, g.ncols) DimensionalData.dims(g::Grid) = g.dims diff --git a/src/problem.jl b/src/problem.jl index d4d35b2..a361f67 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -26,11 +26,13 @@ to be run in the same job. - `connectivity_measure`: A [`ConnectivityMeasure`](@ref). - `solver`: A [`Solver`](@ref) specification. """ -@kwdef struct Problem{GM,CM<:ConnectivityMeasure,SM<:Solver,DV} <: AbstractProblem +@kwdef struct Problem{GM,CM<:ConnectivityMeasure,SM<:Solver,DV,CO} <: AbstractProblem graph_measures::GM connectivity_measure::CM = LeastCostDistance() solver::SM = MatrixSolver() diagvalue::DV=nothing + costs::CO=MinusLog() + prune::Bool=true end Problem(graph_measures::Union{Tuple,NamedTuple}; kw...) = Problem(; graph_measures, kw...) @@ -38,13 +40,16 @@ diagvalue(p::Problem) = p.diagvalue graph_measures(p::Problem) = p.graph_measures connectivity_measure(p::Problem) = p.connectivity_measure solver(p::Problem) = p.solver +costs(p::Problem) = p.costs +prune(p::Problem) = p.prune + solve(p::Problem, rast::RasterStack) = solve!(init(p, rast), p) solve!(workspace::NamedTuple, p::Problem) = solve!(workspace, solver(p), connectivity_measure(p), p) -init(p::AbstractProblem, args...) = init!((;), p, args...) - # Init is conditional on solver and connectivity measure -init!(workspace::NamedTuple, p::AbstractProblem, rast::RasterStack) = - init!(workspace, solver(p), connectivity_measure(p), p, rast) \ No newline at end of file +init!(workspace::NamedTuple, p::Problem, rast::RasterStack) = + init!(workspace, solver(p), connectivity_measure(p), p, rast) + +init(p::AbstractProblem, args...; kw...) = init!((;), p, args...; kw...) \ No newline at end of file diff --git a/src/tiles.jl b/src/tiles.jl index 27af6e6..ff8d4ae 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -1,10 +1,11 @@ # This file is a work in progress... -abstract type AbstractWindowedProblem <: AbstractProblem end +abstract type AbstractWindowedProblem{P} <: AbstractProblem end +costs(p::AbstractWindowedProblem) = costs(p.problem) +prune(p::AbstractWindowedProblem) = prune(p.problem) buffer(p::AbstractWindowedProblem) = p.buffer buffer(p::AbstractProblem) = 0 - grain(::AbstractProblem) = nothing """ @@ -22,15 +23,21 @@ to be run over the same windowed grids. - `buffer`: the area outside the source window. - `threaded`: Whether to run in parallel. `false` by default """ -@kwdef struct WindowedProblem <: AbstractWindowedProblem - problem::AbstractProblem +@kwdef struct WindowedProblem{P} <: AbstractWindowedProblem{P} + problem::P centersize::Int buffer::Int threaded::Bool = false end WindowedProblem(problem; kw...) = WindowedProblem(; problem, kw...) -function solve(p::WindowedProblem, rast::RasterStack; +centersize(p::WindowedProblem) = p.centersize, p.centersize + +function solve(p::WindowedProblem, rast::RasterStack; kw...) + workspace = init(p, rast) + solve!(workspace, p, rast; kw...) +end +function solve!(workspace, p::WindowedProblem, rast::RasterStack; test_windows::Bool=false, verbose::Bool=false, mosaic_return::Bool=true @@ -40,7 +47,7 @@ function solve(p::WindowedProblem, rast::RasterStack; # Test outputs just return the inputs after window masking if test_windows output_stacks = map(window_indices) do i - _get_window_with_zeroed_buffer(rast, window_ranges[i], p) + _get_window_with_zeroed_buffer(p, rast, window_ranges[i]) end return if mosaic_return Rasters.mosaic(sum, collect(skipmissing(output_stacks)); @@ -52,11 +59,7 @@ function solve(p::WindowedProblem, rast::RasterStack; end # Set up channels for threading - n = p.threaded ? Threads.nthreads() : 1 - ch = Channel{NamedTuple}(n) - for _ in 1:n - put!(ch, (;)) - end + ch = workspace.channel # Define empty outputs output_stacks = Vector{RasterStack}(undef, length(window_indices)) # Define a runner for threaded/non-threaded operation @@ -64,7 +67,7 @@ function solve(p::WindowedProblem, rast::RasterStack; # Get a window range rs = window_ranges[ir] # verbose && println("Solving window $i $rs ") - rast_window = _get_window_with_zeroed_buffer(rast, rs, p) + rast_window = _get_window_with_zeroed_buffer(p, rast, rs) # Initialise the window using stored memory workspace = init!(take!(ch), p.problem, rast_window) # Solve for the window @@ -90,6 +93,22 @@ function solve(p::WindowedProblem, rast::RasterStack; end end +init(p::AbstractWindowedProblem, rast::RasterStack) = init!((;), p, rast) +function init!(workspace::NamedTuple, p::AbstractWindowedProblem, rast::RasterStack) + n = p.threaded ? Threads.nthreads() : 1 + workspace = if haskey(workspace, :channel) + workspace + else + channel = Channel{NamedTuple}(n) + for _ in 1:n + put!(channel, (;)) + end + (; channel) + end + @show workspace + return workspace +end + # sorted_ranges = collect(last.(sort!(map(rs -> prod(_size(p, rast, rs)) => rs, used_ranges)))) function _max_window_problem_size(p::AbstractWindowedProblem, rast; kw...) @@ -137,8 +156,8 @@ for nested operations. - `threaded`: Whether to run in parallel. `false` by default. If the problem is also threaded at some level it may be faster to set this to `false`. """ -@kwdef struct BatchProblem <: AbstractWindowedProblem - problem::AbstractProblem +@kwdef struct BatchProblem{P} <: AbstractWindowedProblem{P} + problem::P buffer::Int centersize::Tuple{Int,Int} datapath::String @@ -180,6 +199,8 @@ function BatchProblem(problem::WindowedProblem; BatchProblem(; problem, buffer, centersize, kw...) end +centersize(p::BatchProblem) = p.centersize + function solve(p::BatchProblem, rast::RasterStack; kw...) window_indices = _window_indices(p, rast) function run(i) @@ -197,25 +218,27 @@ function solve(p::BatchProblem, rast::RasterStack; kw...) end # Single batch job for running on clusters function solve(p::BatchProblem, rast::RasterStack, i::Int; - window_indices::Bool=nothing, - verbose::Bool=false, kw... + window_indices=nothing, + verbose::Bool=false, + kw... ) # Indices i are contiguous so we need to spread them accross the actual tiles # that need to be done by first calculating or retrieving `window_indices`. # Manual calculateion is best avoided when it means reading 10gb over a network. window_ranges = _window_ranges(p, rast) - if isnothing(window_indices) - window_indices = if isnothing(p.joblistpath) - _window_indices(p, rast) - else - _read_joblist(p) - end - end + # if isnothing(window_indices) + # if isnothing(p.joblistpath) + window_indices = _window_indices(p, rast) + # else + # _read_joblist(p) + # end + # end # Job i + @show window_indices window_ranges rs = window_ranges[window_indices[i]] # Get the current window for this job - rast_window = _get_window_with_zeroed_buffer(rast, rs, p) + rast_window = _get_window_with_zeroed_buffer(p, rast, rs) output = solve(p.problem, rast_window; kw...) # Store the output rasters for this job to disk if !ismissing(output) @@ -224,17 +247,77 @@ function solve(p::BatchProblem, rast::RasterStack, i::Int; return nothing end -function assess(p::BatchProblem, rast::RasterStack) - window_indices = _window_indices(p, rast) - _write_joblist(p; window_indices) - a = allocations(p, rast) - return (; max_allocations=a, njobs=length(window_indices)) +function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack; + nthreads=Threads.nthreads(), + print=true, + kw... +) + window_ranges = _window_ranges(p, rast) + window_indices = _window_indices(p, rast; window_ranges) + window_sizes = _window_problem_sizes(p, rast; window_ranges=window_ranges[window_indices]) + allocs = map(window_ranges[window_indices]) do rs + window_rast = _get_window_with_zeroed_buffer(p, rast, rs) + allocations(p.problem, window_rast; nthreads, kw...) + end + max_allocations = if p.threaded + sum(sort(allocs)[1:min(end, nthreads)]) + else + maximum(allocs; init=0) + end + njobs = length(window_sizes) + + fields = (; njobs, max_allocations, allocations=allocs, window_sizes) + + print && display(pairs(fields)) + + return fields end +function assess( + p::AbstractWindowedProblem{<:AbstractWindowedProblem}, + rast::AbstractRasterStack; + nthreads=Threads.nthreads(), + print=true, + kw... +) + window_ranges = _window_ranges(p, rast) + window_indices = _window_indices(p, rast; window_ranges) + window_sizes = _window_problem_sizes(p, rast; window_ranges=window_ranges[window_indices]) + assesments = map(window_ranges[window_indices]) do rs + window_rast = _get_window_with_zeroed_buffer(p, rast, rs) + assess(p.problem, window_rast; nthreads, print=false, kw...) + end + inner_allocations = map(a -> a.allocations, assesments) + inner_window_sizes = map(a -> a.window_sizes, assesments) + max_allocations = if p.threaded + sum(sort(inner_allocations)[1:min(end, nthreads)]) + else + maximum(a -> maximum(a; init=0), inner_allocations; init=0) + end + njobs = length(window_sizes) + inner_window_counts = map(length, inner_window_sizes) + max_windows = maximum(inner_window_counts) + + fields = (; + njobs, + max_windows, + max_allocations, + inner_allocations, + window_sizes, + inner_window_counts, + inner_window_sizes + ) -grain(p::BatchProblem) = p.grain + print && display(pairs(fields)) -centersize(p::WindowedProblem) = p.centersize, p.centersize -centersize(p::BatchProblem) = p.centersize + return fields +end + +# function assess(p::BatchProblem, rast::RasterStack) +# window_indices = _window_indices(p, rast) +# _write_joblist(p; window_indices) +# a = allocations(p, rast) +# return (; max_allocations=a, njobs=length(window_indices)) +# end ### Batch utilities @@ -305,7 +388,9 @@ _valid_window_mask(p, ::Nothing, ranges) = nothing _valid_window_mask(p, rast::AbstractRasterStack, ranges) = map(r -> _valid_targets(any, p, rast, r), ranges) -function _get_window_with_zeroed_buffer(rast, rs, p::AbstractWindowedProblem) +_get_window_with_zeroed_buffer(p::AbstractWindowedProblem, rast::RasterStack) = + _get_window_with_zeroed_buffer(p, rast, axes(rast)) +function _get_window_with_zeroed_buffer(p::AbstractWindowedProblem, rast::RasterStack, rs) b = buffer(p) fill = zero(eltype(rast.target_qualities)) dest = if isnothing(grain(p)) From 31091707c3c6b4e32d4045f7e18efb9d3a77c80a Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 3 Feb 2025 14:48:36 +0100 Subject: [PATCH 16/51] assesment tweaks --- src/problem.jl | 2 +- src/tiles.jl | 236 ++++++++++++++++++++++++++++++------------------- 2 files changed, 148 insertions(+), 90 deletions(-) diff --git a/src/problem.jl b/src/problem.jl index a361f67..091a41b 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -29,7 +29,7 @@ to be run in the same job. @kwdef struct Problem{GM,CM<:ConnectivityMeasure,SM<:Solver,DV,CO} <: AbstractProblem graph_measures::GM connectivity_measure::CM = LeastCostDistance() - solver::SM = MatrixSolver() + solver::SM= MatrixSolver() diagvalue::DV=nothing costs::CO=MinusLog() prune::Bool=true diff --git a/src/tiles.jl b/src/tiles.jl index ff8d4ae..9007921 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -40,10 +40,10 @@ end function solve!(workspace, p::WindowedProblem, rast::RasterStack; test_windows::Bool=false, verbose::Bool=false, - mosaic_return::Bool=true + mosaic_return::Bool=true, + window_ranges=_window_ranges(p, rast), + window_indices=_window_indices(p, rast; window_ranges), ) - window_ranges = _window_ranges(p, rast) - window_indices = _window_indices(p, rast; window_ranges) # Test outputs just return the inputs after window masking if test_windows output_stacks = map(window_indices) do i @@ -77,7 +77,7 @@ function solve!(workspace, p::WindowedProblem, rast::RasterStack; end # Run the window problems if p.threaded - Threads.@threads :greedy for (i, ir) in enumerate(window_indices) + Threads.@threads for (i, ir) in enumerate(window_indices) run(i, ir) end else @@ -105,7 +105,6 @@ function init!(workspace::NamedTuple, p::AbstractWindowedProblem, rast::RasterSt end (; channel) end - @show workspace return workspace end @@ -161,7 +160,6 @@ for nested operations. buffer::Int centersize::Tuple{Int,Int} datapath::String - joblistpath::Union{String,Nothing}=nothing grain::Union{Nothing,Int} = nothing ext::String = ".tif" threaded::Bool = false @@ -201,13 +199,15 @@ end centersize(p::BatchProblem) = p.centersize -function solve(p::BatchProblem, rast::RasterStack; kw...) - window_indices = _window_indices(p, rast) +function solve(p::BatchProblem, rast::RasterStack; + window_indices=_window_indices(p, rast), + kw... +) function run(i) solve(p, rast, i; window_indices, kw...) end if p.threaded - Threads.@threads :greedy for i in eachindex(window_indices) + Threads.@threads for i in eachindex(window_indices) run(i) end else @@ -218,33 +218,24 @@ function solve(p::BatchProblem, rast::RasterStack; kw...) end # Single batch job for running on clusters function solve(p::BatchProblem, rast::RasterStack, i::Int; - window_indices=nothing, + window_indices=_window_indices(p, rast), verbose::Bool=false, kw... ) - # Indices i are contiguous so we need to spread them accross the actual tiles - # that need to be done by first calculating or retrieving `window_indices`. - # Manual calculateion is best avoided when it means reading 10gb over a network. + # Get the ranges of all jobs window_ranges = _window_ranges(p, rast) - # if isnothing(window_indices) - # if isnothing(p.joblistpath) - window_indices = _window_indices(p, rast) - # else - # _read_joblist(p) - # end - # end - - # Job i - @show window_indices window_ranges + + # Get the window range of job i rs = window_ranges[window_indices[i]] - # Get the current window for this job + + # Get the raster data for job i rast_window = _get_window_with_zeroed_buffer(p, rast, rs) + + # Solve for this raster output = solve(p.problem, rast_window; kw...) - # Store the output rasters for this job to disk - if !ismissing(output) - _store(p, output, rs; verbose) - end - return nothing + + # Store the output rasters for this job to disk and return the file path + return _store(p, output, rs; verbose) end function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack; @@ -252,59 +243,137 @@ function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack print=true, kw... ) + + # Define the ranges of each window window_ranges = _window_ranges(p, rast) - window_indices = _window_indices(p, rast; window_ranges) - window_sizes = _window_problem_sizes(p, rast; window_ranges=window_ranges[window_indices]) - allocs = map(window_ranges[window_indices]) do rs - window_rast = _get_window_with_zeroed_buffer(p, rast, rs) - allocations(p.problem, window_rast; nthreads, kw...) + + # Calculate window sizes and allocations + sizes_and_allocs = map(vec(window_ranges)) do rs + window_rast = view(rast, rs...) + sze = _problem_size(p, window_rast) + allocs = allocations(p.problem, sze; nthreads, kw...) + sze, allocs end + + # Organise stats for each window into vectors + window_sizes = first.(sizes_and_allocs) + window_allocations = last.(sizes_and_allocs) + window_mask = map(s -> prod(s) > 0, window_sizes) + window_indices = eachindex(window_mask)[window_mask] + + # Caclulate allocations, with threading context max_allocations = if p.threaded - sum(sort(allocs)[1:min(end, nthreads)]) + # Take the top nthreads allocations + # Each thread will need to allocate its own workspace + sum(sort(window_allocations)[1:min(end, nthreads)]) else - maximum(allocs; init=0) + # One maximum workspace is allocated and reused + maximum(window_allocations; init=0) end - njobs = length(window_sizes) - - fields = (; njobs, max_allocations, allocations=allocs, window_sizes) - - print && display(pairs(fields)) - return fields + # Calculate global stats + njobs = count(window_mask) + shape = size(window_ranges) + + return (; + shape, + njobs, + max_allocations, + window_allocations, + window_ranges, + window_sizes, + window_mask, + window_indices, + ) end function assess( p::AbstractWindowedProblem{<:AbstractWindowedProblem}, rast::AbstractRasterStack; - nthreads=Threads.nthreads(), + nthreads=Threads.nthreads(), print=true, kw... ) + # Calculate outer window ranges window_ranges = _window_ranges(p, rast) - window_indices = _window_indices(p, rast; window_ranges) - window_sizes = _window_problem_sizes(p, rast; window_ranges=window_ranges[window_indices]) - assesments = map(window_ranges[window_indices]) do rs - window_rast = _get_window_with_zeroed_buffer(p, rast, rs) - assess(p.problem, window_rast; nthreads, print=false, kw...) + @show length(window_ranges) + + # Define a channel to store window raster and reuse memory + channel = Channel{Any}(Threads.nthreads()) + for i in 1:nthreads + put!(channel, _get_window_with_zeroed_buffer(p, rast, first(window_ranges))) + end + + # Define a vector for all assessment data + assessments = Vector{Any}(undef, length(window_ranges)) + + # Run assessments threaded as they can take a long time for large rasters + Threads.@threads for i in eachindex(vec(window_ranges)) + rs = window_ranges[i] + println("Assessing batch: $i, $rs") + window_rast = take!(channel) + window_rast = if map(length, rs) == size(window_rast) + _get_window_with_zeroed_buffer!(window_rast, p, rast, rs) + else + _get_window_with_zeroed_buffer(p, rast, rs) + end + # Skip NaN only rasters + nvalid = count(x -> !(isnan(x) || x == 0), window_rast.target_qualities) + assessments[i] = if nvalid > 0 + assess(p.problem, window_rast; nthreads, print=false, kw...) + else + println(" No targets found") + (; + shape=(0, 0), + njobs=0, + max_allocations=0, + window_allocations=Int[], + window_ranges=Tuple{UnitRange,UnitRange}[], + window_sizes=Tuple{Int,Int}[], + window_mask=Bool[], + window_indices=Int[], + ) + end + put!(channel, window_rast) end - inner_allocations = map(a -> a.allocations, assesments) - inner_window_sizes = map(a -> a.window_sizes, assesments) + + # Get vectors of vectors from inner problem + inner_window_allocations = map(a -> a.window_allocations, assessments) + inner_window_ranges = map(a -> a.window_ranges, assessments) + inner_window_sizes = map(a -> a.window_sizes, assessments) + inner_window_masks = map(a -> a.window_mask, assessments) + inner_window_indices = map(a -> a.window_indices, assessments) + inner_window_counts = map(length, inner_window_sizes) + inner_window_jobs = map(a -> a.njobs, assessments) + + # Get outer problem vectors + window_mask = map(any, inner_window_masks) + window_indices = eachindex(vec(window_mask))[window_mask] + + # Calculate global stats max_allocations = if p.threaded sum(sort(inner_allocations)[1:min(end, nthreads)]) else - maximum(a -> maximum(a; init=0), inner_allocations; init=0) + maximum(a -> maximum(a; init=0), inner_window_allocations; init=0) end - njobs = length(window_sizes) - inner_window_counts = map(length, inner_window_sizes) + njobs = count(window_mask) max_windows = maximum(inner_window_counts) + shape = size(window_ranges) fields = (; + shape, njobs, max_windows, max_allocations, - inner_allocations, - window_sizes, + window_ranges, + window_indices, + window_mask, + inner_window_jobs, + inner_window_allocations, inner_window_counts, - inner_window_sizes + inner_window_ranges, + inner_window_sizes, + inner_window_indices, + inner_window_masks, ) print && display(pairs(fields)) @@ -312,31 +381,6 @@ function assess( return fields end -# function assess(p::BatchProblem, rast::RasterStack) -# window_indices = _window_indices(p, rast) -# _write_joblist(p; window_indices) -# a = allocations(p, rast) -# return (; max_allocations=a, njobs=length(window_indices)) -# end - -### Batch utilities - -function _read_joblist(p::BatchProblem) - # Read indices from the joblist file. This is generated in `assess` - isfile(p.joblistpath) || throw(ArgumentError("joblistpath $(p.joblistpath) does not exist")) - return parse.(Int, readlines(p.joblistpath)) -end - -function _write_joblist(p::BatchProblem; window_indices) - if !isnothing(p.joblistpath) - open(p.joblistpath, "w") do io - for i in window_indices - println(io, i) - end - end - end -end - # Mosaic the stored files to a RasterStack function Rasters.mosaic(p::BatchProblem; to, missingval=0.0, kw...) ranges = _window_ranges(p, to) @@ -363,10 +407,11 @@ end ### Shared utilities function _window_indices(p, rast; - window_ranges=_window_ranges(p, rast) + window_ranges=_window_ranges(p, rast), + window_sizes=_window_sizes(p, rast; window_ranges) ) # Get the Bool mask of needed windows - mask = _valid_window_mask(p, rast, window_ranges) + mask = prod.(window_sizes) .> 0 # Get the Int indices of the needed windows return eachindex(mask)[vec(mask)] end @@ -378,16 +423,29 @@ function _window_ranges(p::Union{BatchProblem,WindowedProblem}, rast::AbstractRa ws1, ws2 = windowsize = 2buffer .+ centersize cs1, cs2 = centersize # Define the corners of each window - corners = CartesianIndices(size)[begin:cs1:end, begin:cs2:end] + corners = CartesianIndices(size)[begin:cs1:end-2buffer, begin:cs2:end-2buffer] # Create an iterator of ranges for retreiving each window return [map((i, s, ws) -> i:min(s, i + ws-1), Tuple(c), size, windowsize) for c in corners] end -# Create a mask to skip tiles that have no target cells -_valid_window_mask(p, ::Nothing, ranges) = nothing -_valid_window_mask(p, rast::AbstractRasterStack, ranges) = - map(r -> _valid_targets(any, p, rast, r), ranges) - +# _get_window_with_zeroed_buffer!(dest, p::AbstractWindowedProblem, rast::RasterStack) = + # _get_window_with_zeroed_buffer(dest, p, rast, axes(rast)) +function _get_window_with_zeroed_buffer!(dest, p::AbstractWindowedProblem, rast::RasterStack, rs) + b = buffer(p) + fill = zero(eltype(rast.target_qualities)) + window = view(rast, rs...) + maplayers(dest, window) do d, w + parent(parent(d)) .= parent(w) + end + if !isnothing(grain(p)) + coarse_graining!(dest, grain(p)) + end + dest.target_qualities[begin:min(begin+b-1, end), :] .= fill + dest.target_qualities[:, begin:min(begin+b-1, end)] .= fill + dest.target_qualities[max(end-b+1, begin):end, :] .= fill + dest.target_qualities[:, max(end-b+1, begin):end] .= fill + return rebuild(dest; dims=dims(window)) +end _get_window_with_zeroed_buffer(p::AbstractWindowedProblem, rast::RasterStack) = _get_window_with_zeroed_buffer(p, rast, axes(rast)) function _get_window_with_zeroed_buffer(p::AbstractWindowedProblem, rast::RasterStack, rs) From 305fd5607d1a1644231afa8a154da6b50974e645 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 4 Feb 2025 10:33:48 +0100 Subject: [PATCH 17/51] updates, windowing a litte broken --- src/grid.jl | 22 ++++++---- src/problem.jl | 12 +++--- src/solvers.jl | 31 +++++++++++--- src/tiles.jl | 106 ++++++++++++++++++++++++++++-------------------- test/problem.jl | 31 +++++++------- 5 files changed, 122 insertions(+), 80 deletions(-) diff --git a/src/grid.jl b/src/grid.jl index b4110f2..d3a81e0 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -94,12 +94,12 @@ function Grid(nrows::Integer, # if any(t -> t < 0, nonzeros(costmatrix)) # throw(ArgumentError("The cost graph can have only non-negative edge weights. Perhaps you should change the cost function?")) # end - cost_digraph = SimpleDiGraph(costmatrix) - affinity_digraph = SimpleDiGraph(affinities) + # cost_digraph = SimpleDiGraph(costmatrix) + # affinity_digraph = SimpleDiGraph(affinities) - if ne(difference(cost_digraph, affinity_digraph)) > 0 - throw(ArgumentError("cost graph contains edges not present in the affinity graph")) - end + # if ne(difference(cost_digraph, affinity_digraph)) > 0 + # throw(ArgumentError("cost graph contains edges not present in the affinity graph")) + # end targetidx, targetnodes = _targetidx_and_nodes(target_qualities, id_to_grid_coordinate_list) qs = [_source_qualities[i] for i in id_to_grid_coordinate_list] @@ -129,7 +129,7 @@ function Grid(nrows::Integer, end function Grid(rast::RasterStack; qualities=get(rast, :qualities) do - ones(nrows, ncols) + ones(size(rast)) end, affinities=let affinities_raster = get(rast, :affinities, nothing) @@ -137,10 +137,9 @@ function Grid(rast::RasterStack; end, source_qualities=get(rast, :source_qualities, qualities), target_qualities=get(rast, :target_qualities, qualities), - costs=MinusLog(), kw... ) - Grid(size(rast)...; affinities, qualities, source_qualities, target_qualities, costs, kw...) + Grid(size(rast)...; affinities, qualities, source_qualities, target_qualities, kw...) end # TODO move functions like MinusLog to problems and pass in here Grid(p::AbstractProblem, rast::RasterStack; kw...) = @@ -176,6 +175,7 @@ _unwrap(R::AbstractMatrix) = R # Compute a vector of the cartesian indices of nonzero target qualities and # the corresponding node id corresponding to the indices _targetidx(q::AbstractMatrix, grididxs::AbstractVector) = grididxs +_targetidx(q::Raster, grididxs::AbstractVector) = _targetidx(parent(q), grididxs) _targetidx(q::SparseMatrixCSC, grididxs::AbstractVector) = CartesianIndex.(findnz(q)[1:2]...) ∩ grididxs @@ -183,6 +183,12 @@ _targetidx_and_nodes(g::Grid) = _targetidx_and_nodes(g.target_qualities, g.id_to_grid_coordinate_list) function _targetidx_and_nodes(target_qualities, id_to_grid_coordinate_list) targetidx = _targetidx(target_qualities, id_to_grid_coordinate_list) + # targetnodes = Vector{Int}(undef, length(targetidx)) + # n = findfirst(==(id_to_grid_coordinate_list[1]), targetnodes) + # targetnodes[1] = n + # for i in eachindex(id_to_grid_coordinate_list)[2:end] + # findnext(==(id_to_grid_coordinate_list[i]), targetnodes, n) + # end targetnodes = findall( t -> t ∈ targetidx, id_to_grid_coordinate_list) diff --git a/src/problem.jl b/src/problem.jl index 091a41b..6ba3266 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -44,12 +44,14 @@ costs(p::Problem) = p.costs prune(p::Problem) = p.prune -solve(p::Problem, rast::RasterStack) = solve!(init(p, rast), p) -solve!(workspace::NamedTuple, p::Problem) = - solve!(workspace, solver(p), connectivity_measure(p), p) +solve(p::Problem, rast::RasterStack; kw...) = solve!(init(p, rast; kw...), p; kw...) +solve!(workspace::NamedTuple, p::Problem; kw...) = + solve!(workspace, solver(p), connectivity_measure(p), p; kw...) # Init is conditional on solver and connectivity measure -init!(workspace::NamedTuple, p::Problem, rast::RasterStack) = - init!(workspace, solver(p), connectivity_measure(p), p, rast) +function init!(workspace::NamedTuple, p::Problem, rast::RasterStack; kw...) + println("Initialising for $(solver(p))") + init!(workspace, solver(p), connectivity_measure(p), p, rast; kw...) +end init(p::AbstractProblem, args...; kw...) = init!((;), p, args...; kw...) \ No newline at end of file diff --git a/src/solvers.jl b/src/solvers.jl index 780d03e..e47e9cf 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -5,11 +5,16 @@ function init!( s::Solver, cm::FundamentalMeasure, p::AbstractProblem, - rast::RasterStack, + rast::RasterStack; + verbose=true ) + verbose = true + verbose && println("Defining grid for RasterStack size $(size(rast))...") grid = g = Grid(p, rast) + verbose && println("Retreiving measures...") gms = graph_measures(p) cf = connectivity_function(p) + verbose && println("Defining sparse arrays of size $(size(g.affinities))...") Pref = _Pref(g.affinities) W = _W(Pref, cm.θ, g.costmatrix) # Sparse lhs @@ -17,8 +22,10 @@ function init!( # Sparse rhs B_sparse = sparse_rhs(g.targetnodes, size(g.costmatrix, 1)) # A_init = haskey(ws, :A_init) ? init(s, A) : init!(ws.A_init, s, A) + verbose && println("Initialising factorizations...") A_init = init(s, A) # B_dense becomes Z + verbose && println("Allocating workspaces...") B_dense = haskey(ws, :Z) ? copyto!(_resize(ws.Z, size(B_sparse)), B_sparse) : Matrix(B_sparse) n_workspaces = count_workspaces(p) n_permuted_workspaces = count_permuted_workspaces(p) @@ -33,11 +40,14 @@ function init!( else [similar(B_dense') for _ in 1:n_permuted_workspaces] end + + verbose && println("Solving Z matrix...") Z = ldiv!(s, A_init, B_dense; B_copy=copyto!(workspaces[1], B_dense)) # Check that values in Z are not too small: _check_z(s, Z, W, g) grsp = GridRSP(grid, cm.θ, Pref, W, Z) + verbose && println("Calculating inverses...") Zⁱ = if hastrait(needs_inv, gms) haskey(ws, :Zⁱ) ? _inv!(_reshape(ws.Zⁱ, size(Z)), Z) : _inv(Z) else @@ -62,27 +72,32 @@ function init!( else nothing, nothing end + # Create an intermediate workspace to use in computations workspace_kw = (; Zⁱ, workspaces, permuted_workspaces, Aadj_init, Aadj, A, A_init) + expected_costs = if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost + verbose && println("Calculating expected cost...") ConScape.expected_cost(grsp; workspace_kw..., solver=solver(p)) else nothing end free_energy_distances = if hastrait(needs_free_energy_distance, gms) || cf == ConScape.free_energy_distance + verbose && println("Calculating free energy distance...") ConScape.free_energy_distance(grsp; workspace_kw..., solver=solver(p)) else nothing end proximities = if hastrait(needs_proximity, gms) + verbose && println("Calculating proximities...") # We populate this during `solve` haskey(ws, :proximities) ? _reshape(ws.proximities, size(Z)) : similar(Z) else nothing end - # TODO make a trait CW = grsp.g.costmatrix .* grsp.W + verbose && println("Finished workspace...") return (; grid, grsp, workspace_kw..., CW, free_energy_distances, expected_costs, proximities) end @@ -91,7 +106,8 @@ function solve!( workspace::NamedTuple, s::Solver, cm::ConnectivityMeasure, - p::AbstractProblem, + p::AbstractProblem; + verbose=false ) g = workspace.grid return map(p.graph_measures) do gm @@ -102,7 +118,8 @@ function solve!( workspace::NamedTuple, s::Solver, cm::FundamentalMeasure, - p::Problem, + p::Problem; + verbose=false ) g = workspace.grid gms = graph_measures(p) @@ -152,7 +169,11 @@ function solve!( return _merge_to_stack(results) end -function init!(workspace::NamedTuple, s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, rast::RasterStack) + +function _init!( + workspace::NamedTuple, s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, rast::RasterStack; + verbose=false, +) # TODO what is needed here? return (; grid=Grid(p, rast)) end diff --git a/src/tiles.jl b/src/tiles.jl index 9007921..bd68aaa 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -39,10 +39,11 @@ function solve(p::WindowedProblem, rast::RasterStack; kw...) end function solve!(workspace, p::WindowedProblem, rast::RasterStack; test_windows::Bool=false, - verbose::Bool=false, + verbose::Bool=true, mosaic_return::Bool=true, window_ranges=_window_ranges(p, rast), window_indices=_window_indices(p, rast; window_ranges), + window_sizes=_window_sizes(p, rast; window_ranges), ) # Test outputs just return the inputs after window masking if test_windows @@ -58,31 +59,44 @@ function solve!(workspace, p::WindowedProblem, rast::RasterStack; end end + n = max(length(window_indices), p.threaded ? Threads.nthreads() : 1) + ch = Channel{NamedTuple}(n) + for _ in 1:n + put!(ch, (;)) + end + + sorted_indices = last.(sort!(prod.(window_sizes[window_indices]) .=> window_indices; rev=true)) + verbose && @show sorted_indices # Set up channels for threading - ch = workspace.channel + # ch = workspace.channel # Define empty outputs - output_stacks = Vector{RasterStack}(undef, length(window_indices)) + output_stacks = Vector{RasterStack}(undef, length(sorted_indices)) # Define a runner for threaded/non-threaded operation function run(i, ir) # Get a window range rs = window_ranges[ir] + verbose && println("Running job $ir on ranges $rs and thread $(Threads.threadid())") # verbose && println("Solving window $i $rs ") rast_window = _get_window_with_zeroed_buffer(p, rast, rs) # Initialise the window using stored memory - workspace = init!(take!(ch), p.problem, rast_window) + verbose && println("Getting workspace from channel...") + workspace = take!(ch) + verbose && println("Initialising window from size $(size(rast_window)), from ranges $rs...") + workspace = init!(workspace, p.problem, rast_window; verbose) # Solve for the window + verbose && println("Solving window $rs...") output_stacks[i] = solve!(workspace, p.problem) # Return the workspace to the channel put!(ch, workspace) end # Run the window problems if p.threaded - Threads.@threads for (i, ir) in enumerate(window_indices) - run(i, ir) + Threads.@threads for i in eachindex(sorted_indices) + run(i, sorted_indices[i]) end else - for (i, ir) in enumerate(window_indices) - run(i, ir) + for i in eachindex(sorted_indices) + run(i, sorted_indices[i]) end end # Maybe mosaic the output @@ -93,23 +107,22 @@ function solve!(workspace, p::WindowedProblem, rast::RasterStack; end end -init(p::AbstractWindowedProblem, rast::RasterStack) = init!((;), p, rast) -function init!(workspace::NamedTuple, p::AbstractWindowedProblem, rast::RasterStack) +init(p::AbstractWindowedProblem, rast::RasterStack; kw...) = init!((;), p, rast; kw...) +function init!(workspace::NamedTuple, p::AbstractWindowedProblem, rast::RasterStack; kw...) + # TODO actually allocate n = p.threaded ? Threads.nthreads() : 1 - workspace = if haskey(workspace, :channel) - workspace - else - channel = Channel{NamedTuple}(n) - for _ in 1:n - put!(channel, (;)) - end - (; channel) - end + # workspace = if haskey(workspace, :channel) + # workspace + # else + # channel = Channel{NamedTuple}(n) + # for _ in 1:n + # put!(channel, (;)) + # end + # (; channel) + # end return workspace end -# sorted_ranges = collect(last.(sort!(map(rs -> prod(_size(p, rast, rs)) => rs, used_ranges)))) - function _max_window_problem_size(p::AbstractWindowedProblem, rast; kw...) sizes = _window_problem_sizes(p, rast; kw...) _, i = findmax(prod, sizes) @@ -126,8 +139,8 @@ end _problem_size(p::AbstractProblem, rast) = _problem_size(p, rast, axes(rast)) function _problem_size(p::AbstractProblem, rast, ranges::Tuple) - source_count = _valid_sources(count, p, rast, ranges) - target_count = _valid_targets(count, p, rast, ranges) + source_count = parent(_valid_sources(count, p, rast, ranges)) + target_count = parent(_valid_targets(count, p, rast, ranges)) return source_count, target_count end @@ -280,7 +293,6 @@ function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack njobs, max_allocations, window_allocations, - window_ranges, window_sizes, window_mask, window_indices, @@ -291,11 +303,11 @@ function assess( rast::AbstractRasterStack; nthreads=Threads.nthreads(), print=true, + verbose=false, kw... ) # Calculate outer window ranges window_ranges = _window_ranges(p, rast) - @show length(window_ranges) # Define a channel to store window raster and reuse memory channel = Channel{Any}(Threads.nthreads()) @@ -309,7 +321,7 @@ function assess( # Run assessments threaded as they can take a long time for large rasters Threads.@threads for i in eachindex(vec(window_ranges)) rs = window_ranges[i] - println("Assessing batch: $i, $rs") + verbose && println("Assessing batch: $i, $rs") window_rast = take!(channel) window_rast = if map(length, rs) == size(window_rast) _get_window_with_zeroed_buffer!(window_rast, p, rast, rs) @@ -317,7 +329,7 @@ function assess( _get_window_with_zeroed_buffer(p, rast, rs) end # Skip NaN only rasters - nvalid = count(x -> !(isnan(x) || x == 0), window_rast.target_qualities) + nvalid = count(_isnvalid, window_rast.target_qualities) assessments[i] = if nvalid > 0 assess(p.problem, window_rast; nthreads, print=false, kw...) else @@ -327,7 +339,6 @@ function assess( njobs=0, max_allocations=0, window_allocations=Int[], - window_ranges=Tuple{UnitRange,UnitRange}[], window_sizes=Tuple{Int,Int}[], window_mask=Bool[], window_indices=Int[], @@ -338,7 +349,6 @@ function assess( # Get vectors of vectors from inner problem inner_window_allocations = map(a -> a.window_allocations, assessments) - inner_window_ranges = map(a -> a.window_ranges, assessments) inner_window_sizes = map(a -> a.window_sizes, assessments) inner_window_masks = map(a -> a.window_mask, assessments) inner_window_indices = map(a -> a.window_indices, assessments) @@ -364,13 +374,11 @@ function assess( njobs, max_windows, max_allocations, - window_ranges, window_indices, window_mask, inner_window_jobs, inner_window_allocations, inner_window_counts, - inner_window_ranges, inner_window_sizes, inner_window_indices, inner_window_masks, @@ -416,6 +424,13 @@ function _window_indices(p, rast; return eachindex(mask)[vec(mask)] end +function _window_sizes(p, rast::RasterStack; window_ranges=_window_ranges(p, rast)) + map(window_ranges) do rs + window_rast = view(rast, rs...) + _problem_size(p, window_rast) + end +end + function _window_ranges(p::Union{BatchProblem,WindowedProblem}, rast::AbstractRasterStack) size = Base.size(rast) centersize = ConScape.centersize(p) @@ -428,8 +443,8 @@ function _window_ranges(p::Union{BatchProblem,WindowedProblem}, rast::AbstractRa return [map((i, s, ws) -> i:min(s, i + ws-1), Tuple(c), size, windowsize) for c in corners] end -# _get_window_with_zeroed_buffer!(dest, p::AbstractWindowedProblem, rast::RasterStack) = - # _get_window_with_zeroed_buffer(dest, p, rast, axes(rast)) +# _get_window_with_zeroed_buffer(dest, p, rast, axes(rast)) + function _get_window_with_zeroed_buffer!(dest, p::AbstractWindowedProblem, rast::RasterStack, rs) b = buffer(p) fill = zero(eltype(rast.target_qualities)) @@ -450,17 +465,16 @@ _get_window_with_zeroed_buffer(p::AbstractWindowedProblem, rast::RasterStack) = _get_window_with_zeroed_buffer(p, rast, axes(rast)) function _get_window_with_zeroed_buffer(p::AbstractWindowedProblem, rast::RasterStack, rs) b = buffer(p) - fill = zero(eltype(rast.target_qualities)) - dest = if isnothing(grain(p)) - rast[rs...] - else - coarse_graining(view(rast, rs), grain(p)) + dest = view(rast, rs...) + tq = dest.target_qualities + tq_sparse = spzeros(eltype(tq), size(tq)) + center_ranges = map(s -> b:s-b, size(tq)) + tq_sparse[center_ranges...] = tq[center_ranges...] + if !isnothing(grain(p)) + tq_sparse = coarse_graining(tq_sparse, grain(p)) end - dest.target_qualities[begin:min(begin+b-1, end), :] .= fill - dest.target_qualities[:, begin:min(begin+b-1, end)] .= fill - dest.target_qualities[max(end-b+1, begin):end, :] .= fill - dest.target_qualities[:, max(end-b+1, begin):end] .= fill - return dest + + return merge(dest, (; target_qualities=rebuild(tq; data=tq_sparse))) end # Apply function `f` to the validity (Bool) of each window. Empty windows are false. @@ -472,7 +486,7 @@ function _valid_sources(f, p, rast::AbstractRasterStack, source_ranges::Tuple) window = view(rast.qualities, source_ranges...) # If there are non-NaN cells above zero, keep the window # TODO allow users to change this condition? - f(x -> !isnan(x) && x > zero(x), window) + return _isvalid.(window) end function _valid_targets( f, p, rast::AbstractRasterStack, source_ranges::Tuple @@ -486,7 +500,9 @@ function _valid_targets( window = view(rast.target_qualities, target_ranges...) # If there are non-NaN cells above zero, keep the window # TODO allow users to change this condition? - f(x -> !isnan(x) && x > zero(x), window) + return _isvalid.(window) end +_isvalid(x) = !isnan(x) && x > zero(x) + _resolution(rast) = abs(step(lookup(rast, X))) \ No newline at end of file diff --git a/test/problem.jl b/test/problem.jl index 8e7dec5..056de16 100644 --- a/test/problem.jl +++ b/test/problem.jl @@ -1,6 +1,7 @@ +nothing using ConScape, Test, SparseArrays, LinearAlgebra -using Rasters, ArchGDAL, NCDatasets, Plots -using LinearSolve +using Rasters, ArchGDAL +using ConScape.LinearSolve datadir = joinpath(dirname(pathof(ConScape)), "..", "data") _tempdir = mkdir(tempname()) @@ -55,8 +56,8 @@ test_grsp = ConScape.GridRSP(test_g; θ) solvers = ( ConScape.MatrixSolver(), ConScape.VectorSolver(), - ConScape.VectorSolver(; threaded=true), - ConScape.LinearSolver(), + # ConScape.VectorSolver(; threaded=true), + # ConScape.LinearSolver(), ) solver = ConScape.VectorSolver(; threaded=true) solver = ConScape.MatrixSolver() @@ -80,9 +81,7 @@ for solver in solvers @test workspace.free_energy_distances == ConScape.free_energy_distance(test_grsp) end - ConScape.allocations(problem, rast).total / 1e6 - - @time result = ConScape.solve(problem, workspace); + result = ConScape.solve!(workspace, problem); # @profview result = ConScape.solve(problem, workspace) @test result isa NamedTuple @test size(result.ch_one) == size(rast) @@ -157,6 +156,8 @@ expected_layers = (:betk, :ch) solver = ConScape.MatrixSolver() problem = ConScape.Problem(; graph_measures, connectivity_measure, solver) +# ConScape.allocations(problem, rast) / 1e6 +solve(problem, rast; verbose=true) @testset "target mosaicing matches original" begin # TODO note that this breaks if q weighting is included @@ -164,15 +165,11 @@ problem = ConScape.Problem(; graph_measures, connectivity_measure, solver) buffer=10, centersize=5, threaded=false ) @test collect(ConScape._window_ranges(windowed_problem, rast)) == [ - (1:25, 1:25) (1:25, 6:30) (1:25, 11:35) (1:25, 16:40) (1:25, 21:45) (1:25, 26:50) (1:25, 31:55) (1:25, 36:59) (1:25, 41:59) (1:25, 46:59) (1:25, 51:59) (1:25, 56:59) - (6:30, 1:25) (6:30, 6:30) (6:30, 11:35) (6:30, 16:40) (6:30, 21:45) (6:30, 26:50) (6:30, 31:55) (6:30, 36:59) (6:30, 41:59) (6:30, 46:59) (6:30, 51:59) (6:30, 56:59) - (11:35, 1:25) (11:35, 6:30) (11:35, 11:35) (11:35, 16:40) (11:35, 21:45) (11:35, 26:50) (11:35, 31:55) (11:35, 36:59) (11:35, 41:59) (11:35, 46:59) (11:35, 51:59) (11:35, 56:59) - (16:40, 1:25) (16:40, 6:30) (16:40, 11:35) (16:40, 16:40) (16:40, 21:45) (16:40, 26:50) (16:40, 31:55) (16:40, 36:59) (16:40, 41:59) (16:40, 46:59) (16:40, 51:59) (16:40, 56:59) - (21:44, 1:25) (21:44, 6:30) (21:44, 11:35) (21:44, 16:40) (21:44, 21:45) (21:44, 26:50) (21:44, 31:55) (21:44, 36:59) (21:44, 41:59) (21:44, 46:59) (21:44, 51:59) (21:44, 56:59) - (26:44, 1:25) (26:44, 6:30) (26:44, 11:35) (26:44, 16:40) (26:44, 21:45) (26:44, 26:50) (26:44, 31:55) (26:44, 36:59) (26:44, 41:59) (26:44, 46:59) (26:44, 51:59) (26:44, 56:59) - (31:44, 1:25) (31:44, 6:30) (31:44, 11:35) (31:44, 16:40) (31:44, 21:45) (31:44, 26:50) (31:44, 31:55) (31:44, 36:59) (31:44, 41:59) (31:44, 46:59) (31:44, 51:59) (31:44, 56:59) - (36:44, 1:25) (36:44, 6:30) (36:44, 11:35) (36:44, 16:40) (36:44, 21:45) (36:44, 26:50) (36:44, 31:55) (36:44, 36:59) (36:44, 41:59) (36:44, 46:59) (36:44, 51:59) (36:44, 56:59) - (41:44, 1:25) (41:44, 6:30) (41:44, 11:35) (41:44, 16:40) (41:44, 21:45) (41:44, 26:50) (41:44, 31:55) (41:44, 36:59) (41:44, 41:59) (41:44, 46:59) (41:44, 51:59) (41:44, 56:59) + (1:25, 1:25) (1:25, 6:30) (1:25, 11:35) (1:25, 16:40) (1:25, 21:45) (1:25, 26:50) (1:25, 31:55) (1:25, 36:59) + (6:30, 1:25) (6:30, 6:30) (6:30, 11:35) (6:30, 16:40) (6:30, 21:45) (6:30, 26:50) (6:30, 31:55) (6:30, 36:59) + (11:35, 1:25) (11:35, 6:30) (11:35, 11:35) (11:35, 16:40) (11:35, 21:45) (11:35, 26:50) (11:35, 31:55) (11:35, 36:59) + (16:40, 1:25) (16:40, 6:30) (16:40, 11:35) (16:40, 16:40) (16:40, 21:45) (16:40, 26:50) (16:40, 31:55) (16:40, 36:59) + (21:44, 1:25) (21:44, 6:30) (21:44, 11:35) (21:44, 16:40) (21:44, 21:45) (21:44, 26:50) (21:44, 31:55) (21:44, 36:59) ] test_results = ConScape.solve(windowed_problem, rast; test_windows=true) inner_targets = copy(rast.target_qualities) @@ -191,7 +188,7 @@ end buffer, centersize=5, threaded=false ) mask!(rast; with=rast) - rast_inner = ConScape._get_window_with_zeroed_buffer(rast, axes(rast), windowed_problem) + rast_inner = ConScape._get_window_with_zeroed_buffer(windowed_problem, rast, axes(rast)) @time wp_result = ConScape.solve(windowed_problem, rast) @time p_result = ConScape.solve(problem, rast_inner) # plot(p_result) From 9c5f7adbd8eb4ba8561fbc6b67e9d5cf4271fda3 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Thu, 6 Feb 2025 12:21:26 +0100 Subject: [PATCH 18/51] update for batch assesments --- src/gridrsp.jl | 5 ++- src/solvers.jl | 2 +- src/tiles.jl | 112 ++++++++++++++++++++++++++++------------------- test/runtests.jl | 6 +-- 4 files changed, 75 insertions(+), 50 deletions(-) diff --git a/src/gridrsp.jl b/src/gridrsp.jl index 2c2d953..66b97e3 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -12,10 +12,11 @@ end Construct a GridRSP from a `g::Grid` based on the inverse temperature parameter `θ::Real`. """ -function GridRSP(g::Grid; θ=nothing) +function GridRSP(g::Grid; θ=nothing, verbose=true) Pref = _Pref(g.affinities) W = _W(Pref, θ, g.costmatrix) + error() @debug("Computing fundamental matrix of non-absorbing paths (Z). Please be patient...") Z = (I - W)\Matrix(sparse(g.targetnodes, 1:length(g.targetnodes), @@ -23,7 +24,7 @@ function GridRSP(g::Grid; θ=nothing) size(g.costmatrix, 1), length(g.targetnodes))) # Check that values in Z are not too small: - if minimum(Z)*minimum(nonzeros(g.costmatrix .* W)) == 0 + verbose && if minimum(Z)*minimum(nonzeros(g.costmatrix .* W)) == 0 @warn "Warning: Z-matrix contains too small values, which can lead to inaccurate results! Check that the graph is connected or try decreasing θ." end diff --git a/src/solvers.jl b/src/solvers.jl index e47e9cf..2fde587 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -44,7 +44,7 @@ function init!( verbose && println("Solving Z matrix...") Z = ldiv!(s, A_init, B_dense; B_copy=copyto!(workspaces[1], B_dense)) # Check that values in Z are not too small: - _check_z(s, Z, W, g) + # verbose && _check_z(s, Z, W, g) grsp = GridRSP(grid, cm.θ, Pref, W, Z) verbose && println("Calculating inverses...") diff --git a/src/tiles.jl b/src/tiles.jl index bd68aaa..9e88f24 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -42,8 +42,9 @@ function solve!(workspace, p::WindowedProblem, rast::RasterStack; verbose::Bool=true, mosaic_return::Bool=true, window_ranges=_window_ranges(p, rast), - window_indices=_window_indices(p, rast; window_ranges), window_sizes=_window_sizes(p, rast; window_ranges), + window_indices=_window_indices(p, rast; window_ranges), + timed=false, ) # Test outputs just return the inputs after window masking if test_windows @@ -72,10 +73,10 @@ function solve!(workspace, p::WindowedProblem, rast::RasterStack; # Define empty outputs output_stacks = Vector{RasterStack}(undef, length(sorted_indices)) # Define a runner for threaded/non-threaded operation - function run(i, ir) + function run(i, iw) # Get a window range - rs = window_ranges[ir] - verbose && println("Running job $ir on ranges $rs and thread $(Threads.threadid())") + rs = window_ranges[iw] + verbose && println("Running job $iw on ranges $rs and thread $(Threads.threadid())") # verbose && println("Solving window $i $rs ") rast_window = _get_window_with_zeroed_buffer(p, rast, rs) # Initialise the window using stored memory @@ -89,21 +90,37 @@ function solve!(workspace, p::WindowedProblem, rast::RasterStack; # Return the workspace to the channel put!(ch, workspace) end + window_elapsed = Vector{Pair{Float64,Int64}}(undef, length(sorted_indices)) # Run the window problems if p.threaded Threads.@threads for i in eachindex(sorted_indices) - run(i, sorted_indices[i]) + iw = sorted_indices[i] + e = @elapsed run(i, iw) + window_elapsed[i] = e => iw end else for i in eachindex(sorted_indices) - run(i, sorted_indices[i]) + iw = sorted_indices[i] + e = @elapsed run(i, iw) + window_elapsed[i] = e => iw end end # Maybe mosaic the output return if mosaic_return - Rasters.mosaic(sum, output_stacks; to=rast, missingval=0.0, verbose) + t = time() + result = Rasters.mosaic(sum, output_stacks; to=rast, missingval=0.0, verbose) + mosaic_elapsed = time() - t + if timed + return (; result, window_elapsed, mosaic_elapsed) + else + return result + end else - output_stacks + if timed + return (; result=output_stacks, mosaic_elapsed) + else + return output_stacks + end end end @@ -139,8 +156,8 @@ end _problem_size(p::AbstractProblem, rast) = _problem_size(p, rast, axes(rast)) function _problem_size(p::AbstractProblem, rast, ranges::Tuple) - source_count = parent(_valid_sources(count, p, rast, ranges)) - target_count = parent(_valid_targets(count, p, rast, ranges)) + source_count = _valid_sources(count, p, rast, ranges) + target_count = _valid_targets(count, p, rast, ranges) return source_count, target_count end @@ -232,7 +249,7 @@ end # Single batch job for running on clusters function solve(p::BatchProblem, rast::RasterStack, i::Int; window_indices=_window_indices(p, rast), - verbose::Bool=false, + verbose::Bool=false, kw... ) # Get the ranges of all jobs @@ -242,7 +259,8 @@ function solve(p::BatchProblem, rast::RasterStack, i::Int; rs = window_ranges[window_indices[i]] # Get the raster data for job i - rast_window = _get_window_with_zeroed_buffer(p, rast, rs) + # Just read the whole thing now to reduce reads in overlapping windows + rast_window = read(_get_window_with_zeroed_buffer(p, rast, rs)) # Solve for this raster output = solve(p.problem, rast_window; kw...) @@ -308,32 +326,39 @@ function assess( ) # Calculate outer window ranges window_ranges = _window_ranges(p, rast) + println("Assessing $(length(window_ranges)) jobs") # Define a channel to store window raster and reuse memory channel = Channel{Any}(Threads.nthreads()) - for i in 1:nthreads - put!(channel, _get_window_with_zeroed_buffer(p, rast, first(window_ranges))) + open(rast) do o + for i in 1:nthreads + put!(channel, _get_window_with_zeroed_buffer(getindex, p, o, first(window_ranges))) + end end # Define a vector for all assessment data assessments = Vector{Any}(undef, length(window_ranges)) - # Run assessments threaded as they can take a long time for large rasters Threads.@threads for i in eachindex(vec(window_ranges)) rs = window_ranges[i] - verbose && println("Assessing batch: $i, $rs") + println("Assessing batch: $i, $rs") + verbose && println("Retrieving raster from channel...") window_rast = take!(channel) - window_rast = if map(length, rs) == size(window_rast) - _get_window_with_zeroed_buffer!(window_rast, p, rast, rs) - else - _get_window_with_zeroed_buffer(p, rast, rs) + verbose && println("Copy raster data") + window_rast = open(rast) do o + if map(length, rs) == size(window_rast) + _get_window_with_zeroed_buffer!(window_rast, p, o, rs) + else + _get_window_with_zeroed_buffer(getindex, p, o, rs) + end end - # Skip NaN only rasters - nvalid = count(_isnvalid, window_rast.target_qualities) + verbose && println("Skipping NaN only rasters...") + nvalid = count(_isvalid, window_rast.target_qualities) assessments[i] = if nvalid > 0 + verbose && println(" nvalid: $nvalid") assess(p.problem, window_rast; nthreads, print=false, kw...) else - println(" No targets found") + verbose && println(" No targets found") (; shape=(0, 0), njobs=0, @@ -435,7 +460,7 @@ function _window_ranges(p::Union{BatchProblem,WindowedProblem}, rast::AbstractRa size = Base.size(rast) centersize = ConScape.centersize(p) buffer = ConScape.buffer(p) - ws1, ws2 = windowsize = 2buffer .+ centersize + windowsize = 2buffer .+ centersize cs1, cs2 = centersize # Define the corners of each window corners = CartesianIndices(size)[begin:cs1:end-2buffer, begin:cs2:end-2buffer] @@ -446,27 +471,26 @@ end # _get_window_with_zeroed_buffer(dest, p, rast, axes(rast)) function _get_window_with_zeroed_buffer!(dest, p::AbstractWindowedProblem, rast::RasterStack, rs) - b = buffer(p) - fill = zero(eltype(rast.target_qualities)) window = view(rast, rs...) - maplayers(dest, window) do d, w - parent(parent(d)) .= parent(w) - end - if !isnothing(grain(p)) - coarse_graining!(dest, grain(p)) - end - dest.target_qualities[begin:min(begin+b-1, end), :] .= fill - dest.target_qualities[:, begin:min(begin+b-1, end)] .= fill - dest.target_qualities[max(end-b+1, begin):end, :] .= fill - dest.target_qualities[:, max(end-b+1, begin):end] .= fill - return rebuild(dest; dims=dims(window)) + dest = rebuild(dest; dims=dims(window)) + # @show typeof(parent(parent(dest.qualities))) typeof(parent(parent(window.qualities))) + # error() + parent(parent(dest.qualities)) .= parent(parent(window.qualities)) + parent(parent(dest.affinities)) .= parent(parent(window.affinities)) + return _with_sparse_targets(p, window, dest) +end +_get_window_with_zeroed_buffer(p::AbstractWindowedProblem, args...) = + _get_window_with_zeroed_buffer(view, p, args...) +_get_window_with_zeroed_buffer(f::Function , p::AbstractWindowedProblem, rast::RasterStack) = + _get_window_with_zeroed_buffer(f, p, rast, axes(rast)) +function _get_window_with_zeroed_buffer(f::Function, p::AbstractWindowedProblem, rast::RasterStack, rs) + window = f(rast, rs...) + return _with_sparse_targets(p, window, window) end -_get_window_with_zeroed_buffer(p::AbstractWindowedProblem, rast::RasterStack) = - _get_window_with_zeroed_buffer(p, rast, axes(rast)) -function _get_window_with_zeroed_buffer(p::AbstractWindowedProblem, rast::RasterStack, rs) + +function _with_sparse_targets(p, source, dest) b = buffer(p) - dest = view(rast, rs...) - tq = dest.target_qualities + tq = source.target_qualities tq_sparse = spzeros(eltype(tq), size(tq)) center_ranges = map(s -> b:s-b, size(tq)) tq_sparse[center_ranges...] = tq[center_ranges...] @@ -486,7 +510,7 @@ function _valid_sources(f, p, rast::AbstractRasterStack, source_ranges::Tuple) window = view(rast.qualities, source_ranges...) # If there are non-NaN cells above zero, keep the window # TODO allow users to change this condition? - return _isvalid.(window) + return f(_isvalid.(window)) end function _valid_targets( f, p, rast::AbstractRasterStack, source_ranges::Tuple @@ -500,7 +524,7 @@ function _valid_targets( window = view(rast.target_qualities, target_ranges...) # If there are non-NaN cells above zero, keep the window # TODO allow users to change this condition? - return _isvalid.(window) + return f(_isvalid.(window)) end _isvalid(x) = !isnan(x) && x > zero(x) diff --git a/test/runtests.jl b/test/runtests.jl index 7a26086..d8f839c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,7 +8,7 @@ using Rasters, ArchGDAL, Plots datadir = joinpath(dirname(pathof(ConScape)), "..", "data") _tempdir = mkdir(tempname()) -#@testset "sno_2000 Rasters" begin +@testset "sno_2000 Rasters" begin landscape = "sno_2000" θ = 0.1 @@ -67,7 +67,7 @@ _tempdir = mkdir(tempname()) matrix_type=mt) isa ConScape.SparseMatrixCSC end - # @testset "Test betweenness" begin + @testset "Test betweenness" begin @testset "q-weighted" begin bet = ConScape.betweenness_qweighted(grsp) @test bet isa Raster @@ -77,7 +77,7 @@ _tempdir = mkdir(tempname()) 4641.815380725279 3365.3296878569213 477.1085971945757], atol=1e-3) end - # @testset "k-weighted" begin + @testset "k-weighted" begin bet = ConScape.betweenness_kweighted(grsp, diagvalue=1.) @test bet isa Raster @test isapprox(bet[21:23, 31:33], [ From 322b52064a680ffe933d5fcab45f9e9f40cb18b1 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Thu, 6 Feb 2025 14:53:44 +0100 Subject: [PATCH 19/51] reorganise init --- src/graph_measure.jl | 1 + src/problem.jl | 4 +- src/randomizedshortestpath.jl | 5 +- src/solvers.jl | 157 ++++++++++++++++++++-------------- src/tiles.jl | 6 +- test/problem.jl | 62 +++++++------- 6 files changed, 132 insertions(+), 103 deletions(-) diff --git a/src/graph_measure.jl b/src/graph_measure.jl index 46279df..4c139cb 100644 --- a/src/graph_measure.jl +++ b/src/graph_measure.jl @@ -106,6 +106,7 @@ needs_connectivity(::Criticality) = true return_type(::GraphMeasure) = false needs_inv(::GraphMeasure) = false needs_inv(::BetweennessMeasure) = true +needs_Z(::GraphMeasure) = true needs_workspaces(::GraphMeasure) = 0 needs_workspaces(::BetweennessMeasure) = 1 needs_workspaces(::EdgeBetweennessKweighted) = 2 diff --git a/src/problem.jl b/src/problem.jl index 6ba3266..0488e56 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -49,8 +49,8 @@ solve!(workspace::NamedTuple, p::Problem; kw...) = solve!(workspace, solver(p), connectivity_measure(p), p; kw...) # Init is conditional on solver and connectivity measure -function init!(workspace::NamedTuple, p::Problem, rast::RasterStack; kw...) - println("Initialising for $(solver(p))") +function init!(workspace::NamedTuple, p::Problem, rast::RasterStack; verbose=false, kw...) + verbose && println("Initialising for $(solver(p))") init!(workspace, solver(p), connectivity_measure(p), p, rast; kw...) end diff --git a/src/randomizedshortestpath.jl b/src/randomizedshortestpath.jl index 731104a..1335137 100644 --- a/src/randomizedshortestpath.jl +++ b/src/randomizedshortestpath.jl @@ -219,9 +219,10 @@ function RSP_expected_cost(W::SparseMatrixCSC, A=(I - W), A_init=init(solver, A), workspaces=[similar(Z), similar(Z)], - CW=C .* W, + expected_costs, kw... ) + CW = C .* W workspace1, workspace2 = workspaces if axes(W) != axes(C) throw(DimensionMismatch("")) @@ -256,7 +257,7 @@ function RSP_expected_cost(W::SparseMatrixCSC, dˢ[j] = C̄[landmarks[j], j] end C̄ .-= dˢ' - return copy(C̄) + return copyto!(expected_costs, C̄) end function RSP_free_energy_distance(Z::AbstractMatrix, θ::Real, landmarks::AbstractVector; diff --git a/src/solvers.jl b/src/solvers.jl index 2fde587..3bb17f4 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -6,53 +6,94 @@ function init!( cm::FundamentalMeasure, p::AbstractProblem, rast::RasterStack; - verbose=true + verbose=false, ) - verbose = true verbose && println("Defining grid for RasterStack size $(size(rast))...") grid = g = Grid(p, rast) verbose && println("Retreiving measures...") gms = graph_measures(p) cf = connectivity_function(p) verbose && println("Defining sparse arrays of size $(size(g.affinities))...") - Pref = _Pref(g.affinities) - W = _W(Pref, cm.θ, g.costmatrix) # Sparse lhs - A = I - W # Sparse rhs B_sparse = sparse_rhs(g.targetnodes, size(g.costmatrix, 1)) # A_init = haskey(ws, :A_init) ? init(s, A) : init!(ws.A_init, s, A) verbose && println("Initialising factorizations...") - A_init = init(s, A) # B_dense becomes Z verbose && println("Allocating workspaces...") - B_dense = haskey(ws, :Z) ? copyto!(_resize(ws.Z, size(B_sparse)), B_sparse) : Matrix(B_sparse) + Z = if hastrait(needs_inv, gms) + if haskey(ws, :Z) + copyto!(_resize(ws.Z, size(B_sparse)), B_sparse) + else + Matrix{eltype(B_sparse)}(undef, size(B_sparse)) + end + else + nothing + end + Zⁱ = if hastrait(needs_inv, gms) + haskey(ws, :Zⁱ) ? _resize(ws.Zⁱ, size(Z)) : similar(Z) + else + nothing + end n_workspaces = count_workspaces(p) n_permuted_workspaces = count_permuted_workspaces(p) - # @show haskey(ws, :workspaces) workspaces = if haskey(ws, :workspaces) - [_reshape(w, size(B_dense)) for w in ws.workspaces] + [_reshape(w, size(Z)) for w in ws.workspaces] else - [similar(B_dense) for _ in 1:n_workspaces] + [similar(Z) for _ in 1:n_workspaces] end permuted_workspaces = if haskey(ws, :workspaces) - [_reshape(pw, size(B_dense')) for pw in ws.permuted_workspaces] + [_reshape(pw, size(Z')) for pw in ws.permuted_workspaces] else - [similar(B_dense') for _ in 1:n_permuted_workspaces] + [similar(Z') for _ in 1:n_permuted_workspaces] end - - verbose && println("Solving Z matrix...") - Z = ldiv!(s, A_init, B_dense; B_copy=copyto!(workspaces[1], B_dense)) - # Check that values in Z are not too small: - # verbose && _check_z(s, Z, W, g) - grsp = GridRSP(grid, cm.θ, Pref, W, Z) - - verbose && println("Calculating inverses...") - Zⁱ = if hastrait(needs_inv, gms) - haskey(ws, :Zⁱ) ? _inv!(_reshape(ws.Zⁱ, size(Z)), Z) : _inv(Z) + expected_costs = if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost + haskey(ws, :expected_costs) ? _reshape(ws.expected_costs, size(Z)) : similar(Z) + else + nothing + end + free_energy_distances = if hastrait(needs_free_energy_distance, gms) || cf == ConScape.free_energy_distance + haskey(ws, :free_energy_distances) ? _reshape(ws.free_energy_distances, size(Z)) : similar(Z) + else + nothing + end + proximities = if hastrait(needs_proximity, gms) + haskey(ws, :proximities) ? _reshape(ws.proximities, size(Z)) : similar(Z) else nothing end + + verbose && println("Finished allocating...") + + return (; Z, Zⁱ, workspaces, permuted_workspaces, grid, free_energy_distances, expected_costs, proximities) +end + +# RSP is not used for ConnectivityMeasure, so the solver isn't used +function solve!( + workspace::NamedTuple, + s::Solver, + cm::ConnectivityMeasure, + p::AbstractProblem; + verbose=false +) + g = workspace.grid + return map(p.graph_measures) do gm + compute(gm, p, ; workspace...) + end +end + +# Do all the work shared accross outputs +function _shared_solves!(ws::NamedTuple, solver::Solver, cm, p::Problem; + verbose=false +) + (; grid, Z) = ws + gms = graph_measures(p) + cf = connectivity_function(p) + + Pref = _Pref(grid.affinities) + W = _W(Pref, cm.θ, grid.costmatrix) + A = I - W + A_init = init(solver, A) Aadj_init, Aadj = if hastrait(needs_Aaj_init, gms) # Just take the adjoint of the factorization of A # where possible to save calculations and memory @@ -73,56 +114,43 @@ function init!( nothing, nothing end - # Create an intermediate workspace to use in computations - workspace_kw = (; Zⁱ, workspaces, permuted_workspaces, Aadj_init, Aadj, A, A_init) + if hastrait(needs_Z, gms) + verbose && println("Solving Z matrix...") + ldiv!(solver, A_init, Z; B_copy=copyto!(ws.workspaces[1], Z)) + # Check that values in Z are not too small: + # verbose && _check_z(s, Z, W, g) + end + if hastrait(needs_inv, gms) + verbose && println("Inverting Z...") + _inv!(_reshape(ws.Zⁱ, size(Z)), Z) + end - expected_costs = if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost + grsp = GridRSP(grid, cm.θ, Pref, W, Z) + workspace = (; Aadj_init, Aadj, A, A_init, ws...) + if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost verbose && println("Calculating expected cost...") - ConScape.expected_cost(grsp; workspace_kw..., solver=solver(p)) - else - nothing + ConScape.expected_cost(grsp; workspace..., solver) end - free_energy_distances = if hastrait(needs_free_energy_distance, gms) || cf == ConScape.free_energy_distance + if hastrait(needs_free_energy_distance, gms) || cf == ConScape.free_energy_distance verbose && println("Calculating free energy distance...") - ConScape.free_energy_distance(grsp; workspace_kw..., solver=solver(p)) - else - nothing - end - proximities = if hastrait(needs_proximity, gms) - verbose && println("Calculating proximities...") - # We populate this during `solve` - haskey(ws, :proximities) ? _reshape(ws.proximities, size(Z)) : similar(Z) - else - nothing + ConScape.free_energy_distance(grsp; workspace..., solver) end - CW = grsp.g.costmatrix .* grsp.W - verbose && println("Finished workspace...") - return (; grid, grsp, workspace_kw..., CW, free_energy_distances, expected_costs, proximities) + return workspace end -# RSP is not used for ConnectivityMeasure, so the solver isn't used function solve!( - workspace::NamedTuple, - s::Solver, - cm::ConnectivityMeasure, - p::AbstractProblem; - verbose=false -) - g = workspace.grid - return map(p.graph_measures) do gm - compute(gm, p, ; workspace...) - end -end -function solve!( - workspace::NamedTuple, - s::Solver, + ws::NamedTuple, + solver::Solver, cm::FundamentalMeasure, p::Problem; - verbose=false + verbose=false, ) - g = workspace.grid + workspace = _shared_solves!(ws, solver, cm, p; verbose) gms = graph_measures(p) + (; grid, Pref, W, Z) = workspace + # GridRSP is just a wrapper now, we can remove it later + grsp = GridRSP(grid, cm.θ, Pref, W, Z) distance_transformation = cm.distance_transformation results = if distance_transformation isa NamedTuple # Map over both distance transformations and graph measures @@ -135,7 +163,7 @@ function solve!( p1 = ConstructionBase.setproperties(p, (; connectivity_measure=cm1)) map(gms) do gm if needs_connectivity(gm) - compute(gm, p1, workspace.grsp; workspace...) + compute(gm, p1, grsp; workspace...) else nothing end @@ -146,7 +174,7 @@ function solve!( if needs_connectivity(gm) nothing else - compute(gm, p, workspace.grsp; workspace...) + compute(gm, p, grsp; workspace...) end end # Combine nested and flat results @@ -163,7 +191,7 @@ function solve!( _setproximities!(workspace.proximities, workspace.expected_costs, cm, p, workspace.grsp) # Map over graph measures map(p.graph_measures) do gm - compute(gm, p, workspace.grsp; workspace...) + compute(gm, p, grsp; workspace...) end end return _merge_to_stack(results) @@ -214,10 +242,9 @@ function init(s::VectorSolver, A::AbstractMatrix) F = lu(A) Tb = Vector{eltype(A)} if s.threaded - nbuffers = Threads.nthreads() - # channel = Channel{Tuple{typeof(F),Vector{Float64}}}(nbuffers) # Create one init per thread # UMFPACK `copy` shares memory but avoids workspace race conditions + nbuffers = Threads.nthreads() [ (; F=(i == 1 ? F : copy(F)), @@ -232,8 +259,8 @@ function init(s::VectorSolver, A::AbstractMatrix) end function LinearAlgebra.ldiv!(s::VectorSolver, init, B; B_copy=nothing) - transposeoptype = SparseArrays.LibSuiteSparse.UMFPACK_A # for SparseArrays.UMFPACK._AqldivB_kernel!(Z, F, B, transposeoptype) + transposeoptype = SparseArrays.LibSuiteSparse.UMFPACK_A # This is basically SparseArrays.UMFPACK._AqldivB_kernel! # But we unroll it to avoid copies or allocation of B diff --git a/src/tiles.jl b/src/tiles.jl index 9e88f24..5795f4b 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -39,7 +39,7 @@ function solve(p::WindowedProblem, rast::RasterStack; kw...) end function solve!(workspace, p::WindowedProblem, rast::RasterStack; test_windows::Bool=false, - verbose::Bool=true, + verbose::Bool=false, mosaic_return::Bool=true, window_ranges=_window_ranges(p, rast), window_sizes=_window_sizes(p, rast; window_ranges), @@ -326,7 +326,7 @@ function assess( ) # Calculate outer window ranges window_ranges = _window_ranges(p, rast) - println("Assessing $(length(window_ranges)) jobs") + verbose && println("Assessing $(length(window_ranges)) jobs") # Define a channel to store window raster and reuse memory channel = Channel{Any}(Threads.nthreads()) @@ -341,7 +341,7 @@ function assess( # Run assessments threaded as they can take a long time for large rasters Threads.@threads for i in eachindex(vec(window_ranges)) rs = window_ranges[i] - println("Assessing batch: $i, $rs") + verbose && println("Assessing batch: $i, $rs") verbose && println("Retrieving raster from channel...") window_rast = take!(channel) verbose && println("Copy raster data") diff --git a/test/problem.jl b/test/problem.jl index 056de16..dfd14c2 100644 --- a/test/problem.jl +++ b/test/problem.jl @@ -264,34 +264,34 @@ end # Scale Benchmarking... -# windowed_problem_t1 = ConScape.WindowedProblem(problem; -# source_radius=10, target_radius=1, threaded=true -# ) -# windowed_problem_t2 = ConScape.WindowedProblem(problem; -# source_radius=10, target_radius=2, threaded=true -# ) -# windowed_problem_t4 = ConScape.WindowedProblem(problem; -# source_radius=10, target_radius=4, threaded=true -# ) -# windowed_problem_t6 = ConScape.WindowedProblem(problem; -# source_radius=10, target_radius=6, threaded=true -# ) -# length(ConScape._get_window_ranges(windowed_problem_t1, rast)) -# length(ConScape._get_window_ranges(windowed_problem_t2, rast)) -# length(ConScape._get_window_ranges(windowed_problem_t4, rast)) -# length(ConScape._get_window_ranges(windowed_problem_t6, rast)) -# using BenchmarkTools -# @btime ConScape.solve(windowed_problem_t1, rast, verbose=false); -# @btime ConScape.solve(windowed_problem_t2, rast, verbose=false); -# @btime ConScape.solve(windowed_problem_t4, rast, verbose=false); -# @btime ConScape.solve(windowed_problem_t6, rast, verbose=false); -# @profview_allocs ConScape.solve(windowed_problem_t1, rast, verbose=false) sampling=1.0 -# @profview_allocs ConScape.solve(windowed_problem_t2, rast, verbose=false) sampling=1.0 -# @profview_allocs ConScape.solve(windowed_problem_t4, rast, verbose=false) sampling=1.0 -# @profview_allocs ConScape.solve(windowed_problem_t6, rast, verbose=false) sampling=1.0 -# @profview -# res = ConScape.solve(windowed_problem_t1, rast, verbose=false) -# @profview ConScape.solve(windowed_problem_t2, rast, verbose=false) -# @profview ConScape.solve(windowed_problem_t4, rast, verbose=false) -# @profview ConScape.solve(windowed_problem_t6, rast, verbose=false) -# res = ConScape.solve(windowed_problem_t4, rast, verbose=false) \ No newline at end of file +windowed_problem_t1 = ConScape.WindowedProblem(problem; + buffer=10, centersize=1, threaded=true +) +windowed_problem_t2 = ConScape.WindowedProblem(problem; + buffer=10, centersize=2, threaded=true +) +windowed_problem_t4 = ConScape.WindowedProblem(problem; + buffer=10, centersize=4, threaded=true +) +windowed_problem_t6 = ConScape.WindowedProblem(problem; + buffer=10, centersize=6, threaded=true +) +length(ConScape._window_ranges(windowed_problem_t1, rast)) +length(ConScape._window_ranges(windowed_problem_t2, rast)) +length(ConScape._window_ranges(windowed_problem_t4, rast)) +length(ConScape._window_ranges(windowed_problem_t6, rast)) +using BenchmarkTools +ConScape.solve(windowed_problem_t1, rast, verbose=false); +@btime ConScape.solve(windowed_problem_t2, rast, verbose=false); +@btime ConScape.solve(windowed_problem_t4, rast, verbose=false); +@btime ConScape.solve(windowed_problem_t6, rast, verbose=false); +@profview_allocs ConScape.solve(windowed_problem_t1, rast, verbose=false) sampling=1.0 +@profview_allocs ConScape.solve(windowed_problem_t2, rast, verbose=false) sampling=1.0 +@profview_allocs ConScape.solve(windowed_problem_t4, rast, verbose=false) sampling=1.0 +@profview_allocs ConScape.solve(windowed_problem_t6, rast, verbose=false) sampling=1.0 +@profview +res = ConScape.solve(windowed_problem_t1, rast, verbose=false) +@profview ConScape.solve(windowed_problem_t2, rast, verbose=false) +@profview ConScape.solve(windowed_problem_t4, rast, verbose=false) +@profview ConScape.solve(windowed_problem_t6, rast, verbose=false) +res = ConScape.solve(windowed_problem_t4, rast, verbose=false) \ No newline at end of file From ef16390632966aadefa668e863e78fb9d7420fbd Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Wed, 29 Jan 2025 23:44:51 +0100 Subject: [PATCH 20/51] nwindows --- src/tiles.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tiles.jl b/src/tiles.jl index 5795f4b..e15d3f7 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -415,7 +415,7 @@ function assess( end # Mosaic the stored files to a RasterStack -function Rasters.mosaic(p::BatchProblem; to, missingval=0.0, kw...) +function Rasters.mosaic(p::BatchProblem; to, lazy=true, missingval=0.0, kw...) ranges = _window_ranges(p, to) paths = [_window_path(p, rs) for rs in ranges] stacks = [RasterStack(path; lazy) for path in paths if isdir(path)] From 94347a21b3f708dda48eeb6fe9ce043fe852c310 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Fri, 7 Feb 2025 16:32:47 +0100 Subject: [PATCH 21/51] refactor allocs to init --- src/grid.jl | 2 +- src/gridrsp.jl | 5 +- src/randomizedshortestpath.jl | 17 +++-- src/solvers.jl | 55 +++++++------ src/tiles.jl | 140 ++++++++++++++++------------------ test/problem.jl | 114 +++++++++++++++------------ 6 files changed, 177 insertions(+), 156 deletions(-) diff --git a/src/grid.jl b/src/grid.jl index d3a81e0..800c883 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -273,7 +273,7 @@ function largest_subgraph(g::Grid) # @info "cost graph contains $(length(scc)) strongly connected subgraphs" # Find the largest subgraph - i = argmax(length.(scc)) + _, i = findmax(length, scc) # extract node list and sort it scci = sort(scc[i]) diff --git a/src/gridrsp.jl b/src/gridrsp.jl index 66b97e3..aa3bd9c 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -16,7 +16,6 @@ function GridRSP(g::Grid; θ=nothing, verbose=true) Pref = _Pref(g.affinities) W = _W(Pref, θ, g.costmatrix) - error() @debug("Computing fundamental matrix of non-absorbing paths (Z). Please be patient...") Z = (I - W)\Matrix(sparse(g.targetnodes, 1:length(g.targetnodes), @@ -111,10 +110,8 @@ end """ edge_betweenness_kweighted(grsp::GridRSP; [distance_transformation=inv(grsp.g.costfunction), diagvalue=nothing])::SparseMatrixCSC{Float64,Int} - Compute RSP betweenness of all edges weighted by qualities of source s and target t and the proximity between s and t. Returns a - sparse matrix where element (i,j) is the betweenness of edge (i,j). + Compute RSP betweenness of all edges weighted by qualities of source s and target t and the proximity between s and t. Returns a sparse matrix where element (i,j) is the betweenness of edge (i,j). - The optional `diagvalue` element specifies which value to use for the diagonal of the matrix of proximities, i.e. after applying the inverse cost function to the matrix of expected costs. When nothing is specified, the diagonal elements won't be adjusted. """ diff --git a/src/randomizedshortestpath.jl b/src/randomizedshortestpath.jl index 1335137..bcd155c 100644 --- a/src/randomizedshortestpath.jl +++ b/src/randomizedshortestpath.jl @@ -10,9 +10,10 @@ end _inv(Z) = _inv!(similar(Z), Z) function _inv!(Zⁱ, Z) - Zⁱ = inv.(Z) - Zⁱ[.!isfinite.(Zⁱ)] .= floatmax(eltype(Z)) # To prevent Inf*0 later... - return Zⁱ + broadcast(Z) do x + x = inv(x) + isfinite(x) ? x : floatmax(eltype(Z)) + end end _Pref(A::SparseMatrixCSC) = Diagonal(inv.(vec(sum(A, dims=2)))) * A @@ -219,7 +220,7 @@ function RSP_expected_cost(W::SparseMatrixCSC, A=(I - W), A_init=init(solver, A), workspaces=[similar(Z), similar(Z)], - expected_costs, + expected_costs=similar(Z), kw... ) CW = C .* W @@ -261,12 +262,16 @@ function RSP_expected_cost(W::SparseMatrixCSC, end function RSP_free_energy_distance(Z::AbstractMatrix, θ::Real, landmarks::AbstractVector; - survival_probability=nothing, kw... + survival_probability=nothing, + free_energy_distances=similar(Z), + kw... ) if isnothing(survival_probability) survival_probability = RSP_survival_probability(Z, θ, landmarks; kw...) end - return -log.(max.(zero(eltype(Z)), survival_probability)) ./ θ + free_energy_distances .= -log.(max.(zero(eltype(Z)), survival_probability)) ./ θ + + return free_energy_distances end function RSP_survival_probability(Z::AbstractMatrix, θ::Real, landmarks::AbstractVector; kw...) diff --git a/src/solvers.jl b/src/solvers.jl index 3bb17f4..c9ed387 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -13,25 +13,21 @@ function init!( verbose && println("Retreiving measures...") gms = graph_measures(p) cf = connectivity_function(p) - verbose && println("Defining sparse arrays of size $(size(g.affinities))...") - # Sparse lhs - # Sparse rhs - B_sparse = sparse_rhs(g.targetnodes, size(g.costmatrix, 1)) - # A_init = haskey(ws, :A_init) ? init(s, A) : init!(ws.A_init, s, A) - verbose && println("Initialising factorizations...") + verbose && println("Defining sparse arrays...") # B_dense becomes Z verbose && println("Allocating workspaces...") + sze = (size(g.costmatrix, 1), length(g.targetnodes)) Z = if hastrait(needs_inv, gms) - if haskey(ws, :Z) - copyto!(_resize(ws.Z, size(B_sparse)), B_sparse) + if haskey(ws, :Z) + _reshape(ws.Z, sze) else - Matrix{eltype(B_sparse)}(undef, size(B_sparse)) + Matrix{eltype(g.affinities)}(undef, sze) end else nothing end Zⁱ = if hastrait(needs_inv, gms) - haskey(ws, :Zⁱ) ? _resize(ws.Zⁱ, size(Z)) : similar(Z) + haskey(ws, :Zⁱ) ? _reshape(ws.Zⁱ, sze) : similar(Z) else nothing end @@ -86,12 +82,16 @@ end function _shared_solves!(ws::NamedTuple, solver::Solver, cm, p::Problem; verbose=false ) - (; grid, Z) = ws + (; grid) = ws gms = graph_measures(p) cf = connectivity_function(p) + verbose && println("Initialising factorizations...") Pref = _Pref(grid.affinities) W = _W(Pref, cm.θ, grid.costmatrix) + # Sparse rhs + B_sparse = sparse_rhs(grid.targetnodes, size(grid.costmatrix, 1)) + # Sparse lfs A = I - W A_init = init(solver, A) Aadj_init, Aadj = if hastrait(needs_Aaj_init, gms) @@ -106,7 +106,7 @@ function _shared_solves!(ws::NamedTuple, solver::Solver, cm, p::Problem; # LinearSolve.jl cant handle the adjoint # so we duplicate work and allocations Aadj = sparse(A') - Aadj_init = init(solver(p), Aadj) + Aadj_init = init(solver, Aadj) Aadj_init, Aadj end Aadj_init, Aadj @@ -114,29 +114,36 @@ function _shared_solves!(ws::NamedTuple, solver::Solver, cm, p::Problem; nothing, nothing end - if hastrait(needs_Z, gms) + Z = if hastrait(needs_Z, gms) + # verbose && + B = _reshape(ws.Z, size(B_sparse)) + copyto!(B, B_sparse) verbose && println("Solving Z matrix...") - ldiv!(solver, A_init, Z; B_copy=copyto!(ws.workspaces[1], Z)) # Check that values in Z are not too small: + Z = ldiv!(solver, A_init, B; B_copy=copyto!(ws.workspaces[1], B)) # verbose && _check_z(s, Z, W, g) + Z end - if hastrait(needs_inv, gms) + Zⁱ = if hastrait(needs_inv, gms) verbose && println("Inverting Z...") _inv!(_reshape(ws.Zⁱ, size(Z)), Z) end grsp = GridRSP(grid, cm.θ, Pref, W, Z) - workspace = (; Aadj_init, Aadj, A, A_init, ws...) - if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost + workspace = (; ws..., Pref, W, A, A_init, Aadj, Aadj_init, Z, Zⁱ) + + expected_costs = if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost verbose && println("Calculating expected cost...") - ConScape.expected_cost(grsp; workspace..., solver) + expected_costs = _reshape(ws.expected_costs, size(Z)) + ConScape.expected_cost(grsp; workspace..., expected_costs, solver) end - if hastrait(needs_free_energy_distance, gms) || cf == ConScape.free_energy_distance + free_energy_distances = if hastrait(needs_free_energy_distance, gms) || cf == ConScape.free_energy_distance verbose && println("Calculating free energy distance...") - ConScape.free_energy_distance(grsp; workspace..., solver) + free_energy_distances = _reshape(ws.free_energy_distances, size(Z)) + ConScape.free_energy_distance(grsp; workspace..., free_energy_distances, solver) end - return workspace + return (; ws..., Pref, W, A, A_init, Aadj, Aadj_init, Z, Zⁱ, expected_costs, free_energy_distances) end function solve!( @@ -157,7 +164,7 @@ function solve!( nested = map(distance_transformation) do dt cm1 = ConstructionBase.setproperties(cm, (; distance_transformation=dt)) hastrait(needs_proximity, gms) && - _setproximities!(workspace.proximities, workspace.expected_costs, cm1, p, workspace.grsp) + _setproximities!(workspace.proximities, workspace.expected_costs, cm1, p, grsp) # Rebuild the problem with a connectivity measure # holding a single distance transformation, in case its used p1 = ConstructionBase.setproperties(p, (; connectivity_measure=cm1)) @@ -188,7 +195,7 @@ function solve!( end |> NamedTuple{keys(gms)} else hastrait(needs_proximity, gms) && - _setproximities!(workspace.proximities, workspace.expected_costs, cm, p, workspace.grsp) + _setproximities!(workspace.proximities, workspace.expected_costs, cm, p, grsp) # Map over graph measures map(p.graph_measures) do gm compute(gm, p, grsp; workspace...) @@ -444,6 +451,7 @@ function _setproximities!( return proximities end +# This only makes sense if arrays are sorted large to small function _reshape(A::Array, dims::Tuple{Vararg{Int}}) len = prod(dims) mem = getfield(A, :ref).mem @@ -455,6 +463,7 @@ function _reshape(A::Array, dims::Tuple{Vararg{Int}}) setfield!(v, :size, (len,)) reshape(v, dims) else + error("Arrays were not sorted") v = resize!(vec(A), len) reshape(v, dims) end diff --git a/src/tiles.jl b/src/tiles.jl index e15d3f7..43523f7 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -1,5 +1,4 @@ # This file is a work in progress... - abstract type AbstractWindowedProblem{P} <: AbstractProblem end costs(p::AbstractWindowedProblem) = costs(p.problem) @@ -33,19 +32,15 @@ WindowedProblem(problem; kw...) = WindowedProblem(; problem, kw...) centersize(p::WindowedProblem) = p.centersize, p.centersize -function solve(p::WindowedProblem, rast::RasterStack; kw...) - workspace = init(p, rast) - solve!(workspace, p, rast; kw...) -end -function solve!(workspace, p::WindowedProblem, rast::RasterStack; +solve(p::WindowedProblem, rast::RasterStack; kw...) = + solve!(init(p, rast), p; kw...) +function solve!(workspace, p::WindowedProblem; test_windows::Bool=false, verbose::Bool=false, mosaic_return::Bool=true, - window_ranges=_window_ranges(p, rast), - window_sizes=_window_sizes(p, rast; window_ranges), - window_indices=_window_indices(p, rast; window_ranges), timed=false, ) + (; rast, window_workspaces, window_ranges, window_indices, sorted_indices) = workspace # Test outputs just return the inputs after window masking if test_windows output_stacks = map(window_indices) do i @@ -60,32 +55,27 @@ function solve!(workspace, p::WindowedProblem, rast::RasterStack; end end - n = max(length(window_indices), p.threaded ? Threads.nthreads() : 1) - ch = Channel{NamedTuple}(n) - for _ in 1:n - put!(ch, (;)) + ch = Channel{NamedTuple}(length(window_workspaces)) + for ws in window_workspaces + put!(ch, ws) end - - sorted_indices = last.(sort!(prod.(window_sizes[window_indices]) .=> window_indices; rev=true)) - verbose && @show sorted_indices # Set up channels for threading - # ch = workspace.channel # Define empty outputs output_stacks = Vector{RasterStack}(undef, length(sorted_indices)) # Define a runner for threaded/non-threaded operation function run(i, iw) # Get a window range - rs = window_ranges[iw] - verbose && println("Running job $iw on ranges $rs and thread $(Threads.threadid())") - # verbose && println("Solving window $i $rs ") - rast_window = _get_window_with_zeroed_buffer(p, rast, rs) + window = window_ranges[iw] + verbose && println("Running job $iw on ranges $window and thread $(Threads.threadid())") + # verbose && println("Solving window $i $window ") + window_rast = _get_window_with_zeroed_buffer(p, rast, window) # Initialise the window using stored memory verbose && println("Getting workspace from channel...") workspace = take!(ch) - verbose && println("Initialising window from size $(size(rast_window)), from ranges $rs...") - workspace = init!(workspace, p.problem, rast_window; verbose) + verbose && println("Initialising window from size $(size(window_rast)), from ranges $window...") + workspace = init!(workspace, p.problem, window_rast; verbose) # Solve for the window - verbose && println("Solving window $rs...") + verbose && println("Solving window $window...") output_stacks[i] = solve!(workspace, p.problem) # Return the workspace to the channel put!(ch, workspace) @@ -124,20 +114,21 @@ function solve!(workspace, p::WindowedProblem, rast::RasterStack; end end -init(p::AbstractWindowedProblem, rast::RasterStack; kw...) = init!((;), p, rast; kw...) -function init!(workspace::NamedTuple, p::AbstractWindowedProblem, rast::RasterStack; kw...) - # TODO actually allocate - n = p.threaded ? Threads.nthreads() : 1 - # workspace = if haskey(workspace, :channel) - # workspace - # else - # channel = Channel{NamedTuple}(n) - # for _ in 1:n - # put!(channel, (;)) - # end - # (; channel) - # end - return workspace +init(p::WindowedProblem, rast::RasterStack; kw...) = init!((;), p, rast; kw...) +function init!(workspace::NamedTuple, p::WindowedProblem, rast::RasterStack; + window_ranges=_window_ranges(p, rast), + window_sizes=_window_sizes(p, rast; window_ranges), + window_indices=_window_indices(p, rast; window_ranges), + sorted_indices=last.(sort!(prod.(window_sizes[window_indices]) .=> window_indices; rev=true)), +) + n = max(length(window_indices), p.threaded ? Threads.nthreads() : 1) + largest_rast = rast[window_ranges[first(sorted_indices)]...] + window_workspaces = if haskey(workspace, :window_workspaces) + [init!(ws, p.problem, largest_rast) for ws in window_workspaces] + else + [init(p.problem, largest_rast) for _ in 1:n] + end + return (; rast, window_workspaces, window_sizes, window_ranges, window_indices, sorted_indices) end function _max_window_problem_size(p::AbstractWindowedProblem, rast; kw...) @@ -230,8 +221,7 @@ end centersize(p::BatchProblem) = p.centersize function solve(p::BatchProblem, rast::RasterStack; - window_indices=_window_indices(p, rast), - kw... + window_indices=_window_indices(p, rast), kw... ) function run(i) solve(p, rast, i; window_indices, kw...) @@ -246,35 +236,31 @@ function solve(p::BatchProblem, rast::RasterStack; end end end +solve(p::BatchProblem, rast::RasterStack, i; kw...) = + solve!(init(p, rast, i), p; kw...) # Single batch job for running on clusters -function solve(p::BatchProblem, rast::RasterStack, i::Int; - window_indices=_window_indices(p, rast), - verbose::Bool=false, - kw... -) - # Get the ranges of all jobs - window_ranges = _window_ranges(p, rast) - - # Get the window range of job i - rs = window_ranges[window_indices[i]] +function solve!(ws, p::BatchProblem; verbose=false, kw...) + # Solve for this window + output = solve!(ws.workspace, p.problem; verbose) + # Store the output rasters for this job to disk and return the file path + return _store(p, output, ws.window; verbose) +end +function init(p::BatchProblem, rast::RasterStack, i::Int; + batch_ranges=_window_ranges(p, rast), + batch_indices=_window_indices(p, rast), +) # Get the raster data for job i + @show i batch_indices batch_ranges + window = batch_ranges[batch_indices[i]] # Just read the whole thing now to reduce reads in overlapping windows - rast_window = read(_get_window_with_zeroed_buffer(p, rast, rs)) - - # Solve for this raster - output = solve(p.problem, rast_window; kw...) - - # Store the output rasters for this job to disk and return the file path - return _store(p, output, rs; verbose) + batch_rast = read(_get_window_with_zeroed_buffer(p, rast, window)) + return (; rast=batch_rast, workspace=init(p.problem, batch_rast), batch=1, window) end function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack; - nthreads=Threads.nthreads(), - print=true, - kw... + nthreads=Threads.nthreads(), kw... ) - # Define the ranges of each window window_ranges = _window_ranges(p, rast) @@ -320,7 +306,6 @@ function assess( p::AbstractWindowedProblem{<:AbstractWindowedProblem}, rast::AbstractRasterStack; nthreads=Threads.nthreads(), - print=true, verbose=false, kw... ) @@ -347,6 +332,7 @@ function assess( verbose && println("Copy raster data") window_rast = open(rast) do o if map(length, rs) == size(window_rast) + @show map(length, rs) size(window_rast) _get_window_with_zeroed_buffer!(window_rast, p, o, rs) else _get_window_with_zeroed_buffer(getindex, p, o, rs) @@ -356,7 +342,7 @@ function assess( nvalid = count(_isvalid, window_rast.target_qualities) assessments[i] = if nvalid > 0 verbose && println(" nvalid: $nvalid") - assess(p.problem, window_rast; nthreads, print=false, kw...) + assess(p.problem, window_rast; nthreads, kw...) else verbose && println(" No targets found") (; @@ -409,7 +395,7 @@ function assess( inner_window_masks, ) - print && display(pairs(fields)) + verbose && display(pairs(fields)) return fields end @@ -471,28 +457,34 @@ end # _get_window_with_zeroed_buffer(dest, p, rast, axes(rast)) function _get_window_with_zeroed_buffer!(dest, p::AbstractWindowedProblem, rast::RasterStack, rs) - window = view(rast, rs...) - dest = rebuild(dest; dims=dims(window)) - # @show typeof(parent(parent(dest.qualities))) typeof(parent(parent(window.qualities))) - # error() - parent(parent(dest.qualities)) .= parent(parent(window.qualities)) - parent(parent(dest.affinities)) .= parent(parent(window.affinities)) - return _with_sparse_targets(p, window, dest) + source = view(rast, rs...) + # Reshape and rebuild to resuse memory + data = ( + affinities=_reshape(parent(parent(dest.affinities)), size(source)), + qualities=_reshape(parent(parent(dest.qualities)), size(source)), + target_qualities=parent(parent(dest.target_qualities)), + ) + dest = rebuild(dest; data, dims=dims(source)) + # Update values + dest.qualities .= source.qualities + dest.affinities .= source.affinities + + return _with_sparse_targets(p, source, dest) end _get_window_with_zeroed_buffer(p::AbstractWindowedProblem, args...) = _get_window_with_zeroed_buffer(view, p, args...) _get_window_with_zeroed_buffer(f::Function , p::AbstractWindowedProblem, rast::RasterStack) = _get_window_with_zeroed_buffer(f, p, rast, axes(rast)) function _get_window_with_zeroed_buffer(f::Function, p::AbstractWindowedProblem, rast::RasterStack, rs) - window = f(rast, rs...) - return _with_sparse_targets(p, window, window) + source = f(rast, rs...) + return _with_sparse_targets(p, source, source) end function _with_sparse_targets(p, source, dest) b = buffer(p) tq = source.target_qualities tq_sparse = spzeros(eltype(tq), size(tq)) - center_ranges = map(s -> b:s-b, size(tq)) + center_ranges = map(s -> b+1:s-b, size(tq)) tq_sparse[center_ranges...] = tq[center_ranges...] if !isnothing(grain(p)) tq_sparse = coarse_graining(tq_sparse, grain(p)) diff --git a/test/problem.jl b/test/problem.jl index dfd14c2..9ab6505 100644 --- a/test/problem.jl +++ b/test/problem.jl @@ -69,19 +69,19 @@ for solver in solvers graph_measures, connectivity_measure, solver, ) @time workspace = init(problem, rast); + Z = copy(workspace.Z) @testset "initialised grids are the same" begin - @test workspace.grsp.W == test_grsp.W - @test workspace.grsp.Z == test_grsp.Z - @test workspace.grsp.Pref == test_grsp.Pref - @test workspace.grsp.θ == test_grsp.θ + # @test workspace.g.θ == test_grsp.θ foreach(propertynames(test_g)) do n @test isequal(getproperty(workspace.grid, n), getproperty(test_g, n)) end - @test workspace.expected_costs == ConScape.expected_cost(test_grsp) - @test workspace.free_energy_distances == ConScape.free_energy_distance(test_grsp) end result = ConScape.solve!(workspace, problem); + @test workspace.expected_costs == ConScape.expected_cost(test_grsp) + @test workspace.free_energy_distances == ConScape.free_energy_distance(test_grsp) + @test workspace.Z == test_grsp.Z + # @profview result = ConScape.solve(problem, workspace) @test result isa NamedTuple @test size(result.ch_one) == size(rast) @@ -160,7 +160,6 @@ problem = ConScape.Problem(; graph_measures, connectivity_measure, solver) solve(problem, rast; verbose=true) @testset "target mosaicing matches original" begin - # TODO note that this breaks if q weighting is included windowed_problem = ConScape.WindowedProblem(problem; buffer=10, centersize=5, threaded=false ) @@ -179,7 +178,7 @@ solve(problem, rast; verbose=true) inner_targets[:, 1:10] .= 0 inner_targets[end-9:end, :] .= 0 inner_targets[:, end-9:end] .= 0 - @test inner_targets == test_results.target_qualities + @test parent(inner_targets) == parent(test_results.target_qualities) end @testset "windowed results approximate non-windowed" begin @@ -203,7 +202,7 @@ end # BatchProblem writes files to disk and mosaics to RasterStack -@testset "batch problem matches windowed problem" begin +# @testset "batch problem matches windowed problem" begin # Use a higher alpha to catch differences distance_transformation = x -> exp(-x / 50) connectivity_measure = ConScape.ExpectedCost(; θ, distance_transformation) @@ -211,7 +210,8 @@ end kw = (; buffer=10, centersize=5, threaded=false) windowed_problem = ConScape.WindowedProblem(problem; kw...) - windowed_result = ConScape.solve(windowed_problem, rast) + @time workspace = ConScape.init(windowed_problem, rast); + @time windowed_result = ConScape.solve!(workspace, windowed_problem); batch_problem = ConScape.BatchProblem(problem; datapath=tempname(), kw...) ConScape.solve(batch_problem, rast) @@ -221,13 +221,11 @@ end # BatchProblem can be run as batch jobs for clusters # We just need a new path to make sure the result is from a new run batch_jobs_problem = ConScape.BatchProblem(problem; - datapath=tempname(), joblistpath=tempname(), kw... + datapath=tempname(), kw... ) assessment = ConScape.assess(batch_jobs_problem, rast) batch_jobs_problem.centersize @test assessment.njobs == 39 - @test isfile(batch_jobs_problem.joblistpath) - ConScape._read_joblist(batch_jobs_problem) for job in 1:assessment.njobs ConScape.solve(batch_jobs_problem, rast, job) @@ -238,17 +236,36 @@ end datapath=tempname(), centersize=(10, 10), threaded=false ) ConScape.assess(nested_problem, rast) - ConScape.solve(nested_problem, rast) nested_result = mosaic(nested_problem; to=rast) @test nested_result isa RasterStack + nested_jobs_problem = ConScape.BatchProblem(windowed_problem; + datapath=tempname(), centersize=(10, 10), threaded=false + ) + # Try one + @time workspace = ConScape.init(nested_problem, rast, 5) + @time ConScape.solve!(workspace, nested_problem) + + assessment = ConScape.assess(nested_jobs_problem, rast); + for job in 1:assessment.njobs + ConScape.solve(nested_jobs_problem, rast, job) + end + nested_jobs_result = mosaic(nested_jobs_problem; to=rast) + @test keys(windowed_result) == keys(nested_result) == keys(batch_result) == - keys(batch_jobs_result) == Tuple(sort(collect(expected_layers))) + keys(batch_jobs_result) == + keys(nested_jobs_result) == + Tuple(sort(collect(expected_layers))) + + @test all(permutedims(batch_jobs_result.ch) .=== permutedims(batch_result.ch)) + @test all(permutedims(batch_jobs_result.betk) .=== permutedims(batch_result.betk)) - @test all(permutedims(batch_jobs_result.ch) .=== permutedims(batch_result.ch) .=== windowed_result.ch) - @test all(permutedims(batch_jobs_result.betk) .=== permutedims(batch_result.betk) .=== windowed_result.betk) + # These may be approximate after mosaic order changes + compare(a, b) = isnan(a) && isnan(b) || isapprox(a, b) + @test all(compare.(permutedims(batch_result.ch), windowed_result.ch)) + @test all(compare.(permutedims(batch_result.betk), windowed_result.betk)) # TODO: there are some tiny fp differences in the nested result @test all(map(nested_result.ch, batch_result.ch) do n, b @@ -259,39 +276,40 @@ end # plot(batch_result) # plot(batch_jobs_result) # plot(nested_result) + # plot(nested_jobs_result) end # Scale Benchmarking... -windowed_problem_t1 = ConScape.WindowedProblem(problem; - buffer=10, centersize=1, threaded=true -) -windowed_problem_t2 = ConScape.WindowedProblem(problem; - buffer=10, centersize=2, threaded=true -) -windowed_problem_t4 = ConScape.WindowedProblem(problem; - buffer=10, centersize=4, threaded=true -) -windowed_problem_t6 = ConScape.WindowedProblem(problem; - buffer=10, centersize=6, threaded=true -) -length(ConScape._window_ranges(windowed_problem_t1, rast)) -length(ConScape._window_ranges(windowed_problem_t2, rast)) -length(ConScape._window_ranges(windowed_problem_t4, rast)) -length(ConScape._window_ranges(windowed_problem_t6, rast)) -using BenchmarkTools -ConScape.solve(windowed_problem_t1, rast, verbose=false); -@btime ConScape.solve(windowed_problem_t2, rast, verbose=false); -@btime ConScape.solve(windowed_problem_t4, rast, verbose=false); -@btime ConScape.solve(windowed_problem_t6, rast, verbose=false); -@profview_allocs ConScape.solve(windowed_problem_t1, rast, verbose=false) sampling=1.0 -@profview_allocs ConScape.solve(windowed_problem_t2, rast, verbose=false) sampling=1.0 -@profview_allocs ConScape.solve(windowed_problem_t4, rast, verbose=false) sampling=1.0 -@profview_allocs ConScape.solve(windowed_problem_t6, rast, verbose=false) sampling=1.0 -@profview -res = ConScape.solve(windowed_problem_t1, rast, verbose=false) -@profview ConScape.solve(windowed_problem_t2, rast, verbose=false) -@profview ConScape.solve(windowed_problem_t4, rast, verbose=false) -@profview ConScape.solve(windowed_problem_t6, rast, verbose=false) -res = ConScape.solve(windowed_problem_t4, rast, verbose=false) \ No newline at end of file +# windowed_problem_t1 = ConScape.WindowedProblem(problem; +# buffer=10, centersize=1, threaded=true +# ) +# windowed_problem_t2 = ConScape.WindowedProblem(problem; +# buffer=10, centersize=2, threaded=true +# ) +# windowed_problem_t4 = ConScape.WindowedProblem(problem; +# buffer=10, centersize=4, threaded=true +# ) +# windowed_problem_t6 = ConScape.WindowedProblem(problem; +# buffer=10, centersize=6, threaded=true +# ) +# length(ConScape._window_ranges(windowed_problem_t1, rast)) +# length(ConScape._window_ranges(windowed_problem_t2, rast)) +# length(ConScape._window_ranges(windowed_problem_t4, rast)) +# length(ConScape._window_ranges(windowed_problem_t6, rast)) +# using BenchmarkTools +# ConScape.solve(windowed_problem_t1, rast, verbose=false); +# @btime ConScape.solve(windowed_problem_t2, rast, verbose=false); +# @btime ConScape.solve(windowed_problem_t4, rast, verbose=false); +# @btime ConScape.solve(windowed_problem_t6, rast, verbose=false); +# @profview_allocs ConScape.solve(windowed_problem_t1, rast, verbose=false) sampling=1.0 +# @profview_allocs ConScape.solve(windowed_problem_t2, rast, verbose=false) sampling=1.0 +# @profview_allocs ConScape.solve(windowed_problem_t4, rast, verbose=false) sampling=1.0 +# @profview_allocs ConScape.solve(windowed_problem_t6, rast, verbose=false) sampling=1.0 +# @profview +# res = ConScape.solve(windowed_problem_t1, rast, verbose=false) +# @profview ConScape.solve(windowed_problem_t2, rast, verbose=false) +# @profview ConScape.solve(windowed_problem_t4, rast, verbose=false) +# @profview ConScape.solve(windowed_problem_t6, rast, verbose=false) +# res = ConScape.solve(windowed_problem_t4, rast, verbose=false) \ No newline at end of file From 0bd3392d12482027f5aa7585595770260382aa15 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 11 Feb 2025 23:06:43 +0100 Subject: [PATCH 22/51] updates --- src/ConScape.jl | 1 - src/allocations.jl | 85 ------------------ src/problem.jl | 12 +++ src/solvers.jl | 20 ++--- src/tiles.jl | 209 ++++++++++++++++++++++++--------------------- 5 files changed, 135 insertions(+), 192 deletions(-) delete mode 100644 src/allocations.jl diff --git a/src/ConScape.jl b/src/ConScape.jl index f11b7a1..2ca8878 100644 --- a/src/ConScape.jl +++ b/src/ConScape.jl @@ -53,6 +53,5 @@ include("connectivity_measure.jl") include("problem.jl") include("solvers.jl") include("tiles.jl") -include("allocations.jl") end diff --git a/src/allocations.jl b/src/allocations.jl deleted file mode 100644 index 18eb902..0000000 --- a/src/allocations.jl +++ /dev/null @@ -1,85 +0,0 @@ - -""" - allocations(p::AbstractProblem, size::Tuple{Int,Int}) - allocations(p::AbstractProblem, rast::RasterStack) - -Calculate allocations in Bytes required to run the problem. -The maximum dense target size will be used, so that `size` -is symmetrical. You can pass e.g. `(1000, 200)`. where you -know the size of the largest sparse matrix generated from `rast`. - -`allocations` will likely underestimate as Julia may need to allocate -for compilatation and other things outside of our control. - -A warning will be thrown for problem components whos allocations -are not well known. -""" -function allocations end - -allocations(p::Problem, rast::AbstractRasterStack; kw...) = - allocations(p, _problem_size(p, rast); kw...) -allocations(p::Problem, grid::Grid; kw...) = - allocations(p, size(grid); kw...) -function allocations(p::Problem, sze::Tuple{Int,Int}; kw...) - gms = graph_measures(p) - dense_size = sizeofdense(p, sze) - sparse_size = sizeofsparse(p, sze) - init_size = allocations(solver(p), sze; kw...) - - return_size = sum(map(gm -> sizeofreturn(gm, sze), gms)) - - total = sparse_size + dense_size + init_size + return_size - # (; total, sparse_size, dense_size, init_size, return_size, grid_size) - return total -end - - -# This is approximate. -# Size of the solver initialisation / factorization -# These are not accurate -allocations(::MatrixSolver, sze; nthreads=nothing) = sze[1] * 20 * sizeof(Float64) -function allocations(s::VectorSolver, sze; - nthreads=Threads.nthreads(), -) - if s.threaded - sze[1] * (20 + nthreads) * sizeof(Float64) - else - sze[1] * 20 * sizeof(Float64) - end -end - -# Slightly inaccurate as the band is not complete in corners -# and there are a few extra allocations that counterbalance that -function sizeofsparse((nsources, ntargets)) - windowsize = 8 - ntargets * windowsize * (sizeof(Float64) + sizeof(Int)) -end -function sizeofsparse(p, sze::Tuple{Int,Int}) - # affinities + costmatrix + A + W + Pref + B_sparse + CW - others? - 7 * sizeofsparse(sze) -end - -sizeofdense(sze::Tuple{Int,Int}) = prod(sze) * sizeof(Float64) -function sizeofdense(p::Problem, sze::Tuple{Int,Int}) - gms = graph_measures(p) - n_workspaces = count_workspaces(p) - n_permuted_workspaces = count_permuted_workspaces(p) - ec_ws = hastrait(needs_expected_cost, gms) || connectivity_measure(p) isa ConScape.ExpectedCost ? 1 : 0 - - required_dense = 1 + - n_workspaces + - n_permuted_workspaces + - ec_ws - hastrait(needs_free_energy_distance, gms) + - hastrait(needs_expected_cost, gms) + - hastrait(needs_proximity, gms) + - hastrait(needs_inv, gms) - - return sizeofdense(sze) * required_dense -end - -sizeofreturn(gm::GraphMeasure, sze) = sizeofreturn(returntype(gm), sze) -sizeofreturn(::ReturnsDenseSpatial, (n, m)) = n * sizeof(Float64) -sizeofreturn(::ReturnsSparse, (n, m)) = n * m * 8 # Roughly this for 8 neighbors -sizeofreturn(::ReturnsScalar, (n, m)) = sizeof(Float64) -sizeofreturn(r::ReturnsOther, (n, m)) = r.f(n, m) \ No newline at end of file diff --git a/src/problem.jl b/src/problem.jl index 0488e56..8c8e6d9 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -4,6 +4,7 @@ connectivity_measure(p::AbstractProblem) = connectivity_measure(p.problem) connectivity_function(p::AbstractProblem) = connectivity_function(connectivity_measure(p)) solver(p::AbstractProblem) = solver(p.problem) +isthreaded(p::AbstractProblem) = false """ assess(p::AbstractProblem, g) @@ -36,12 +37,23 @@ to be run in the same job. end Problem(graph_measures::Union{Tuple,NamedTuple}; kw...) = Problem(; graph_measures, kw...) +function Base.show(io, mime, p::Problem; indent="") + println(io, typeof(p).name.wrapper) + # println(io, indent, "graph_measures: ", p.graph_measures) + # println(io, indent, "connectivity_measure: ", p.connectivity_measure) + # println(io, indent, "costs: ", p.costs) + # println(io, indent, "solver: ", p.solver) + # println(io, indent, "diagvalue: ", typeof(p.diagvalue)) + # println(io, indent, "prune: ", p.prune) +end + diagvalue(p::Problem) = p.diagvalue graph_measures(p::Problem) = p.graph_measures connectivity_measure(p::Problem) = p.connectivity_measure solver(p::Problem) = p.solver costs(p::Problem) = p.costs prune(p::Problem) = p.prune +isthreaded(p::Problem) = p.threaded solve(p::Problem, rast::RasterStack; kw...) = solve!(init(p, rast; kw...), p; kw...) diff --git a/src/solvers.jl b/src/solvers.jl index c9ed387..54a9549 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -245,10 +245,12 @@ less memory use and the capacity for threading threaded::Bool = false end +isthreaded(s::VectorSolver) = s.threaded + function init(s::VectorSolver, A::AbstractMatrix) F = lu(A) Tb = Vector{eltype(A)} - if s.threaded + if isthreaded(s) # Create one init per thread # UMFPACK `copy` shares memory but avoids workspace race conditions nbuffers = Threads.nthreads() @@ -271,7 +273,7 @@ function LinearAlgebra.ldiv!(s::VectorSolver, init, B; B_copy=nothing) # This is basically SparseArrays.UMFPACK._AqldivB_kernel! # But we unroll it to avoid copies or allocation of B - if s.threaded + if isthreaded(s) channel = Channel{typeof(init[1])}(length(init)) for x in init put!(channel, x) @@ -338,10 +340,12 @@ struct LinearSolver <: Solver end LinearSolver(args...; threaded=false, kw...) = LinearSolver(args, kw, threaded) +isthreaded(s::LinearSolver) = s.threaded + function LinearAlgebra.ldiv!(s::LinearSolver, (; linsolve, channel, b), B) # TODO: for now we define a Z matrix, but later modify ops # to run column by column without materialising Z - if s.threaded + if isthreaded(s) Threads.@threads for i in 1:size(B, 2) # Get column memory from the channel linsolve_t, b_t = take!(channel) @@ -454,17 +458,13 @@ end # This only makes sense if arrays are sorted large to small function _reshape(A::Array, dims::Tuple{Vararg{Int}}) len = prod(dims) - mem = getfield(A, :ref).mem if size(A) == dims A - elseif length(mem) >= len + elseif length(A) >= len v = vec(A) - # Hack to shrink the array - setfield!(v, :size, (len,)) + resize!(v, len) reshape(v, dims) else - error("Arrays were not sorted") - v = resize!(vec(A), len) - reshape(v, dims) + error("Arrays were not sorted. Current len: $(length(A)), needed len: $len") end end \ No newline at end of file diff --git a/src/tiles.jl b/src/tiles.jl index 43523f7..8743f25 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -30,10 +30,25 @@ to be run over the same windowed grids. end WindowedProblem(problem; kw...) = WindowedProblem(; problem, kw...) +# function Base.show(io, mime::MIME"text/plain", p::WindowedProblem) +# println(io, typeof(p)) +# println(io, "centersize: ", p.centersize) +# println(io, "buffer: ", p.buffer) +# println(io, "threaded: ", p.threaded) +# println(io, "problem: ") +# show(io, mime, p.problem) +# end + centersize(p::WindowedProblem) = p.centersize, p.centersize +isthreaded(p::WindowedProblem) = p.threaded -solve(p::WindowedProblem, rast::RasterStack; kw...) = - solve!(init(p, rast), p; kw...) +function solve(p::WindowedProblem, rast::RasterStack; + verbose=false, test_windows=false, mosaic_return=true, timed=true, kw... +) + solve!(init(p, rast; verbose, kw...), p; + verbose, test_windows, mosaic_return, timed + ) +end function solve!(workspace, p::WindowedProblem; test_windows::Bool=false, verbose::Bool=false, @@ -44,7 +59,7 @@ function solve!(workspace, p::WindowedProblem; # Test outputs just return the inputs after window masking if test_windows output_stacks = map(window_indices) do i - _get_window_with_zeroed_buffer(p, rast, window_ranges[i]) + _get_window_with_zeroed_buffer(view, p, rast, window_ranges[i]) end return if mosaic_return Rasters.mosaic(sum, collect(skipmissing(output_stacks)); @@ -68,15 +83,15 @@ function solve!(workspace, p::WindowedProblem; window = window_ranges[iw] verbose && println("Running job $iw on ranges $window and thread $(Threads.threadid())") # verbose && println("Solving window $i $window ") - window_rast = _get_window_with_zeroed_buffer(p, rast, window) + window_rast = _get_window_with_zeroed_buffer(view, p, rast, window) # Initialise the window using stored memory verbose && println("Getting workspace from channel...") workspace = take!(ch) verbose && println("Initialising window from size $(size(window_rast)), from ranges $window...") - workspace = init!(workspace, p.problem, window_rast; verbose) + workspace_initialised = init!(workspace, p.problem, window_rast; verbose) # Solve for the window verbose && println("Solving window $window...") - output_stacks[i] = solve!(workspace, p.problem) + output_stacks[i] = solve!(workspace_initialised, p.problem) # Return the workspace to the channel put!(ch, workspace) end @@ -107,7 +122,7 @@ function solve!(workspace, p::WindowedProblem; end else if timed - return (; result=output_stacks, mosaic_elapsed) + return (; result=output_stacks) else return output_stacks end @@ -119,14 +134,20 @@ function init!(workspace::NamedTuple, p::WindowedProblem, rast::RasterStack; window_ranges=_window_ranges(p, rast), window_sizes=_window_sizes(p, rast; window_ranges), window_indices=_window_indices(p, rast; window_ranges), - sorted_indices=last.(sort!(prod.(window_sizes[window_indices]) .=> window_indices; rev=true)), + sorted_indices=last.(sort!(prod.(window_sizes[window_indices]) .=> window_indices; rev=true)), + verbose=true, ) - n = max(length(window_indices), p.threaded ? Threads.nthreads() : 1) - largest_rast = rast[window_ranges[first(sorted_indices)]...] + n = min(length(window_indices), p.threaded ? Threads.nthreads() : 1) + # VERY important to use _get_window_with_zeroed_buffer here not just index the raster + # Otherwise memory use will be TB + largest_rast = _get_window_with_zeroed_buffer(view, p, rast, window_ranges[first(sorted_indices)]) + @show size(rast) + @show size(largest_rast) window_workspaces = if haskey(workspace, :window_workspaces) - [init!(ws, p.problem, largest_rast) for ws in window_workspaces] + @show length(workspace.window_workspaces) + [init!(ws, p.problem, largest_rast; verbose) for ws in window_workspaces] else - [init(p.problem, largest_rast) for _ in 1:n] + [init(p.problem, largest_rast; verbose) for _ in 1:n] end return (; rast, window_workspaces, window_sizes, window_ranges, window_indices, sorted_indices) end @@ -183,7 +204,6 @@ for nested operations. datapath::String grain::Union{Nothing,Int} = nothing ext::String = ".tif" - threaded::Bool = false end function BatchProblem(problem::Problem; centersize::Union{Int,Tuple{Int,Int}}, kw... @@ -218,23 +238,25 @@ function BatchProblem(problem::WindowedProblem; BatchProblem(; problem, buffer, centersize, kw...) end +# function Base.show(io, mime::MIME"text/plain", p::BatchProblem) +# println(io, typeof(p)) +# println(io, "centersize: ", p.centersize) +# println(io, "buffer: ", p.buffer) +# println(io, "datapath: ", p.datapath) +# println(io, "ext: ", p.ext) +# println(io, "grain: ", p.grain) +# println(io, "problem: ") +# show(io, mime, p.problem) +# end + centersize(p::BatchProblem) = p.centersize function solve(p::BatchProblem, rast::RasterStack; window_indices=_window_indices(p, rast), kw... ) - function run(i) + for i in eachindex(window_indices) solve(p, rast, i; window_indices, kw...) end - if p.threaded - Threads.@threads for i in eachindex(window_indices) - run(i) - end - else - for i in eachindex(window_indices) - run(i) - end - end end solve(p::BatchProblem, rast::RasterStack, i; kw...) = solve!(init(p, rast, i), p; kw...) @@ -247,15 +269,30 @@ function solve!(ws, p::BatchProblem; verbose=false, kw...) end function init(p::BatchProblem, rast::RasterStack, i::Int; - batch_ranges=_window_ranges(p, rast), - batch_indices=_window_indices(p, rast), + window_ranges=_window_ranges(p, rast), + window_indices=_window_indices(p, rast; window_ranges), + kw... +) + init!((; rast, window_ranges, window_indices), p, i; kw...) +end +function init!(workspace, p::BatchProblem, i::Int; + verbose=true, ) + (; window_indices, window_ranges, rast) = workspace + @show length(window_ranges) # Get the raster data for job i - @show i batch_indices batch_ranges - window = batch_ranges[batch_indices[i]] + window = window_ranges[window_indices[i]] + verbose && @show window # Just read the whole thing now to reduce reads in overlapping windows - batch_rast = read(_get_window_with_zeroed_buffer(p, rast, window)) - return (; rast=batch_rast, workspace=init(p.problem, batch_rast), batch=1, window) + batch_rast = if p.problem isa WindowedProblem + # We want to materialise the raster, and we don't need sparse targets + rast[window...] + else # isa Problem + # We also want to materialise the window, but with sparse targets + _get_window_with_zeroed_buffer(getindex, p, rast, window) + end + verbose && @show size(batch_rast) + return (; rast=batch_rast, workspace=init(p.problem, batch_rast; verbose), batch=1, window) end function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack; @@ -265,38 +302,22 @@ function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack window_ranges = _window_ranges(p, rast) # Calculate window sizes and allocations - sizes_and_allocs = map(vec(window_ranges)) do rs + window_sizes = map(vec(window_ranges)) do rs window_rast = view(rast, rs...) - sze = _problem_size(p, window_rast) - allocs = allocations(p.problem, sze; nthreads, kw...) - sze, allocs + _problem_size(p, window_rast) end # Organise stats for each window into vectors - window_sizes = first.(sizes_and_allocs) - window_allocations = last.(sizes_and_allocs) window_mask = map(s -> prod(s) > 0, window_sizes) window_indices = eachindex(window_mask)[window_mask] - - # Caclulate allocations, with threading context - max_allocations = if p.threaded - # Take the top nthreads allocations - # Each thread will need to allocate its own workspace - sum(sort(window_allocations)[1:min(end, nthreads)]) - else - # One maximum workspace is allocated and reused - maximum(window_allocations; init=0) - end # Calculate global stats njobs = count(window_mask) shape = size(window_ranges) - return (; + WindowAssessment( shape, njobs, - max_allocations, - window_allocations, window_sizes, window_mask, window_indices, @@ -306,7 +327,7 @@ function assess( p::AbstractWindowedProblem{<:AbstractWindowedProblem}, rast::AbstractRasterStack; nthreads=Threads.nthreads(), - verbose=false, + verbose=true, kw... ) # Calculate outer window ranges @@ -322,7 +343,7 @@ function assess( end # Define a vector for all assessment data - assessments = Vector{Any}(undef, length(window_ranges)) + assessments = Vector{WindowAssessment}(undef, length(window_ranges)) # Run assessments threaded as they can take a long time for large rasters Threads.@threads for i in eachindex(vec(window_ranges)) rs = window_ranges[i] @@ -332,7 +353,6 @@ function assess( verbose && println("Copy raster data") window_rast = open(rast) do o if map(length, rs) == size(window_rast) - @show map(length, rs) size(window_rast) _get_window_with_zeroed_buffer!(window_rast, p, o, rs) else _get_window_with_zeroed_buffer(getindex, p, o, rs) @@ -345,59 +365,29 @@ function assess( assess(p.problem, window_rast; nthreads, kw...) else verbose && println(" No targets found") - (; + WindowAssessment(; shape=(0, 0), njobs=0, - max_allocations=0, - window_allocations=Int[], - window_sizes=Tuple{Int,Int}[], - window_mask=Bool[], - window_indices=Int[], + sizes=Tuple{Int,Int}[], + mask=Bool[], + indices=Int[], ) end put!(channel, window_rast) end - - # Get vectors of vectors from inner problem - inner_window_allocations = map(a -> a.window_allocations, assessments) - inner_window_sizes = map(a -> a.window_sizes, assessments) - inner_window_masks = map(a -> a.window_mask, assessments) - inner_window_indices = map(a -> a.window_indices, assessments) - inner_window_counts = map(length, inner_window_sizes) - inner_window_jobs = map(a -> a.njobs, assessments) - - # Get outer problem vectors - window_mask = map(any, inner_window_masks) - window_indices = eachindex(vec(window_mask))[window_mask] - + # Get mask and indices + mask = map(a -> any(a.mask), assessments) + indices = eachindex(vec(mask))[mask] # Calculate global stats - max_allocations = if p.threaded - sum(sort(inner_allocations)[1:min(end, nthreads)]) - else - maximum(a -> maximum(a; init=0), inner_window_allocations; init=0) - end - njobs = count(window_mask) - max_windows = maximum(inner_window_counts) + njobs = count(mask) shape = size(window_ranges) - - fields = (; + return NestedAssessment( shape, njobs, - max_windows, - max_allocations, - window_indices, - window_mask, - inner_window_jobs, - inner_window_allocations, - inner_window_counts, - inner_window_sizes, - inner_window_indices, - inner_window_masks, + mask, + indices, + assessments, ) - - verbose && display(pairs(fields)) - - return fields end # Mosaic the stored files to a RasterStack @@ -453,8 +443,6 @@ function _window_ranges(p::Union{BatchProblem,WindowedProblem}, rast::AbstractRa # Create an iterator of ranges for retreiving each window return [map((i, s, ws) -> i:min(s, i + ws-1), Tuple(c), size, windowsize) for c in corners] end - -# _get_window_with_zeroed_buffer(dest, p, rast, axes(rast)) function _get_window_with_zeroed_buffer!(dest, p::AbstractWindowedProblem, rast::RasterStack, rs) source = view(rast, rs...) @@ -521,4 +509,33 @@ end _isvalid(x) = !isnan(x) && x > zero(x) -_resolution(rast) = abs(step(lookup(rast, X))) \ No newline at end of file +_resolution(rast) = abs(step(lookup(rast, X))) + +abstract type ProblemAssessment end + +@kwdef struct WindowAssessment <: ProblemAssessment + shape::Tuple{Int,Int} + njobs::Int + sizes::Vector{Tuple{Int,Int}} + mask::Vector{Bool} + indices::Vector{Int} +end + +@kwdef struct NestedAssessment <: ProblemAssessment + shape::Tuple{Int,Int} + njobs::Int + mask::Vector{Bool} + indices::Vector{Int} + assessments::Vector{WindowAssessment} +end + +function Base.show(io::IO, mime::MIME"text/plain", bs::ProblemAssessment) + println(io, "NestedAssessment") + println(io) + println(io, "Raster shape: $(bs.shape)") + println(io, "Number of jobs: $(bs.njobs)") + # Use SparseArrays nice matrix printing for the mask + println(io, "Job mask: ") + mask = sparse(reshape(bs.mask, bs.shape)) + Base.print_array(io, mask) +end \ No newline at end of file From c50e7c11762586d05cf57c43dd4e86b8c099d2e1 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Thu, 13 Feb 2025 12:05:44 +0100 Subject: [PATCH 23/51] vector performance --- src/grid.jl | 8 +- src/gridrsp.jl | 75 ++--- src/randomizedshortestpath.jl | 7 +- src/solvers.jl | 599 +++++++++++++++++++--------------- src/tiles.jl | 4 - test/problem.jl | 30 +- 6 files changed, 409 insertions(+), 314 deletions(-) diff --git a/src/grid.jl b/src/grid.jl index 800c883..2e41654 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -17,20 +17,20 @@ Base.inv(::Inv) = Inv() Base.inv(::OddsAgainst) = OddsFor() Base.inv(::OddsFor) = OddsAgainst() -struct Grid +struct Grid{D<:Union{Tuple,Nothing},SQ,TQ} nrows::Int ncols::Int affinities::SparseMatrixCSC{Float64,Int} costfunction::Union{Nothing,Transformation} costmatrix::SparseMatrixCSC{Float64,Int} id_to_grid_coordinate_list::Vector{CartesianIndex{2}} - source_qualities::AbstractMatrix{Float64} - target_qualities::AbstractMatrix{Float64} + source_qualities::SQ + target_qualities::TQ targetidx::Vector{CartesianIndex{2}} targetnodes::Vector{Int} qs::Vector{Float64} qt::Vector{Float64} - dims::Union{Tuple,Nothing} + dims::D end """ diff --git a/src/gridrsp.jl b/src/gridrsp.jl index aa3bd9c..9edf238 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -33,12 +33,6 @@ end _get_grid(grsp::GridRSP) = grsp.g _get_grid(g::Grid) = g -_maybe_raster(mat::Raster, g) = mat -_maybe_raster(mat::AbstractMatrix, g::Union{Grid,GridRSP}) = - _maybe_raster(mat, dims(g)) -_maybe_raster(mat::AbstractMatrix, ::Nothing) = mat -_maybe_raster(mat::AbstractMatrix, dims::Tuple) = Raster(mat, dims) - function Base.show(io::IO, ::MIME"text/plain", grsp::GridRSP) print(io, summary(grsp), " of size ", grsp.g.nrows, "x", grsp.g.ncols) end @@ -54,14 +48,17 @@ DimensionalData.dims(grsp::GridRSP) = dims(grsp.g) Compute RSP betweenness of all nodes weighted by source and target qualities. """ -function betweenness_qweighted(grsp::GridRSP; kw...) +function betweenness_qweighted(grsp::Union{GridRSP,NamedTuple}; + output=fill(NaN, g.nrows, g.ncols), + kw... +) g = grsp.g betvec = RSP_betweenness_qweighted(grsp.W, grsp.Z, g.qs, g.qt, g.targetnodes; kw...) - bet = fill(NaN, g.nrows, g.ncols) + coordinate_list = g.id_to_grid_coordinate_list for (i, v) in enumerate(betvec) - bet[g.id_to_grid_coordinate_list[i]] = v + output[coordinate_list[i]] = v end - return _maybe_raster(bet, grsp) + return _maybe_raster(output, grsp) end """ @@ -70,7 +67,7 @@ end Compute RSP betweenness of all edges weighted by source and target qualities. Returns a sparse matrix where element (i,j) is the betweenness of edge (i,j). """ -function edge_betweenness_qweighted(grsp::GridRSP; kw...) +function edge_betweenness_qweighted(grsp::Union{GridRSP,NamedTuple}; kw...) g = grsp.g return RSP_edge_betweenness_qweighted(grsp.W, grsp.Z, g.qs, g.qt, g.targetnodes; kw...) end @@ -92,19 +89,23 @@ The optional `diagvalue` element specifies which value to use for the diagonal of the matrix of proximities, i.e. after applying the inverse cost function to the matrix of distances. When nothing is specified, the diagonal elements won't be adjusted. """ -function betweenness_kweighted(grsp::GridRSP; proximities=nothing, kw...) +function betweenness_kweighted(grsp::Union{GridRSP,NamedTuple}; + output=fill(NaN, g.nrows, g.ncols), + proximities=nothing, + kw... +) g = grsp.g if isnothing(proximities) proximities = _computeproximities(grsp; kw...) end betvec = RSP_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes; kw...) - bet = fill(NaN, g.nrows, g.ncols) + coordinate_list = g.id_to_grid_coordinate_list for (i, v) in enumerate(betvec) - bet[g.id_to_grid_coordinate_list[i]] = v + output[coordinate_list[i]] = v end - return _maybe_raster(bet, grsp) + return _maybe_raster(output, grsp) end """ @@ -115,7 +116,7 @@ end of proximities, i.e. after applying the inverse cost function to the matrix of expected costs. When nothing is specified, the diagonal elements won't be adjusted. """ -function edge_betweenness_kweighted(grsp::GridRSP; +function edge_betweenness_kweighted(grsp::Union{GridRSP,NamedTuple}; proximities=nothing, distance_transformation=nothing, diagvalue=nothing, @@ -145,26 +146,26 @@ end Compute RSP expected costs from all nodes. """ -expected_cost(grsp::GridRSP; kw...) = +expected_cost(grsp::Union{GridRSP,NamedTuple}; kw...) = RSP_expected_cost(grsp.W, grsp.g.costmatrix, grsp.Z, grsp.g.targetnodes; kw...) -free_energy_distance(grsp::GridRSP; kw...) = +free_energy_distance(grsp::Union{GridRSP,NamedTuple}; kw...) = RSP_free_energy_distance(grsp.Z, grsp.θ, grsp.g.targetnodes; kw...) -survival_probability(grsp::GridRSP; kw...) = +survival_probability(grsp::Union{GridRSP,NamedTuple}; kw...) = RSP_survival_probability(grsp.Z, grsp.θ, grsp.g.targetnodes; kw...) -power_mean_proximity(grsp::GridRSP; kw...) = +power_mean_proximity(grsp::Union{GridRSP,NamedTuple}; kw...) = RSP_power_mean_proximity(grsp.Z, grsp.θ, grsp.g.targetnodes; kw...) -least_cost_distance(grsp::GridRSP; kw...) = least_cost_distance(grsp.g; kw...) +least_cost_distance(grsp::Union{GridRSP,NamedTuple}; kw...) = least_cost_distance(grsp.g; kw...) """ mean_kl_divergence(grsp::GridRSP)::Float64 Compute the mean Kullback–Leibler divergence between the free energy distances and the RSP expected costs for `grsp::GridRSP`. """ -function mean_kl_divergence(grsp::GridRSP; +function mean_kl_divergence(grsp::Union{GridRSP,NamedTuple}; free_energy_distances=nothing, expected_costs=nothing, kw... @@ -180,10 +181,10 @@ function mean_kl_divergence(grsp::GridRSP; else expected_costs end - return mean_kl_divergence(grsp::GridRSP, free_energy_distances, expected_costs; kw...) + return mean_kl_divergence(grsp::Union{GridRSP,NamedTuple}, free_energy_distances, expected_costs; kw...) end -function mean_kl_divergence(grsp::GridRSP, free_energy_distances, expected_costs; +function mean_kl_divergence(grsp::Union{GridRSP,NamedTuple}, free_energy_distances, expected_costs; workspaces=(similar(grsp.Z),), kw... ) g = grsp.g @@ -197,7 +198,7 @@ end Compute the mean Kullback–Leibler divergence between the least-cost path and the random path distribution for `grsp::GridRSP`, weighted by the qualities of the source and target node. """ -function mean_lc_kl_divergence(grsp::GridRSP; +function mean_lc_kl_divergence(grsp::Union{GridRSP,NamedTuple}; workspaces=[similar(grsp.Z)], kw... ) @@ -269,7 +270,7 @@ end Compute the least cost Kullback-Leibler divergence from each cell in the g in `h` to the `target` cell. """ -function least_cost_kl_divergence(grsp::GridRSP, target::Tuple{Int,Int}; kw...) +function least_cost_kl_divergence(grsp::Union{GridRSP,NamedTuple}, target::Tuple{Int,Int}; kw...) g = grsp.g targetnode = findfirst(isequal(CartesianIndex(target)), g.id_to_grid_coordinate_list) if targetnode === nothing @@ -333,14 +334,15 @@ function connected_habitat( return connected_habitat(grsp, S, diagvalue=diagvalue) end -function connected_habitat(grsp::GridRSP; proximities=nothing, kw...) +function connected_habitat(grsp::Union{GridRSP,NamedTuple}; proximities=nothing, kw...) if isnothing(proximities) proximities = _computeproximities(grsp; kw...) end return connected_habitat(grsp, proximities; kw...) end -function connected_habitat(grsp::Union{Grid,GridRSP}, S::Matrix; +function connected_habitat(grsp::Union{Grid,GridRSP,NamedTuple}, S::Matrix; diagvalue::Union{Nothing,Real}=nothing, + output=fill(NaN, g.nrows, g.ncols), kw... ) g = _get_grid(grsp) @@ -353,14 +355,13 @@ function connected_habitat(grsp::Union{Grid,GridRSP}, S::Matrix; funvec = connected_habitat(g.qs, g.qt, S; kw...) - func = fill(NaN, g.nrows, g.ncols) for (ij, x) in zip(g.id_to_grid_coordinate_list, funvec) - func[ij] = x + output[ij] = x end - return _maybe_raster(func, grsp) + return _maybe_raster(output, grsp) end -function connected_habitat(grsp::GridRSP, +function connected_habitat(grsp::Union{GridRSP,NamedTuple}, cell::CartesianIndex{2}; distance_transformation=nothing, diagvalue=nothing, @@ -431,7 +432,7 @@ to proximities by `distance_transformation` which defaults to the inverse of the in the underlying `Grid` (if defined). Optionally, the diagonal values of the proximity matrix may be set to `diagvalue`. The `tol` argument specifies the convergence tolerance in the Arnoldi based eigensolver. """ -function LinearAlgebra.eigmax(grsp::GridRSP; +function LinearAlgebra.eigmax(grsp::Union{GridRSP,NamedTuple}; connectivity_function=expected_cost, distance_transformation=nothing, diagvalue=nothing, @@ -567,10 +568,11 @@ for the cell to `avalue` as well as the source and target qualities associated w the cell to `qˢvalue` and `qᵗvalue` respectively. It is required that `avalue` is positive to avoid that the graph becomes disconnected. """ -function criticality(grsp::GridRSP; +function criticality(grsp::Union{GridRSP,NamedTuple}; distance_transformation=nothing, diagvalue=nothing, avalue=floatmin(), + output=fill(NaN, size(grsp.g)), qˢvalue=0.0, qᵗvalue=0.0, kw... @@ -588,10 +590,9 @@ function criticality(grsp::GridRSP; )) end - landscape = fill(NaN, size(grsp.g)) - landscape[g.targetidx] = critvec + output[g.targetidx] = critvec - return _maybe_raster(landscape, grsp) + return _maybe_raster(output, grsp) end function _computeproximities(grsp; diff --git a/src/randomizedshortestpath.jl b/src/randomizedshortestpath.jl index bcd155c..d5ebfe2 100644 --- a/src/randomizedshortestpath.jl +++ b/src/randomizedshortestpath.jl @@ -10,7 +10,7 @@ end _inv(Z) = _inv!(similar(Z), Z) function _inv!(Zⁱ, Z) - broadcast(Z) do x + broadcast!(Zⁱ, Z) do x x = inv(x) isfinite(x) ? x : floatmax(eltype(Z)) end @@ -56,7 +56,8 @@ function RSP_betweenness_qweighted(W::SparseMatrixCSC, ZqˢZⁱqᵗZt = ldiv!(solver, Aadj_init, qˢZⁱqᵗ; B_copy=copy!(workspace2, qˢZⁱqᵗ)) ZqˢZⁱqᵗZt .*= Z - return sum(ZqˢZⁱqᵗZt, dims=2) # diag(Z * ZqˢZⁱqᵗ') + # TODO remove this allocation + return sum.(eachslice(ZqˢZⁱqᵗZt, dims=1)) # diag(Z * ZqˢZⁱqᵗ') end @@ -221,9 +222,9 @@ function RSP_expected_cost(W::SparseMatrixCSC, A_init=init(solver, A), workspaces=[similar(Z), similar(Z)], expected_costs=similar(Z), + CW=C .* W, kw... ) - CW = C .* W workspace1, workspace2 = workspaces if axes(W) != axes(C) throw(DimensionMismatch("")) diff --git a/src/solvers.jl b/src/solvers.jl index 54a9549..7c12fd2 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -1,10 +1,71 @@ -# Defined in ConScape.jl for load order -# abstract type Solver end +""" + MatrixSolver(; check) + +Solve all operations on a fully materialised Z matrix. + +This is fast but memory inneficient for CPUS, and isn't threaded. +But may be best for GPUs using CuSSP.jl ? +""" +@kwdef struct MatrixSolver <: Solver + check::Bool = true +end + +""" + VectorSolver(; check, threaded) + +Use julias default solver but broken into columns, with +less memory use and the capacity for threading +""" +@kwdef struct VectorSolver <: Solver + check::Bool = true + threaded::Bool = false +end + +""" + LinearSolver(args...; threded, kw...) + +Solve all operations column-by-column using LinearSolve.jl solvers. + +The `threaded` keyword specifies if threads are used per target. +Other arguments and keywords are passed to `LinearSolve.solve` after the +problem object, like: + +````julia +`LinearSolve.solve(linearproblem, args...; kw...)` +```` + +# Example + +This example uses LinearSolve.jl wth `KrylovJL_GMRES` and a preconditioner. + +TODO: an example that is realistic + +````julia +using LinearSolve +distance_transformation = (exp=x -> exp(-x/75), oddsfor=ConScape.OddsFor()), +problem = ConScape.Problem(; + solver = LinearSolver(KrylovJL_GMRES(precs = (A, p) -> (Diagonal(A), I))) + graph_measures = (; + func=ConScape.ConnectedHabitat(), + qbetw=ConScape.BetweennessQweighted(), + ), + connectivity_measure = ConScape.ExpectedCost(θ=1.0), +) +```` +""" +struct LinearSolver <: Solver + args + keywords + threaded::Bool +end +LinearSolver(args...; threaded=false, kw...) = LinearSolver(args, kw, threaded) + +# In `init!` we allocate all large dense arrays function init!( ws::NamedTuple, - s::Solver, + solver::Solver, cm::FundamentalMeasure, - p::AbstractProblem, + p::AbstractProblem, rast::RasterStack; verbose=false, ) @@ -16,7 +77,7 @@ function init!( verbose && println("Defining sparse arrays...") # B_dense becomes Z verbose && println("Allocating workspaces...") - sze = (size(g.costmatrix, 1), length(g.targetnodes)) + sze = _workspace_size(solver, g) Z = if hastrait(needs_inv, gms) if haskey(ws, :Z) _reshape(ws.Z, sze) @@ -58,62 +119,220 @@ function init!( else nothing end + # TODO handle mixed distance functions + outputs = if cm.distance_transformation isa NamedTuple + map(gms) do gm + if needs_connectivity(gm) + map(cm.distance_transformation) do dt + returntype(gm) isa ReturnsDenseSpatial ? fill(0.0, size(rast)) : nothing + end + else + fill(0.0, size(rast)) + end + end + else + map(gms) do gm + returntype(gm) isa ReturnsDenseSpatial ? fill(0.0, size(rast)) : nothing + end + end verbose && println("Finished allocating...") - return (; Z, Zⁱ, workspaces, permuted_workspaces, grid, free_energy_distances, expected_costs, proximities) + return (; Z, Zⁱ, workspaces, permuted_workspaces, g=grid, grid, free_energy_distances, expected_costs, proximities, outputs) +end +function init!( + workspace::NamedTuple, s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, rast::RasterStack; + verbose=false, +) + # TODO what is needed here? + return (; grid=Grid(p, rast)) +end + +# Solver init +init(::Union{Nothing,Solver}, A::AbstractMatrix) = (; F=lu(A)) +# function init(s::VectorSolver, A::AbstractMatrix) +# F = lu(A) +# Tb = Vector{eltype(A)} +# # if isthreaded(s) +# # # Create one init per thread +# # # UMFPACK `copy` shares memory but avoids workspace race conditions +# # nbuffers = Threads.nthreads() +# # [ +# # (; +# # F=(i == 1 ? F : copy(F)), +# # b=Tb(undef, size(A, 2)) +# # ) +# # for i in 1:nbuffers +# # ] +# # else +# b = Tb(undef, size(A, 2)) +# return (; F, b) +# # end +# end +function init(s::LinearSolver, A) + b = zeros(eltype(A), size(A, 2)) + # Define and initialise the linear problem + linprob = LinearProblem(A, b) + linsolve = init(linprob, s.args...; s.keywords...) + # TODO what is needed here? + nbuffers = Threads.nthreads() + # Create a channel to store problem b vectors for threads + # see https://juliafolds2.github.io/OhMyThreads.jl/stable/literate/tls/tls/ + channel = Channel{Tuple{typeof(linsolve),Vector{Float64}}}(nbuffers) + for i in 1:nbuffers + # TODO fix this in LinearSolve.jl with batching + # We should not need to `deepcopy` the whole problem we + # just need to replicate the specific workspace arrays + # that will cause race conditions. + # But currently there is no parallel mode for LinearSolve.jl + # See https://github.com/SciML/LinearSolve.jl/issues/552 + put!(channel, (deepcopy(linsolve), Vector{eltype(A)}(undef, size(A, 2)))) + end + return (; linsolve, channel, b) end # RSP is not used for ConnectivityMeasure, so the solver isn't used function solve!( workspace::NamedTuple, - s::Solver, + s::Union{MatrixSolver,LinearSolver}, cm::ConnectivityMeasure, p::AbstractProblem; verbose=false ) - g = workspace.grid - return map(p.graph_measures) do gm - compute(gm, p, ; workspace...) + g = workspace.g + return map(p.graph_measures, workspace.outputs) do gm, output + compute(gm, p, ; workspace..., output) + end +end +function solve!( + ws::NamedTuple, + solver::Union{MatrixSolver,LinearSolver}, + cm::FundamentalMeasure, + p::Problem; + verbose=false, +) + ws1 = _init_sparse(ws, solver, cm, p, ws.grid; verbose) + ws2 = _solve_dense!(ws1, solver, cm, p; verbose) + gms = graph_measures(p) + results = _solve!(ws2, solver, cm, cm.distance_transformation, gms, p; verbose) + return _merge_to_stack(results) +end +function solve!( + ws::NamedTuple, + solver::VectorSolver, + cm, + p::Problem; + verbose=false, +) + # Get grid and preallocated vectors + (; g) = ws + gms = graph_measures(p) + # Predefine min-vectors for targets (not worth putting in the workspace) + target_qualities = g.target_qualities[g.targetnodes[1]] + targetidx = g.targetidx[1:1] + targetnodes = g.targetnodes[1:1] + qt = g.qt[1:1] + target_allocs = (; target_qualities, targetidx, targetnodes, qt) + _update_targets!(target_allocs, g, 1) + target_properties = (; targetidx, targetnodes, qt) + target_grid = ConstructionBase.setproperties(g, target_properties) + first = true + ws1 =_init_sparse(ws, solver, cm, p, target_grid; verbose) + ws2 = merge(ws1, (; grid=target_grid, g=target_grid)) + target_ws = ConstructionBase.setproperties(ws2, (; g=target_grid, grid=target_grid)) + target_ws1 = _solve_dense!(target_ws, solver, cm, p; verbose) + result1 = _solve!(target_ws1, solver, cm, cm.distance_transformation, gms, p; verbose) + target_results = Vector{typeof(result1)}(undef, length(g.targetnodes)) + target_results[1] = result1 + # solve one target at a time + for i in eachindex(g.targetnodes)[2:end] + target_qualities = g.target_qualities[g.targetidx[i]] + _update_targets!(target_allocs, g, i) + first = false + # And rebuild the workspace with the new grid + target_ws = ConstructionBase.setproperties(ws2, (; g=target_grid, grid=target_grid)) + # Use the matrix solve on this smaller problem + target_ws1 = _solve_dense!(target_ws, solver, cm, p; verbose) + result = _solve!(target_ws1, solver, cm, cm.distance_transformation, gms, p; verbose) + target_results[i] = result + end + return _merge_to_stack(_maybe_raster(ws.outputs, g)) +end + +function _solve!(workspace, solver, cm, dt::NamedTuple{DT}, gms::NamedTuple{GMS}, p; verbose) where {DT,GMS} + (; grid, Pref, W, Z, outputs) = workspace + # GridRSP is just a wrapper now, we can remove it later + grsp = GridRSP(grid, cm.θ, Pref, W, Z) + # Map over both distance transformations and graph measures + nested = map(values(dt), DT) do dt, k + cm1 = ConstructionBase.setproperties(cm, (; distance_transformation=dt)) + hastrait(needs_proximity, gms) && + _setproximities!(workspace.proximities, workspace.expected_costs, cm1, p, grsp) + # Rebuild the problem with a connectivity measure + # holding a single distance transformation, in case its used + p1 = ConstructionBase.setproperties(p, (; connectivity_measure=cm1)) + map(gms, outputs) do gm, os + if needs_connectivity(gm) + compute(gm, p1, grsp; workspace..., output=os[k]) + else + nothing + end + end + end |> NamedTuple{DT} + # Map over graph measures that don't need connectivity + flat = map(gms, outputs) do gm, output + if needs_connectivity(gm) + nothing + else + compute(gm, p, grsp; workspace..., output) + end + end + return _combine_nested_flat(gms, nested, flat) +end +Base.@assume_effects :foldable function _combine_nested_flat( + gms::NamedTuple{GMS}, nested, flat +) where GMS + # Combine nested and flat results + map(GMS) do k + f = flat[k] + if isnothing(f) + map(n -> n[k], nested) + else + f + end + end |> NamedTuple{GMS} +end +function _solve!(workspace, solver, cm, dt, gms::NamedTuple{GMS}, p; verbose) where GMS + (; grid, Pref, W, Z, outputs) = workspace + # GridRSP is just a wrapper now, we can remove it later + grsp = GridRSP(grid, cm.θ, Pref, W, Z) + hastrait(needs_proximity, gms) && + _setproximities!(workspace.proximities, workspace.expected_costs, cm, p, grsp) + # Map over graph measures + map(p.graph_measures, outputs) do gm, output + compute(gm, p, grsp; workspace..., output) end end +function _update_targets!(a, g, i) + # target_qualities[:, 1] = g.target_qualities[i] + a.targetidx[1] = g.targetidx[i] + a.targetnodes[1] = g.targetnodes[i] + a.qt[1] = g.qt[i] + return nothing +end + # Do all the work shared accross outputs -function _shared_solves!(ws::NamedTuple, solver::Solver, cm, p::Problem; +function _solve_dense!(ws::NamedTuple, solver::Solver, cm, p::Problem; verbose=false ) - (; grid) = ws + (; grid, W, Pref, A, A_init, Aadj_init, Aadj) = ws gms = graph_measures(p) cf = connectivity_function(p) - - verbose && println("Initialising factorizations...") - Pref = _Pref(grid.affinities) - W = _W(Pref, cm.θ, grid.costmatrix) # Sparse rhs + # TODO get rid of this allocation + # For VectorSolver we can write values directly to B B_sparse = sparse_rhs(grid.targetnodes, size(grid.costmatrix, 1)) - # Sparse lfs - A = I - W - A_init = init(solver, A) - Aadj_init, Aadj = if hastrait(needs_Aaj_init, gms) - # Just take the adjoint of the factorization of A - # where possible to save calculations and memory - Aadj_init, Aadj = if hasproperty(A_init, :F) - Aadj = A' - # Use adjoint factorization of A rather than recalculating for A' - Aadj_init = merge(A_init, (; F=A_init.F')) - Aadj_init, Aadj - else - # LinearSolve.jl cant handle the adjoint - # so we duplicate work and allocations - Aadj = sparse(A') - Aadj_init = init(solver, Aadj) - Aadj_init, Aadj - end - Aadj_init, Aadj - else - nothing, nothing - end - Z = if hastrait(needs_Z, gms) # verbose && B = _reshape(ws.Z, size(B_sparse)) @@ -143,204 +362,49 @@ function _shared_solves!(ws::NamedTuple, solver::Solver, cm, p::Problem; ConScape.free_energy_distance(grsp; workspace..., free_energy_distances, solver) end - return (; ws..., Pref, W, A, A_init, Aadj, Aadj_init, Z, Zⁱ, expected_costs, free_energy_distances) + return merge(ws, (; Pref, W, A, A_init, Aadj, Aadj_init, Z, Zⁱ, expected_costs, free_energy_distances)) end -function solve!( - ws::NamedTuple, - solver::Solver, - cm::FundamentalMeasure, - p::Problem; - verbose=false, -) - workspace = _shared_solves!(ws, solver, cm, p; verbose) +function _init_sparse(ws::NamedTuple, solver, cm, p::Problem, grid::Grid; verbose) gms = graph_measures(p) - (; grid, Pref, W, Z) = workspace - # GridRSP is just a wrapper now, we can remove it later - grsp = GridRSP(grid, cm.θ, Pref, W, Z) - distance_transformation = cm.distance_transformation - results = if distance_transformation isa NamedTuple - # Map over both distance transformations and graph measures - nested = map(distance_transformation) do dt - cm1 = ConstructionBase.setproperties(cm, (; distance_transformation=dt)) - hastrait(needs_proximity, gms) && - _setproximities!(workspace.proximities, workspace.expected_costs, cm1, p, grsp) - # Rebuild the problem with a connectivity measure - # holding a single distance transformation, in case its used - p1 = ConstructionBase.setproperties(p, (; connectivity_measure=cm1)) - map(gms) do gm - if needs_connectivity(gm) - compute(gm, p1, grsp; workspace...) - else - nothing - end - end - end - # Map over graph measures that don't need connectivity - flat = map(gms) do gm - if needs_connectivity(gm) - nothing - else - compute(gm, p, grsp; workspace...) - end - end - # Combine nested and flat results - map(keys(gms)) do k - f = flat[k] - if isnothing(f) - map(n -> n[k], nested) - else - f - end - end |> NamedTuple{keys(gms)} - else - hastrait(needs_proximity, gms) && - _setproximities!(workspace.proximities, workspace.expected_costs, cm, p, grsp) - # Map over graph measures - map(p.graph_measures) do gm - compute(gm, p, grsp; workspace...) + verbose && println("Initialising sparse factorizations...") + Pref = _Pref(grid.affinities) + W = _W(Pref, cm.θ, grid.costmatrix) + # Sparse lhs + A = I - W + A_init = init(solver, A) + Aadj_init, Aadj = if hastrait(needs_Aaj_init, gms) + # Just take the adjoint of the factorization of A + # where possible to save calculations and memory + Aadj_init, Aadj = if hasproperty(A_init, :F) + Aadj = A' + # Use adjoint factorization of A rather than recalculating for A' + Aadj_init = merge(A_init, (; F=A_init.F')) + Aadj_init, Aadj + else + # LinearSolve.jl cant handle the adjoint + # so we duplicate work and allocations + Aadj = sparse(A') + Aadj_init = init(solver, Aadj) + Aadj_init, Aadj end - end - return _merge_to_stack(results) -end - - -function _init!( - workspace::NamedTuple, s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, rast::RasterStack; - verbose=false, -) - # TODO what is needed here? - return (; grid=Grid(p, rast)) -end - -LinearAlgebra.ldiv!(solver::Solver, A::AbstractMatrix, B::AbstractMatrix; kw...) = - ldiv!(solver, init(solver, A), B; kw...) - -""" - MatrixSolver(; check) - -Solve all operations on a fully materialised Z matrix. - -This is fast but memory inneficient for CPUS, and isn't threaded. -But may be best for GPUs using CuSSP.jl ? -""" -@kwdef struct MatrixSolver <: Solver - check::Bool = true -end - -init(::Union{Nothing,MatrixSolver}, A::AbstractMatrix) = (; F=lu(A)) - -# TODO: no type pyracy -LinearAlgebra.ldiv!(::Union{MatrixSolver,Nothing}, (; F), B; B_copy=copy(B)) = - ldiv!(B, F, B_copy) - -""" - VectorSolver(; check, threaded) - -Use julias default solver but broken into columns, with -less memory use and the capacity for threading -""" -@kwdef struct VectorSolver <: Solver - check::Bool = true - threaded::Bool = false -end - -isthreaded(s::VectorSolver) = s.threaded - -function init(s::VectorSolver, A::AbstractMatrix) - F = lu(A) - Tb = Vector{eltype(A)} - if isthreaded(s) - # Create one init per thread - # UMFPACK `copy` shares memory but avoids workspace race conditions - nbuffers = Threads.nthreads() - [ - (; - F=(i == 1 ? F : copy(F)), - b=Tb(undef, size(A, 2)) - ) - for i in 1:nbuffers - ] + Aadj_init, Aadj else - b = Tb(undef, size(A, 2)) - return [(; F, b)] + nothing, nothing end -end -function LinearAlgebra.ldiv!(s::VectorSolver, init, B; B_copy=nothing) - # for SparseArrays.UMFPACK._AqldivB_kernel!(Z, F, B, transposeoptype) - transposeoptype = SparseArrays.LibSuiteSparse.UMFPACK_A - - # This is basically SparseArrays.UMFPACK._AqldivB_kernel! - # But we unroll it to avoid copies or allocation of B - if isthreaded(s) - channel = Channel{typeof(init[1])}(length(init)) - for x in init - put!(channel, x) - end - # Create a channel to store problem b vectors for threads - # see https://juliafolds2.github.io/OhMyThreads.jl/stable/literate/tls/tls/ - Threads.@threads for col in 1:size(B, 2) - # Get a workspace from the channel - F_t, b_t = take!(channel) - # Copy a column from B - b_t .= view(B, :, col) - # Solve for the column - SparseArrays.UMFPACK.solve!(view(B, :, col), F_t, b_t, transposeoptype) - # Reuse the workspace - put!(channel, (F_t, b_t)) - end - else - (; F, b) = init[1] - for col in 1:size(B, 2) - b .= view(B, :, col) - SparseArrays.UMFPACK.solve!(view(B, :, col), F, b, transposeoptype) - end - end + CW = grid.costmatrix .* W - return B + return merge(ws, (; W, Pref, A, A_init, Aadj_init, Aadj, CW)) end -""" - LinearSolver(args...; threded, kw...) - -Solve all operations column-by-column using LinearSolve.jl solvers. - -The `threaded` keyword specifies if threads are used per target. -Other arguments and keywords are passed to `LinearSolve.solve` after the -problem object, like: - -````julia -`LinearSolve.solve(linearproblem, args...; kw...)` -```` - -# Example - -This example uses LinearSolve.jl wth `KrylovJL_GMRES` and a preconditioner. - -TODO: an example that is realistic - -````julia -using LinearSolve -problem = ConScape.Problem(; - solver = LinearSolver(KrylovJL_GMRES(precs = (A, p) -> (Diagonal(A), I))) - graph_measures = (; - func=ConScape.ConnectedHabitat(), - qbetw=ConScape.BetweennessQweighted(), - ), - distance_transformation = (exp=x -> exp(-x/75), oddsfor=ConScape.OddsFor()), - connectivity_measure = ConScape.ExpectedCost(θ=1.0), -) -```` -""" -struct LinearSolver <: Solver - args - keywords - threaded::Bool -end -LinearSolver(args...; threaded=false, kw...) = LinearSolver(args, kw, threaded) +_workspace_size(::Solver, g) = size(g.costmatrix, 1), length(g.targetnodes) +# Vector solver is one target at a time +_workspace_size(::VectorSolver, g) = size(g.costmatrix, 1), 1 +isthreaded(s::Solver) = false isthreaded(s::LinearSolver) = s.threaded +isthreaded(s::VectorSolver) = s.threaded function LinearAlgebra.ldiv!(s::LinearSolver, (; linsolve, channel, b), B) # TODO: for now we define a Z matrix, but later modify ops @@ -373,29 +437,43 @@ function LinearAlgebra.ldiv!(s::LinearSolver, (; linsolve, channel, b), B) end return B end - -function init(s::LinearSolver, A) - b = zeros(eltype(A), size(A, 2)) - # Define and initialise the linear problem - linprob = LinearProblem(A, b) - linsolve = init(linprob, s.args...; s.keywords...) - # TODO what is needed here? - nbuffers = Threads.nthreads() - # Create a channel to store problem b vectors for threads - # see https://juliafolds2.github.io/OhMyThreads.jl/stable/literate/tls/tls/ - channel = Channel{Tuple{typeof(linsolve),Vector{Float64}}}(nbuffers) - for i in 1:nbuffers - # TODO fix this in LinearSolve.jl with batching - # We should not need to `deepcopy` the whole problem we - # just need to replicate the specific workspace arrays - # that will cause race conditions. - # But currently there is no parallel mode for LinearSolve.jl - # See https://github.com/SciML/LinearSolve.jl/issues/552 - put!(channel, (deepcopy(linsolve), Vector{eltype(A)}(undef, size(A, 2)))) - end - return (; linsolve, channel, b) -end - +LinearAlgebra.ldiv!(::Union{MatrixSolver,VectorSolver,Nothing}, (; F), B; B_copy=copy(B)) = + ldiv!(B, F, B_copy) +# LinearAlgebra.ldiv!(solver::Solver, A::AbstractMatrix, B::AbstractMatrix; kw...) = + # ldiv!(solver, init(solver, A), B; kw...) +# function LinearAlgebra.ldiv!(s::VectorSolver, init, B; B_copy=nothing) +# # for SparseArrays.UMFPACK._AqldivB_kernel!(Z, F, B, transposeoptype) +# transposeoptype = SparseArrays.LibSuiteSparse.UMFPACK_A + +# # This is basically SparseArrays.UMFPACK._AqldivB_kernel! +# # But we unroll it to avoid copies or allocation of B +# if isthreaded(s) +# channel = Channel{typeof(init[1])}(length(init)) +# for x in init +# put!(channel, x) +# end +# # Create a channel to store problem b vectors for threads +# # see https://juliafolds2.github.io/OhMyThreads.jl/stable/literate/tls/tls/ +# Threads.@threads for col in 1:size(B, 2) +# # Get a workspace from the channel +# F_t, b_t = take!(channel) +# # Copy a column from B +# b_t .= view(B, :, col) +# # Solve for the column +# SparseArrays.UMFPACK.solve!(view(B, :, col), F_t, b_t, transposeoptype) +# # Reuse the workspace +# put!(channel, (F_t, b_t)) +# end +# else +# (; F, b) = init[1] +# for col in 1:size(B, 2) +# b .= view(B, :, col) +# SparseArrays.UMFPACK.solve!(view(B, :, col), F, b, transposeoptype) +# end +# end + +# return B +# end # Utils # We may have multiple distance_measures per @@ -406,7 +484,7 @@ function _merge_to_stack(nt::NamedTuple{K}) where K unique_nts = map(K) do k _mergename(Val{k}(), nt[k]) end - # merge unique layers into a sinlge RasterStack + # merge unique layers into a single RasterStack nt = merge(unique_nts...) if all(map(x -> x isa Raster, nt)) return RasterStack(nt) @@ -414,9 +492,17 @@ function _merge_to_stack(nt::NamedTuple{K}) where K return nt # Cant return a RasterStack for these outputs end end + +_maybe_raster(x) = x _maybe_raster(x::Raster) = x _maybe_raster(x::Number) = Raster(fill(x), ()) -_maybe_raster(x) = x +_maybe_raster(mat::Raster, g) = mat +_maybe_raster(mat::AbstractMatrix, g::Union{Grid,GridRSP}) = + _maybe_raster(mat, dims(g)) +_maybe_raster(mats::NamedTuple, g::Union{Grid,GridRSP}) = + map(mat -> _maybe_raster(mat, g), mats) +_maybe_raster(mat::AbstractMatrix, ::Nothing) = mat +_maybe_raster(mat::AbstractMatrix, dims::Tuple) = Raster(mat, dims) function _mergename(::Val{K1}, gm::NamedTuple{K2}) where {K1, K2} # Combine outer and inner names with an underscore @@ -448,9 +534,10 @@ function _setproximities!( g = grsp.g dt = cm.distance_transformation if isnothing(dt) - dt = inv(g.costfunction) + map!(inv(g.costfunction), proximities, expected_costs) + else + map!(dt, proximities, expected_costs) end - map!(dt, proximities, expected_costs) _maybe_set_diagonal!(proximities, g, diagvalue(p)) return proximities end @@ -460,11 +547,13 @@ function _reshape(A::Array, dims::Tuple{Vararg{Int}}) len = prod(dims) if size(A) == dims A - elseif length(A) >= len + else # if length(A) >= len + # TODO make sure this doesn't allocate when the array is larger + # We may need julia 1.11 to do this properly v = vec(A) resize!(v, len) reshape(v, dims) - else - error("Arrays were not sorted. Current len: $(length(A)), needed len: $len") + # else + # error("Arrays were not sorted. Current len: $(length(A)), needed len: $len") end end \ No newline at end of file diff --git a/src/tiles.jl b/src/tiles.jl index 8743f25..e47b3a2 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -141,10 +141,7 @@ function init!(workspace::NamedTuple, p::WindowedProblem, rast::RasterStack; # VERY important to use _get_window_with_zeroed_buffer here not just index the raster # Otherwise memory use will be TB largest_rast = _get_window_with_zeroed_buffer(view, p, rast, window_ranges[first(sorted_indices)]) - @show size(rast) - @show size(largest_rast) window_workspaces = if haskey(workspace, :window_workspaces) - @show length(workspace.window_workspaces) [init!(ws, p.problem, largest_rast; verbose) for ws in window_workspaces] else [init(p.problem, largest_rast; verbose) for _ in 1:n] @@ -279,7 +276,6 @@ function init!(workspace, p::BatchProblem, i::Int; verbose=true, ) (; window_indices, window_ranges, rast) = workspace - @show length(window_ranges) # Get the raster data for job i window = window_ranges[window_indices[i]] verbose && @show window diff --git a/test/problem.jl b/test/problem.jl index 9ab6505..eca1809 100644 --- a/test/problem.jl +++ b/test/problem.jl @@ -26,11 +26,11 @@ graph_measures = graph_measures = (; betq=ConScape.BetweennessQweighted(), betk=ConScape.BetweennessKweighted(), # # TODO sens=ConScape.Sensitivity(), - ebetq=ConScape.EdgeBetweennessQweighted(), - ebetk=ConScape.EdgeBetweennessKweighted(), - mkld=ConScape.MeanKullbackLeiblerDivergence(), - mlcd=ConScape.MeanLeastCostKullbackLeiblerDivergence(), - eigmax=ConScape.EigMax(), + # ebetq=ConScape.EdgeBetweennessQweighted(), + # ebetk=ConScape.EdgeBetweennessKweighted(), + # mkld=ConScape.MeanKullbackLeiblerDivergence(), + # mlcd=ConScape.MeanLeastCostKullbackLeiblerDivergence(), + # eigmax=ConScape.EigMax(), # crit=ConScape.Criticality(), # very very slow, each target makes a new grid ) distance_transformation = (nodist=nothing, one=one, exp50=t -> exp(-t/50)) @@ -59,10 +59,10 @@ solvers = ( # ConScape.VectorSolver(; threaded=true), # ConScape.LinearSolver(), ) -solver = ConScape.VectorSolver(; threaded=true) solver = ConScape.MatrixSolver() +solver = ConScape.VectorSolver() -for solver in solvers +# for solver in solvers println("\n Testing with solver: ", solver) # Basic Problem problem = ConScape.Problem(; @@ -77,10 +77,17 @@ for solver in solvers end end - result = ConScape.solve!(workspace, problem); - @test workspace.expected_costs == ConScape.expected_cost(test_grsp) - @test workspace.free_energy_distances == ConScape.free_energy_distance(test_grsp) - @test workspace.Z == test_grsp.Z + using Cthulhu + @descend + using BenchmarkTools + using ProfileView + ProfileView. + ConScape.solve!(workspace, problem); + if solver isa ConScape.MatrixSolver + @test workspace.expected_costs == ConScape.expected_cost(test_grsp) + @test workspace.free_energy_distances == ConScape.free_energy_distance(test_grsp) + @test workspace.Z == test_grsp.Z + end # @profview result = ConScape.solve(problem, workspace) @test result isa NamedTuple @@ -108,6 +115,7 @@ for solver in solvers @testset "k-weighted" begin @test result.betk_nodist isa Raster bet = ConScape.betweenness_kweighted(test_grsp) + result @test isapprox(result.betk_nodist[21:23, 31:33], [0.04063917813171917 0.06843246983487516 0.08862506281612659 0.03684621201600996 0.10352876485995872 0.1255652231824746 From bf7091fb575ebbda22b2025caffcc304d5a5bb24 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Thu, 13 Feb 2025 16:10:48 +0100 Subject: [PATCH 24/51] fix tests for vector targets --- src/gridrsp.jl | 12 +- src/solvers.jl | 283 ++++++++++++++++++++++-------------------- src/tiles.jl | 4 +- test/problem.jl | 312 +++++++++++++++++++---------------------------- test/windowed.jl | 158 ++++++++++++++++++++++++ 5 files changed, 444 insertions(+), 325 deletions(-) create mode 100644 test/windowed.jl diff --git a/src/gridrsp.jl b/src/gridrsp.jl index 9edf238..fe1d201 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -55,8 +55,9 @@ function betweenness_qweighted(grsp::Union{GridRSP,NamedTuple}; g = grsp.g betvec = RSP_betweenness_qweighted(grsp.W, grsp.Z, g.qs, g.qt, g.targetnodes; kw...) coordinate_list = g.id_to_grid_coordinate_list + for (i, v) in enumerate(betvec) - output[coordinate_list[i]] = v + output[coordinate_list[i]] += v end return _maybe_raster(output, grsp) end @@ -90,7 +91,7 @@ of the matrix of proximities, i.e. after applying the inverse cost function to t matrix of distances. When nothing is specified, the diagonal elements won't be adjusted. """ function betweenness_kweighted(grsp::Union{GridRSP,NamedTuple}; - output=fill(NaN, g.nrows, g.ncols), + output=fill(NaN, size(grsp.g)), proximities=nothing, kw... ) @@ -101,10 +102,9 @@ function betweenness_kweighted(grsp::Union{GridRSP,NamedTuple}; betvec = RSP_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes; kw...) coordinate_list = g.id_to_grid_coordinate_list - for (i, v) in enumerate(betvec) - output[coordinate_list[i]] = v - end + output[coordinate_list] .+= betvec + # display(heatmap(output)) return _maybe_raster(output, grsp) end @@ -342,7 +342,7 @@ function connected_habitat(grsp::Union{GridRSP,NamedTuple}; proximities=nothing, end function connected_habitat(grsp::Union{Grid,GridRSP,NamedTuple}, S::Matrix; diagvalue::Union{Nothing,Real}=nothing, - output=fill(NaN, g.nrows, g.ncols), + output=fill(NaN, size(grsp.g)), kw... ) g = _get_grid(grsp) diff --git a/src/solvers.jl b/src/solvers.jl index 7c12fd2..039fb41 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -53,25 +53,59 @@ problem = ConScape.Problem(; ) ```` """ -struct LinearSolver <: Solver - args - keywords +struct LinearSolver{A,K} <: Solver + args::A + keywords::K threaded::Bool end LinearSolver(args...; threaded=false, kw...) = LinearSolver(args, kw, threaded) # In `init!` we allocate all large dense arrays function init!( + ws::NamedTuple, + solver::MatrixSolver, + cm::FundamentalMeasure, + p::AbstractProblem, + rast::RasterStack; + verbose=false, +) + _init!(ws, solver, cm, p, rast; verbose) +end +function init!( + ws::NamedTuple, + solver::Union{VectorSolver,LinearSolver}, + cm::FundamentalMeasure, + p::AbstractProblem, + rast::RasterStack; + verbose=false, +) + grid = Grid(p, rast) + workspace = _init!(ws, solver, cm, p, rast; verbose) + if isthreaded(solver) + nbuffers = Thread.nthreads() + channel = Channel{typeof(workspace)}(nbuffers) + put!(channel, workspace) + for n in 2:nbuffers + workspace_n = _init!(ws, solver, cm, p, rast; verbose, grid) + put!(channel, workspace_N) + end + return (; channel) + else + return workspace + end +end +function _init!( ws::NamedTuple, solver::Solver, cm::FundamentalMeasure, p::AbstractProblem, rast::RasterStack; verbose=false, + grid=Grid(p, rast) ) verbose && println("Defining grid for RasterStack size $(size(rast))...") - grid = g = Grid(p, rast) verbose && println("Retreiving measures...") + g = grid gms = graph_measures(p) cf = connectivity_function(p) verbose && println("Defining sparse arrays...") @@ -119,20 +153,29 @@ function init!( else nothing end - # TODO handle mixed distance functions - outputs = if cm.distance_transformation isa NamedTuple + function matrix_or_nothing(gm) + if returntype(gm) isa ReturnsDenseSpatial + A = fill(NaN, size(rast)) + A[grid.id_to_grid_coordinate_list] .= 0.0 + A + else + nothing + end + end + # We don't re-use outputs + outputs = if distance_transformation(cm) isa NamedTuple map(gms) do gm if needs_connectivity(gm) - map(cm.distance_transformation) do dt - returntype(gm) isa ReturnsDenseSpatial ? fill(0.0, size(rast)) : nothing + map(distance_transformation(cm)) do dt + matrix_or_nothing(gm) end else - fill(0.0, size(rast)) + matrix_or_nothing(gm) end end else map(gms) do gm - returntype(gm) isa ReturnsDenseSpatial ? fill(0.0, size(rast)) : nothing + matrix_or_nothing(gm) end end @@ -143,70 +186,28 @@ end function init!( workspace::NamedTuple, s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, rast::RasterStack; verbose=false, + grid=Grid(p, rast), ) # TODO what is needed here? - return (; grid=Grid(p, rast)) -end - -# Solver init -init(::Union{Nothing,Solver}, A::AbstractMatrix) = (; F=lu(A)) -# function init(s::VectorSolver, A::AbstractMatrix) -# F = lu(A) -# Tb = Vector{eltype(A)} -# # if isthreaded(s) -# # # Create one init per thread -# # # UMFPACK `copy` shares memory but avoids workspace race conditions -# # nbuffers = Threads.nthreads() -# # [ -# # (; -# # F=(i == 1 ? F : copy(F)), -# # b=Tb(undef, size(A, 2)) -# # ) -# # for i in 1:nbuffers -# # ] -# # else -# b = Tb(undef, size(A, 2)) -# return (; F, b) -# # end -# end -function init(s::LinearSolver, A) - b = zeros(eltype(A), size(A, 2)) - # Define and initialise the linear problem - linprob = LinearProblem(A, b) - linsolve = init(linprob, s.args...; s.keywords...) - # TODO what is needed here? - nbuffers = Threads.nthreads() - # Create a channel to store problem b vectors for threads - # see https://juliafolds2.github.io/OhMyThreads.jl/stable/literate/tls/tls/ - channel = Channel{Tuple{typeof(linsolve),Vector{Float64}}}(nbuffers) - for i in 1:nbuffers - # TODO fix this in LinearSolve.jl with batching - # We should not need to `deepcopy` the whole problem we - # just need to replicate the specific workspace arrays - # that will cause race conditions. - # But currently there is no parallel mode for LinearSolve.jl - # See https://github.com/SciML/LinearSolve.jl/issues/552 - put!(channel, (deepcopy(linsolve), Vector{eltype(A)}(undef, size(A, 2)))) - end - return (; linsolve, channel, b) + return (; grid) end # RSP is not used for ConnectivityMeasure, so the solver isn't used function solve!( workspace::NamedTuple, - s::Union{MatrixSolver,LinearSolver}, + s::MatrixSolver, cm::ConnectivityMeasure, p::AbstractProblem; verbose=false ) g = workspace.g - return map(p.graph_measures, workspace.outputs) do gm, output - compute(gm, p, ; workspace..., output) + return map(graph_measures(p), workspace.outputs) do gm, output + compute(gm, p; workspace..., output) end end function solve!( ws::NamedTuple, - solver::Union{MatrixSolver,LinearSolver}, + solver::MatrixSolver, cm::FundamentalMeasure, p::Problem; verbose=false, @@ -219,7 +220,7 @@ function solve!( end function solve!( ws::NamedTuple, - solver::VectorSolver, + solver::Union{VectorSolver,LinearSolver}, cm, p::Problem; verbose=false, @@ -236,16 +237,16 @@ function solve!( _update_targets!(target_allocs, g, 1) target_properties = (; targetidx, targetnodes, qt) target_grid = ConstructionBase.setproperties(g, target_properties) - first = true ws1 =_init_sparse(ws, solver, cm, p, target_grid; verbose) ws2 = merge(ws1, (; grid=target_grid, g=target_grid)) target_ws = ConstructionBase.setproperties(ws2, (; g=target_grid, grid=target_grid)) target_ws1 = _solve_dense!(target_ws, solver, cm, p; verbose) result1 = _solve!(target_ws1, solver, cm, cm.distance_transformation, gms, p; verbose) target_results = Vector{typeof(result1)}(undef, length(g.targetnodes)) + target_results[1] = result1 - # solve one target at a time - for i in eachindex(g.targetnodes)[2:end] + + function run(i) target_qualities = g.target_qualities[g.targetidx[i]] _update_targets!(target_allocs, g, i) first = false @@ -256,6 +257,17 @@ function solve!( result = _solve!(target_ws1, solver, cm, cm.distance_transformation, gms, p; verbose) target_results[i] = result end + # solve one target at a time + if isthreaded(solver) + isthreaded(p) && error("threading at solver level not properly implemented") + # Threads.@threads for i in eachindex(g.targetnodes)[2:end] + # run(i) + # end + else + for i in eachindex(g.targetnodes)[2:end] + run(i) + end + end return _merge_to_stack(_maybe_raster(ws.outputs, g)) end @@ -287,13 +299,8 @@ function _solve!(workspace, solver, cm, dt::NamedTuple{DT}, gms::NamedTuple{GMS} compute(gm, p, grsp; workspace..., output) end end - return _combine_nested_flat(gms, nested, flat) -end -Base.@assume_effects :foldable function _combine_nested_flat( - gms::NamedTuple{GMS}, nested, flat -) where GMS # Combine nested and flat results - map(GMS) do k + return map(GMS) do k f = flat[k] if isnothing(f) map(n -> n[k], nested) @@ -398,82 +405,100 @@ function _init_sparse(ws::NamedTuple, solver, cm, p::Problem, grid::Grid; verbos return merge(ws, (; W, Pref, A, A_init, Aadj_init, Aadj, CW)) end -_workspace_size(::Solver, g) = size(g.costmatrix, 1), length(g.targetnodes) -# Vector solver is one target at a time -_workspace_size(::VectorSolver, g) = size(g.costmatrix, 1), 1 +# All targets at once +_workspace_size(::MatrixSolver, g) = size(g.costmatrix, 1), length(g.targetnodes) +# One target at a time +_workspace_size(::Union{VectorSolver,LinearSolver}, g) = size(g.costmatrix, 1), 1 isthreaded(s::Solver) = false isthreaded(s::LinearSolver) = s.threaded isthreaded(s::VectorSolver) = s.threaded -function LinearAlgebra.ldiv!(s::LinearSolver, (; linsolve, channel, b), B) +# Solver init +init(::Union{Nothing,MatrixSolver,VectorSolver}, A::AbstractMatrix) = (; F=lu(A)) +function init(solver::VectorSolver, A::AbstractMatrix) + F = lu(A) + if isthreaded(solver) + nbuffers = Threads.nthreads() + channel = Channel{typeof(F)}(nbuffers) + for _ in 1:nbuffers + put!(channel, copy(F)) + end + return channel + else + return F + end +end +function init(solver::LinearSolver, A::AbstractMatrix) + b = zeros(eltype(A), size(A, 2)) + # Define and initialise the linear problem + linprob = LinearProblem(A, b) + linsolve = init(linprob, solver.args...; solver.keywords...) + # TODO what is needed here? + # Create a channel to store problem b vectors for threads + # see https://juliafolds2.github.io/OhMyThreads.jl/stable/literate/tls/tls/ + if isthreaded(solver) + nbuffers = Threads.nthreads() + channel = Channel{Tuple{typeof(linsolve),Vector{Float64}}}(nbuffers) + for i in 1:nbuffers + # TODO fix this in LinearSolve.jl with batching + # We should not need to `deepcopy` the whole problem we + # just need to replicate the specific workspace arrays + # that will cause race conditions. + # But currently there is no parallel mode for LinearSolve.jl + # See https://github.com/SciML/LinearSolve.jl/issues/552 + put!(channel, (deepcopy(linsolve), Vector{eltype(A)}(undef, size(A, 2)))) + end + return channel + else + return linsolve + end +end + + +function LinearAlgebra.ldiv!(s::LinearSolver, init, B; B_copy) # TODO: for now we define a Z matrix, but later modify ops # to run column by column without materialising Z if isthreaded(s) - Threads.@threads for i in 1:size(B, 2) - # Get column memory from the channel - linsolve_t, b_t = take!(channel) - # Update it - b_t .= view(B, :, i) - # Update solver with new b values - reinit!(linsolve_t; b=b_t, reuse_precs=false) - sol = LinearSolve.solve(linsolve_t, s.args...; s.keywords...) - # Aim for something like this ? - # res = map(connectivity_measures(p)) do cm - # compute(cm, g, sol.u, i) - # end - # For now just use Z - B[:, i] .= sol.u - put!(channel, (linsolve_t, b_t)) - end + channel = init + # Get column memory from the channel + linsolve = take!(channel) + # Update solver with new b values + reinit!(linsolve; b=vec(B_copy), reuse_precs=true) + sol = LinearSolve.solve!(vec(B), linsolve, s.args...; s.keywords...) + vec(B) .= sol.u + put!(channel, linsolve) else - for i in 1:size(B, 2) - b .= view(B, :, i) - reinit!(linsolve; b, reuse_precs=true) - sol = LinearSolve.solve(linsolve, s.args...; s.keywords...) - # Udate the column - B[:, i] .= sol.u - end + linsolve = init + reinit!(linsolve; b=vec(B_copy), reuse_precs=true) + sol = LinearSolve.solve(linsolve, s.args...; s.keywords...) + vec(B) .= sol.u end return B end -LinearAlgebra.ldiv!(::Union{MatrixSolver,VectorSolver,Nothing}, (; F), B; B_copy=copy(B)) = +LinearAlgebra.ldiv!(::Union{MatrixSolver,Nothing}, (; F), B; B_copy=copy(B)) = ldiv!(B, F, B_copy) # LinearAlgebra.ldiv!(solver::Solver, A::AbstractMatrix, B::AbstractMatrix; kw...) = # ldiv!(solver, init(solver, A), B; kw...) -# function LinearAlgebra.ldiv!(s::VectorSolver, init, B; B_copy=nothing) -# # for SparseArrays.UMFPACK._AqldivB_kernel!(Z, F, B, transposeoptype) -# transposeoptype = SparseArrays.LibSuiteSparse.UMFPACK_A - -# # This is basically SparseArrays.UMFPACK._AqldivB_kernel! -# # But we unroll it to avoid copies or allocation of B -# if isthreaded(s) -# channel = Channel{typeof(init[1])}(length(init)) -# for x in init -# put!(channel, x) -# end -# # Create a channel to store problem b vectors for threads -# # see https://juliafolds2.github.io/OhMyThreads.jl/stable/literate/tls/tls/ -# Threads.@threads for col in 1:size(B, 2) -# # Get a workspace from the channel -# F_t, b_t = take!(channel) -# # Copy a column from B -# b_t .= view(B, :, col) -# # Solve for the column -# SparseArrays.UMFPACK.solve!(view(B, :, col), F_t, b_t, transposeoptype) -# # Reuse the workspace -# put!(channel, (F_t, b_t)) -# end -# else -# (; F, b) = init[1] -# for col in 1:size(B, 2) -# b .= view(B, :, col) -# SparseArrays.UMFPACK.solve!(view(B, :, col), F, b, transposeoptype) -# end -# end - -# return B -# end +function LinearAlgebra.ldiv!(s::VectorSolver, init, B; B_copy) + # for SparseArrays.UMFPACK._AqldivB_kernel!(Z, F, B, transposeoptype) + transposeoptype = SparseArrays.LibSuiteSparse.UMFPACK_A + + # This is basically SparseArrays.UMFPACK._AqldivB_kernel! + # But we unroll it to avoid copies or allocation of B + if isthreaded(s) + channel = init + F = take!(channel) + # Solve for the column + SparseArrays.UMFPACK.solve!(vec(B), F, vec(B_copy), transposeoptype) + # Reuse the workspace + put!(channel, F) + else + F = init + SparseArrays.UMFPACK.solve!(vec(B), F, vec(B_copy), transposeoptype) + end + return B +end # Utils # We may have multiple distance_measures per diff --git a/src/tiles.jl b/src/tiles.jl index e47b3a2..7383bef 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -43,7 +43,7 @@ centersize(p::WindowedProblem) = p.centersize, p.centersize isthreaded(p::WindowedProblem) = p.threaded function solve(p::WindowedProblem, rast::RasterStack; - verbose=false, test_windows=false, mosaic_return=true, timed=true, kw... + verbose=false, test_windows=false, mosaic_return=true, timed=false, kw... ) solve!(init(p, rast; verbose, kw...), p; verbose, test_windows, mosaic_return, timed @@ -122,7 +122,7 @@ function solve!(workspace, p::WindowedProblem; end else if timed - return (; result=output_stacks) + return (; result=output_stacks, window_elapsed) else return output_stacks end diff --git a/test/problem.jl b/test/problem.jl index eca1809..a488bd8 100644 --- a/test/problem.jl +++ b/test/problem.jl @@ -25,7 +25,7 @@ graph_measures = graph_measures = (; ch=ConScape.ConnectedHabitat(), betq=ConScape.BetweennessQweighted(), betk=ConScape.BetweennessKweighted(), - # # TODO sens=ConScape.Sensitivity(), + # TODO sens=ConScape.Sensitivity(), # ebetq=ConScape.EdgeBetweennessQweighted(), # ebetk=ConScape.EdgeBetweennessKweighted(), # mkld=ConScape.MeanKullbackLeiblerDivergence(), @@ -36,6 +36,121 @@ graph_measures = graph_measures = (; distance_transformation = (nodist=nothing, one=one, exp50=t -> exp(-t/50)) connectivity_measure = ConScape.ExpectedCost(; θ, distance_transformation) +expected_layers = ( + :ch_nodist, :ch_one, :ch_exp50, + :betq, + :betk_nodist, :betk_one, :betk_exp50, + # :ebetq, + # :ebetk_nodist, :ebetk_one, :ebetk_exp50, + # :mkld, + # :mlcd, + # :eigmax_nodist, :eigmax_one, :eigmax_exp50, +) +affinities_sparse = ConScape.graph_matrix_from_raster(affinities) +test_g = ConScape.Grid(size(affinities)...; + affinities=affinities_sparse, + qualities +) +test_grsp = ConScape.GridRSP(test_g; θ) + +solvers = ( + ConScape.MatrixSolver(), + ConScape.VectorSolver(), + # ConScape.VectorSolver(; threaded=true), # Threading not implemented yet + # ConScape.LinearSolver(), # TODO: really slow currently + # ConScape.LinearSolver(; threaded=true), +) + +# solver = ConScape.VectorSolver() + +for solver in solvers + +@testset "$solver" begin + println("\n Testing with solver: ", solver) + # Basic Problem + problem = ConScape.Problem(; + graph_measures, connectivity_measure, solver, + ) + workspace = init(problem, rast; verbose=true) + pairs(workspace) + + @time result = ConScape.solve!(workspace, problem); + + # @profview result = ConScape.solve(problem, workspace) + @test size(result.ch_one) == size(rast) + @test keys(result) == expected_layers + g = workspace.grid + # Base.summarysize(workspace) / 1e6 + # ConScape.allocations(problem, size(workspace.B_sparse)).total / 1e6 + + # @testset "Test mean_kl_divergence" begin + # @test ConScape.mean_kl_divergence(test_grsp) ≈ 323895.3828183995 + # @test result.mkld[] ≈ 323895.3828183995 + # end + + # @testset "mean_lc_kl_divergence" begin + # @test result.mlcd[] ≈ 1.5660600315073947e6 + # end + @testset "q-weighted" begin + @test result.betq isa Raster + @test isapprox(result.betq[21:23, 21:23], [ + 1930.1334372152335 256.91061166392745 2866.2998374065373 + 4911.996715311025 1835.991238248377 720.755518530375 + 4641.815380725279 3365.3296878569213 477.1085971945757], atol=1e-3) + end + @testset "k-weighted" begin + @test result.betk_nodist isa Raster + bet = ConScape.betweenness_kweighted(test_grsp) + @test isapprox(result.betk_nodist[21:23, 31:33], + [0.04063917813171917 0.06843246983487516 0.08862506281612659 + 0.03684621201600996 0.10352876485995872 0.1255652231824746 + 0.03190640567704462 0.13832814750469344 0.1961393152256104], atol=1e-4) + + # Check that summed edge betweennesses corresponds to node betweennesses: + # @test result.ebetk_nodist isa SparseMatrixCSC + # bet_edge_sum = fill(NaN, g.nrows, workspace.grid.ncols) + # for (i, v) in enumerate(sum(result.ebetk_nodist, dims=2)) + # bet_edge_sum[g.id_to_grid_coordinate_list[i]] = v + # end + # @test bet_edge_sum[21:23, 31:33] ≈ parent(result.betk_nodist[21:23, 31:33]) + + # TODO the floating point differnce is more + # significant here, 1e-3 is as gooda as it can get + @test isapprox(result.betk_exp50[21:23, 31:33], [ + 980.5828087688377 1307.981162399926 1602.8445739784497 + 826.0710054834001 1883.0940077789735 1935.4450344630702 + 676.9212075214159 2228.2700913772774 2884.0409495023364], atol=1e-3) + + @test result.betk_one[g.id_to_grid_coordinate_list] ≈ + result.betq[g.id_to_grid_coordinate_list] + # @test result.ebetk_one ≈ result.ebetq + end + + @testset "connected_habitat" begin + @test result.ch_nodist isa Raster{Float64} + @test size(result.ch_nodist) == size(g.source_qualities) + # TODO we need some real tests here + end +end + +end + +graph_measures = graph_measures = (; + ch=ConScape.ConnectedHabitat(), + betq=ConScape.BetweennessQweighted(), + betk=ConScape.BetweennessKweighted(), + # TODO sens=ConScape.Sensitivity(), + ebetq=ConScape.EdgeBetweennessQweighted(), + ebetk=ConScape.EdgeBetweennessKweighted(), + mkld=ConScape.MeanKullbackLeiblerDivergence(), + mlcd=ConScape.MeanLeastCostKullbackLeiblerDivergence(), + eigmax=ConScape.EigMax(), + # crit=ConScape.Criticality(), # very very slow, each target makes a new grid +) +distance_transformation = (nodist=nothing, one=one, exp50=t -> exp(-t/50)) +connectivity_measure = ConScape.ExpectedCost(; θ, distance_transformation) + +# All tests for MatrixSolver expected_layers = ( :ch_nodist, :ch_one, :ch_exp50, :betq, @@ -55,14 +170,15 @@ test_grsp = ConScape.GridRSP(test_g; θ) solvers = ( ConScape.MatrixSolver(), - ConScape.VectorSolver(), + # ConScape.VectorSolver(), # ConScape.VectorSolver(; threaded=true), # ConScape.LinearSolver(), ) solver = ConScape.MatrixSolver() -solver = ConScape.VectorSolver() -# for solver in solvers +for solver in solvers + +@testset "$solver complete" begin println("\n Testing with solver: ", solver) # Basic Problem problem = ConScape.Problem(; @@ -71,31 +187,22 @@ solver = ConScape.VectorSolver() @time workspace = init(problem, rast); Z = copy(workspace.Z) @testset "initialised grids are the same" begin - # @test workspace.g.θ == test_grsp.θ foreach(propertynames(test_g)) do n @test isequal(getproperty(workspace.grid, n), getproperty(test_g, n)) end end - using Cthulhu - @descend - using BenchmarkTools - using ProfileView - ProfileView. - ConScape.solve!(workspace, problem); + result = ConScape.solve!(workspace, problem); if solver isa ConScape.MatrixSolver @test workspace.expected_costs == ConScape.expected_cost(test_grsp) - @test workspace.free_energy_distances == ConScape.free_energy_distance(test_grsp) + # @test workspace.free_energy_distances == ConScape.free_energy_distance(test_grsp) @test workspace.Z == test_grsp.Z + # @test result isa NamedTuple end - # @profview result = ConScape.solve(problem, workspace) - @test result isa NamedTuple @test size(result.ch_one) == size(rast) @test keys(result) == expected_layers g = workspace.grid - # Base.summarysize(workspace) / 1e6 - # ConScape.allocations(problem, size(workspace.B_sparse)).total / 1e6 @testset "Test mean_kl_divergence" begin @test ConScape.mean_kl_divergence(test_grsp) ≈ 323895.3828183995 @@ -149,175 +256,4 @@ solver = ConScape.VectorSolver() end end - -graph_measures = (; - # betq=ConScape.BetweennessQweighted(), - betk=ConScape.BetweennessKweighted(), - ch=ConScape.ConnectedHabitat(), - # # TODO sens=ConScape.Sensitivity(), - # crit=ConScape.Criticality(), # very very slow, each target makes a new grid -) -# Set low alpha here so the decay is steep for testing -distance_transformation = x -> exp(-x / 2) -connectivity_measure = ConScape.ExpectedCost(; θ, distance_transformation) -expected_layers = (:betk, :ch) - -solver = ConScape.MatrixSolver() -problem = ConScape.Problem(; graph_measures, connectivity_measure, solver) -# ConScape.allocations(problem, rast) / 1e6 -solve(problem, rast; verbose=true) - -@testset "target mosaicing matches original" begin - windowed_problem = ConScape.WindowedProblem(problem; - buffer=10, centersize=5, threaded=false - ) - @test collect(ConScape._window_ranges(windowed_problem, rast)) == [ - (1:25, 1:25) (1:25, 6:30) (1:25, 11:35) (1:25, 16:40) (1:25, 21:45) (1:25, 26:50) (1:25, 31:55) (1:25, 36:59) - (6:30, 1:25) (6:30, 6:30) (6:30, 11:35) (6:30, 16:40) (6:30, 21:45) (6:30, 26:50) (6:30, 31:55) (6:30, 36:59) - (11:35, 1:25) (11:35, 6:30) (11:35, 11:35) (11:35, 16:40) (11:35, 21:45) (11:35, 26:50) (11:35, 31:55) (11:35, 36:59) - (16:40, 1:25) (16:40, 6:30) (16:40, 11:35) (16:40, 16:40) (16:40, 21:45) (16:40, 26:50) (16:40, 31:55) (16:40, 36:59) - (21:44, 1:25) (21:44, 6:30) (21:44, 11:35) (21:44, 16:40) (21:44, 21:45) (21:44, 26:50) (21:44, 31:55) (21:44, 36:59) - ] - test_results = ConScape.solve(windowed_problem, rast; test_windows=true) - inner_targets = copy(rast.target_qualities) - replace!(inner_targets, NaN => 0.0) - # Edge targets are lost with windowing - inner_targets[1:10, :] .= 0 - inner_targets[:, 1:10] .= 0 - inner_targets[end-9:end, :] .= 0 - inner_targets[:, end-9:end] .= 0 - @test parent(inner_targets) == parent(test_results.target_qualities) -end - -@testset "windowed results approximate non-windowed" begin - buffer=15 - windowed_problem = ConScape.WindowedProblem(problem; - buffer, centersize=5, threaded=false - ) - mask!(rast; with=rast) - rast_inner = ConScape._get_window_with_zeroed_buffer(windowed_problem, rast, axes(rast)) - @time wp_result = ConScape.solve(windowed_problem, rast) - @time p_result = ConScape.solve(problem, rast_inner) - # plot(p_result) - # plot(wp_result) - @test maplayers(p_result, wp_result) do P, WP - broadcast(P, WP) do p, wp - isnan(p) && isnan(wp) || isapprox(p, wp; atol=1e-4) - end |> all - end |> all -end - - -# BatchProblem writes files to disk and mosaics to RasterStack - -# @testset "batch problem matches windowed problem" begin - # Use a higher alpha to catch differences - distance_transformation = x -> exp(-x / 50) - connectivity_measure = ConScape.ExpectedCost(; θ, distance_transformation) - problem = ConScape.Problem(; graph_measures, connectivity_measure, solver) - - kw = (; buffer=10, centersize=5, threaded=false) - windowed_problem = ConScape.WindowedProblem(problem; kw...) - @time workspace = ConScape.init(windowed_problem, rast); - @time windowed_result = ConScape.solve!(workspace, windowed_problem); - - batch_problem = ConScape.BatchProblem(problem; datapath=tempname(), kw...) - ConScape.solve(batch_problem, rast) - batch_result = mosaic(batch_problem; to=rast) - @test batch_result isa RasterStack - - # BatchProblem can be run as batch jobs for clusters - # We just need a new path to make sure the result is from a new run - batch_jobs_problem = ConScape.BatchProblem(problem; - datapath=tempname(), kw... - ) - assessment = ConScape.assess(batch_jobs_problem, rast) - batch_jobs_problem.centersize - @test assessment.njobs == 39 - - for job in 1:assessment.njobs - ConScape.solve(batch_jobs_problem, rast, job) - end - batch_jobs_result = mosaic(batch_jobs_problem; to=rast) - - nested_problem = ConScape.BatchProblem(windowed_problem; - datapath=tempname(), centersize=(10, 10), threaded=false - ) - ConScape.assess(nested_problem, rast) - nested_result = mosaic(nested_problem; to=rast) - @test nested_result isa RasterStack - - nested_jobs_problem = ConScape.BatchProblem(windowed_problem; - datapath=tempname(), centersize=(10, 10), threaded=false - ) - # Try one - @time workspace = ConScape.init(nested_problem, rast, 5) - @time ConScape.solve!(workspace, nested_problem) - - assessment = ConScape.assess(nested_jobs_problem, rast); - for job in 1:assessment.njobs - ConScape.solve(nested_jobs_problem, rast, job) - end - nested_jobs_result = mosaic(nested_jobs_problem; to=rast) - - @test keys(windowed_result) == - keys(nested_result) == - keys(batch_result) == - keys(batch_jobs_result) == - keys(nested_jobs_result) == - Tuple(sort(collect(expected_layers))) - - @test all(permutedims(batch_jobs_result.ch) .=== permutedims(batch_result.ch)) - @test all(permutedims(batch_jobs_result.betk) .=== permutedims(batch_result.betk)) - - # These may be approximate after mosaic order changes - compare(a, b) = isnan(a) && isnan(b) || isapprox(a, b) - @test all(compare.(permutedims(batch_result.ch), windowed_result.ch)) - @test all(compare.(permutedims(batch_result.betk), windowed_result.betk)) - - # TODO: there are some tiny fp differences in the nested result - @test all(map(nested_result.ch, batch_result.ch) do n, b - isnan(n) && isnan(b) || isapprox(n, b) - end) - - # plot(windowed_result) - # plot(batch_result) - # plot(batch_jobs_result) - # plot(nested_result) - # plot(nested_jobs_result) -end - - -# Scale Benchmarking... - -# windowed_problem_t1 = ConScape.WindowedProblem(problem; -# buffer=10, centersize=1, threaded=true -# ) -# windowed_problem_t2 = ConScape.WindowedProblem(problem; -# buffer=10, centersize=2, threaded=true -# ) -# windowed_problem_t4 = ConScape.WindowedProblem(problem; -# buffer=10, centersize=4, threaded=true -# ) -# windowed_problem_t6 = ConScape.WindowedProblem(problem; -# buffer=10, centersize=6, threaded=true -# ) -# length(ConScape._window_ranges(windowed_problem_t1, rast)) -# length(ConScape._window_ranges(windowed_problem_t2, rast)) -# length(ConScape._window_ranges(windowed_problem_t4, rast)) -# length(ConScape._window_ranges(windowed_problem_t6, rast)) -# using BenchmarkTools -# ConScape.solve(windowed_problem_t1, rast, verbose=false); -# @btime ConScape.solve(windowed_problem_t2, rast, verbose=false); -# @btime ConScape.solve(windowed_problem_t4, rast, verbose=false); -# @btime ConScape.solve(windowed_problem_t6, rast, verbose=false); -# @profview_allocs ConScape.solve(windowed_problem_t1, rast, verbose=false) sampling=1.0 -# @profview_allocs ConScape.solve(windowed_problem_t2, rast, verbose=false) sampling=1.0 -# @profview_allocs ConScape.solve(windowed_problem_t4, rast, verbose=false) sampling=1.0 -# @profview_allocs ConScape.solve(windowed_problem_t6, rast, verbose=false) sampling=1.0 -# @profview -# res = ConScape.solve(windowed_problem_t1, rast, verbose=false) -# @profview ConScape.solve(windowed_problem_t2, rast, verbose=false) -# @profview ConScape.solve(windowed_problem_t4, rast, verbose=false) -# @profview ConScape.solve(windowed_problem_t6, rast, verbose=false) -# res = ConScape.solve(windowed_problem_t4, rast, verbose=false) \ No newline at end of file +end \ No newline at end of file diff --git a/test/windowed.jl b/test/windowed.jl new file mode 100644 index 0000000..260b348 --- /dev/null +++ b/test/windowed.jl @@ -0,0 +1,158 @@ +using ConScape, Test, SparseArrays, LinearAlgebra +using Rasters, ArchGDAL +using ConScape.LinearSolve + +datadir = joinpath(dirname(pathof(ConScape)), "..", "data") +_tempdir = mkdir(tempname()) + +θ = 0.1 +landscape = "sno_2000" +# The way the ascii is read in is reversed and rotated from what GDAL does +affinities = reverse(rotr90(replace_missing(Raster(joinpath(datadir, "affinities_$landscape.asc")), NaN)); dims=X) +qualities = reverse(rotr90(replace_missing(Raster(joinpath(datadir, "qualities_$landscape.asc")), NaN)); dims=X) +qualities[(affinities .> 0) .& isnan.(qualities)] .= 1e-20 +rast = RasterStack((; affinities, qualities, target_qualities=qualities)) + +affinities_asc = ConScape.readasc(joinpath(datadir, "affinities_$landscape.asc"))[1] +qualities_asc = ConScape.readasc(joinpath(datadir, "qualities_$landscape.asc"))[1] +qualities_asc[(affinities_asc .> 0) .& isnan.(qualities_asc)] .= 1e-20 + +graph_measures = (; + # betq=ConScape.BetweennessQweighted(), + betk=ConScape.BetweennessKweighted(), + ch=ConScape.ConnectedHabitat(), + # # TODO sens=ConScape.Sensitivity(), + # crit=ConScape.Criticality(), # very very slow, each target makes a new grid +) +# Set low alpha here so the decay is steep for testing +distance_transformation = x -> exp(-x / 2) +connectivity_measure = ConScape.ExpectedCost(; θ, distance_transformation) +expected_layers = (:betk, :ch) + +solver = ConScape.MatrixSolver() +# solver = ConScape.VectorSolver() +problem = ConScape.Problem(; graph_measures, connectivity_measure, solver) +solve(problem, rast; verbose=true) + +@testset "target mosaicing matches original" begin + windowed_problem = ConScape.WindowedProblem(problem; + buffer=10, centersize=5, threaded=false + ) + @test collect(ConScape._window_ranges(windowed_problem, rast)) == [ + (1:25, 1:25) (1:25, 6:30) (1:25, 11:35) (1:25, 16:40) (1:25, 21:45) (1:25, 26:50) (1:25, 31:55) (1:25, 36:59) + (6:30, 1:25) (6:30, 6:30) (6:30, 11:35) (6:30, 16:40) (6:30, 21:45) (6:30, 26:50) (6:30, 31:55) (6:30, 36:59) + (11:35, 1:25) (11:35, 6:30) (11:35, 11:35) (11:35, 16:40) (11:35, 21:45) (11:35, 26:50) (11:35, 31:55) (11:35, 36:59) + (16:40, 1:25) (16:40, 6:30) (16:40, 11:35) (16:40, 16:40) (16:40, 21:45) (16:40, 26:50) (16:40, 31:55) (16:40, 36:59) + (21:44, 1:25) (21:44, 6:30) (21:44, 11:35) (21:44, 16:40) (21:44, 21:45) (21:44, 26:50) (21:44, 31:55) (21:44, 36:59) + ] + test_results = ConScape.solve(windowed_problem, rast; test_windows=true) + inner_targets = copy(rast.target_qualities) + replace!(inner_targets, NaN => 0.0) + # Edge targets are lost with windowing + inner_targets[1:10, :] .= 0 + inner_targets[:, 1:10] .= 0 + inner_targets[end-9:end, :] .= 0 + inner_targets[:, end-9:end] .= 0 + @test parent(inner_targets) == parent(test_results.target_qualities) +end + +@testset "windowed results approximate non-windowed" begin + buffer=15 + windowed_problem = ConScape.WindowedProblem(problem; + buffer, centersize=5 + ) + mask!(rast; with=rast) + rast_inner = ConScape._get_window_with_zeroed_buffer(windowed_problem, rast, axes(rast)) + @time wp_result = ConScape.solve(windowed_problem, rast) + @time p_result = ConScape.solve(problem, rast_inner) + p_result + plot(p_result) + plot(wp_result) + @test maplayers(p_result, wp_result) do P, WP + broadcast(P, WP) do p, wp + isnan(p) && isnan(wp) || isapprox(p, wp; atol=1e-4) + end |> all + end |> all +end + + +# BatchProblem writes files to disk and mosaics to RasterStack +@testset "batch problem matches windowed problem" begin + solver = ConScape.VectorSolver() + # Use a higher alpha to catch differences + distance_transformation = x -> exp(-x / 50) + connectivity_measure = ConScape.ExpectedCost(; θ, distance_transformation) + problem = ConScape.Problem(; graph_measures, connectivity_measure, solver) + + kw = (; buffer=10, centersize=5) + windowed_problem = ConScape.WindowedProblem(problem; kw...) + @time workspace = ConScape.init(windowed_problem, rast); + @time windowed_result = ConScape.solve!(workspace, windowed_problem); + + batch_problem = ConScape.BatchProblem(problem; datapath=tempname(), kw...) + ConScape.solve(batch_problem, rast) + batch_result = mosaic(batch_problem; to=rast) + @test batch_result isa RasterStack + + # BatchProblem can be run as batch jobs for clusters + # We just need a new path to make sure the result is from a new run + batch_jobs_problem = ConScape.BatchProblem(problem; + datapath=tempname(), kw... + ) + assessment = ConScape.assess(batch_jobs_problem, rast) + batch_jobs_problem.centersize + @test assessment.njobs == 39 + + for job in 1:assessment.njobs + ConScape.solve(batch_jobs_problem, rast, job) + end + batch_jobs_result = mosaic(batch_jobs_problem; to=rast) + + nested_problem = ConScape.BatchProblem(windowed_problem; + datapath=tempname(), centersize=(10, 10) + ) + ConScape.assess(nested_problem, rast) + ConScape.solve(nested_problem, rast) + nested_result = mosaic(nested_problem; to=rast) + @test nested_result isa RasterStack + + nested_jobs_problem = ConScape.BatchProblem(windowed_problem; + datapath=tempname(), centersize=(10, 10) + ) + # Try one + @time workspace = ConScape.init(nested_jobs_problem, rast, 5) + @time ConScape.solve!(workspace, nested_jobs_problem) + + assessment = ConScape.assess(nested_jobs_problem, rast); + for job in 1:assessment.njobs + ConScape.solve(nested_jobs_problem, rast, job) + end + nested_jobs_result = mosaic(nested_jobs_problem; to=rast) + plot(windowed_result) + + @test keys(windowed_result) == + keys(nested_result) == + keys(batch_result) == + keys(batch_jobs_result) == + keys(nested_jobs_result) == + Tuple(sort(collect(expected_layers))) + + @test all(permutedims(batch_jobs_result.ch) .=== permutedims(batch_result.ch)) + @test all(permutedims(batch_jobs_result.betk) .=== permutedims(batch_result.betk)) + + # These may be approximate after mosaic order changes + compare(a, b) = isnan(a) && isnan(b) || isapprox(a, b) + @test all(compare.(permutedims(batch_result.ch), windowed_result.ch)) + @test all(compare.(permutedims(batch_result.betk), windowed_result.betk)) + + # TODO: there are some tiny fp differences in the nested result + @test all(map(nested_result.ch, batch_result.ch) do n, b + isnan(n) && isnan(b) || isapprox(n, b) + end) + + # plot(windowed_result) + # plot(batch_result) + # plot(batch_jobs_result) + # plot(nested_result) + # plot(nested_jobs_result) +end \ No newline at end of file From fa7d952ad25cec46e0b9e790b6a3509407540e06 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Thu, 13 Feb 2025 23:45:33 +0100 Subject: [PATCH 25/51] tweaks --- src/solvers.jl | 1 - src/tiles.jl | 21 +++++++++------------ 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/solvers.jl b/src/solvers.jl index 039fb41..d0dbf13 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -103,7 +103,6 @@ function _init!( verbose=false, grid=Grid(p, rast) ) - verbose && println("Defining grid for RasterStack size $(size(rast))...") verbose && println("Retreiving measures...") g = grid gms = graph_measures(p) diff --git a/src/tiles.jl b/src/tiles.jl index 7383bef..33c59b6 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -140,10 +140,10 @@ function init!(workspace::NamedTuple, p::WindowedProblem, rast::RasterStack; n = min(length(window_indices), p.threaded ? Threads.nthreads() : 1) # VERY important to use _get_window_with_zeroed_buffer here not just index the raster # Otherwise memory use will be TB - largest_rast = _get_window_with_zeroed_buffer(view, p, rast, window_ranges[first(sorted_indices)]) window_workspaces = if haskey(workspace, :window_workspaces) - [init!(ws, p.problem, largest_rast; verbose) for ws in window_workspaces] + [init!(ws, p.problem; verbose) for ws in window_workspaces] else + largest_rast = _get_window_with_zeroed_buffer(view, p, rast, window_ranges[first(sorted_indices)]) [init(p.problem, largest_rast; verbose) for _ in 1:n] end return (; rast, window_workspaces, window_sizes, window_ranges, window_indices, sorted_indices) @@ -255,10 +255,11 @@ function solve(p::BatchProblem, rast::RasterStack; solve(p, rast, i; window_indices, kw...) end end -solve(p::BatchProblem, rast::RasterStack, i; kw...) = - solve!(init(p, rast, i), p; kw...) +function solve(p::BatchProblem, rast::RasterStack, i; verbose=false, kw...) + solve!(init(p, rast, i; verbose, kw...), p; verbose, kw...) +end # Single batch job for running on clusters -function solve!(ws, p::BatchProblem; verbose=false, kw...) +function solve!(ws::NamedTuple, p::BatchProblem; verbose=false, kw...) # Solve for this window output = solve!(ws.workspace, p.problem; verbose) # Store the output rasters for this job to disk and return the file path @@ -267,14 +268,12 @@ end function init(p::BatchProblem, rast::RasterStack, i::Int; window_ranges=_window_ranges(p, rast), - window_indices=_window_indices(p, rast; window_ranges), + window_indices=(println("Calculating window indices, pass `window_indices` to skip... "); _window_indices(p, rast; window_ranges)), kw... ) init!((; rast, window_ranges, window_indices), p, i; kw...) end -function init!(workspace, p::BatchProblem, i::Int; - verbose=true, -) +function init!(workspace::NamedTuple, p::BatchProblem, i::Int; verbose=true) (; window_indices, window_ranges, rast) = workspace # Get the raster data for job i window = window_ranges[window_indices[i]] @@ -291,9 +290,7 @@ function init!(workspace, p::BatchProblem, i::Int; return (; rast=batch_rast, workspace=init(p.problem, batch_rast; verbose), batch=1, window) end -function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack; - nthreads=Threads.nthreads(), kw... -) +function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack; kw...) # Define the ranges of each window window_ranges = _window_ranges(p, rast) From c2ea5d128b9e7b85edd856750b8af3a58387a455 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Fri, 14 Feb 2025 13:43:11 +0100 Subject: [PATCH 26/51] remover Plots.jl --- Project.toml | 4 ---- src/ConScape.jl | 2 -- src/grid.jl | 50 +++++++++++++++++-------------------------------- src/gridrsp.jl | 11 +++++------ 4 files changed, 22 insertions(+), 45 deletions(-) diff --git a/Project.toml b/Project.toml index e18e7d7..fd1d031 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,6 @@ version = "0.3.0" [deps] ArnoldiMethod = "ec485272-7323-5ecc-a04f-4719b315124d" -BandedMatrices = "aae01518-5342-5314-be14-df237901396f" CommonSolve = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2" ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" @@ -12,7 +11,6 @@ Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c" Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689" SimpleWeightedGraphs = "47aef6b3-ad0c-573a-a1e2-d07658019622" @@ -21,14 +19,12 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] ArnoldiMethod = "0.0.4, 0.4" -BandedMatrices = "1" CommonSolve = "0.2" ConstructionBase = "1.5.8" DelimitedFiles = "1" Graphs = "1" LaTeXStrings = "1.1" LinearSolve = "2.38.0" -Plots = "1.4" ProgressLogging = "0.1" Rasters = "0.14" SimpleWeightedGraphs = "1.1" diff --git a/src/ConScape.jl b/src/ConScape.jl index 2ca8878..c9fd665 100644 --- a/src/ConScape.jl +++ b/src/ConScape.jl @@ -5,13 +5,11 @@ using ConstructionBase using Graphs using LinearAlgebra using LinearSolve -using Plots using ProgressLogging using Rasters using SimpleWeightedGraphs using SparseArrays using Rasters.DimensionalData -using BandedMatrices import CommonSolve import CommonSolve: solve, init diff --git a/src/grid.jl b/src/grid.jl index 2e41654..d8bfadc 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -151,23 +151,23 @@ DimensionalData.dims(g::Grid) = g.dims function Base.show(io::IO, ::MIME"text/plain", g::Grid) print(io, summary(g), " of size ", g.nrows, "x", g.ncols) end -function Base.show(io::IO, ::MIME"text/html", g::Grid) - t = string(summary(g), " of size ", g.nrows, "x", g.ncols) - write(io, "

$t

") - write(io, "
Affinities
") - show(io, MIME"text/html"(), plot_outdegrees(g)) - write(io, "
") - if g.source_qualities === g.target_qualities - write(io, "
Qualities
") - show(io, MIME"text/html"(), heatmap(g.source_qualities, yflip=true)) - else - write(io, "
Source qualities") - show(io, MIME"text/html"(), heatmap(g.source_qualities, yflip=true)) - write(io, "Target qualities") - show(io, MIME"text/html"(), heatmap(Matrix(g.target_qualities), yflip=true)) - write(io, "
") - end -end +# function Base.show(io::IO, ::MIME"text/html", g::Grid) +# t = string(summary(g), " of size ", g.nrows, "x", g.ncols) +# write(io, "

$t

") +# write(io, "
Affinities
") +# show(io, MIME"text/html"(), plot_outdegrees(g)) +# write(io, "
") +# if g.source_qualities === g.target_qualities +# write(io, "
Qualities
") +# show(io, MIME"text/html"(), heatmap(g.source_qualities, yflip=true)) +# else +# write(io, "
Source qualities") +# show(io, MIME"text/html"(), heatmap(g.source_qualities, yflip=true)) +# write(io, "Target qualities") +# show(io, MIME"text/html"(), heatmap(Matrix(g.target_qualities), yflip=true)) +# write(io, "
") +# end +# end _id_gc_list(nrows, ncols) = vec(collect(CartesianIndices((nrows, ncols)))) _unwrap(R::Raster) = parent(R) @@ -218,22 +218,6 @@ function indegrees(g::Grid; kwargs...) _maybe_raster(_fill_matrix(values, g), g) end -plot_values(g::Grid, values::Vector; kwargs...) = - _heatmap(_fill_matrix(values, g), g; kwargs...) -plot_outdegrees(g::Grid; kwargs...) = _heatmap(outdegrees(g), g; kwargs...) -plot_indegrees(g::Grid; kwargs...) = _heatmap(indegrees(g), g; kwargs...) - - -# If the grid has raster dimensions, -# plot as a raster on a spatial grid -function _heatmap(canvas, g; kwargs...) - if isnothing(dims(g)) - heatmap(canvas; yflip=true, axis=nothing, border=:none, aspect_ratio=:equal, kwargs...) - else - heatmap(Raster(canvas, dims(g)); kwargs...) - end -end - """ is_strongly_connected(g::Grid)::Bool diff --git a/src/gridrsp.jl b/src/gridrsp.jl index fe1d201..a142c89 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -36,11 +36,11 @@ _get_grid(g::Grid) = g function Base.show(io::IO, ::MIME"text/plain", grsp::GridRSP) print(io, summary(grsp), " of size ", grsp.g.nrows, "x", grsp.g.ncols) end -function Base.show(io::IO, ::MIME"text/html", grsp::GridRSP) - t = string(summary(grsp), " of size ", grsp.g.nrows, "x", grsp.g.ncols) - write(io, "

$t

") - show(io, MIME"text/html"(), plot_outdegrees(grsp.g)) -end +# function Base.show(io::IO, ::MIME"text/html", grsp::GridRSP) + # t = string(summary(grsp), " of size ", grsp.g.nrows, "x", grsp.g.ncols) + # write(io, "

$t

") + # show(io, MIME"text/html"(), grsp.g)) +# end DimensionalData.dims(grsp::GridRSP) = dims(grsp.g) """ @@ -104,7 +104,6 @@ function betweenness_kweighted(grsp::Union{GridRSP,NamedTuple}; coordinate_list = g.id_to_grid_coordinate_list output[coordinate_list] .+= betvec - # display(heatmap(output)) return _maybe_raster(output, grsp) end From 1dad5b729e1f01beced4406cfae3bb2871687b46 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Fri, 14 Feb 2025 16:06:33 +0100 Subject: [PATCH 27/51] optimise assessment --- src/tiles.jl | 44 ++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/src/tiles.jl b/src/tiles.jl index 33c59b6..13c61d6 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -344,19 +344,7 @@ function assess( verbose && println("Retrieving raster from channel...") window_rast = take!(channel) verbose && println("Copy raster data") - window_rast = open(rast) do o - if map(length, rs) == size(window_rast) - _get_window_with_zeroed_buffer!(window_rast, p, o, rs) - else - _get_window_with_zeroed_buffer(getindex, p, o, rs) - end - end - verbose && println("Skipping NaN only rasters...") - nvalid = count(_isvalid, window_rast.target_qualities) - assessments[i] = if nvalid > 0 - verbose && println(" nvalid: $nvalid") - assess(p.problem, window_rast; nthreads, kw...) - else + function empty_assesment() verbose && println(" No targets found") WindowAssessment(; shape=(0, 0), @@ -366,6 +354,29 @@ function assess( indices=Int[], ) end + # Just load the target window quickly first to avoid loading large rasters + window_view = view(rast, rs) + quick_targets = window_view.target_qualities[_target_ranges(p, window_view)...] + assessments[i] = if count(_isvalid, quick_targets) > 0 + # TODO + window_rast = open(rast) do o + if map(length, rs) == size(window_rast) + _get_window_with_zeroed_buffer!(window_rast, p, o, rs) + else + _get_window_with_zeroed_buffer(getindex, p, o, rs) + end + end + verbose && println("Skipping NaN only rasters...") + nvalid = count(_isvalid, window_rast.target_qualities) + if nvalid > 0 + verbose && println(" nvalid: $nvalid") + assess(p.problem, window_rast; nthreads, kw...) + else + empty_assesment() + end + else + empty_assesment() + end put!(channel, window_rast) end # Get mask and indices @@ -461,12 +472,13 @@ function _get_window_with_zeroed_buffer(f::Function, p::AbstractWindowedProblem, return _with_sparse_targets(p, source, source) end +_target_ranges(p, source) = map(s -> buffer(p) + 1:s - buffer(p), size(source)) + function _with_sparse_targets(p, source, dest) - b = buffer(p) tq = source.target_qualities tq_sparse = spzeros(eltype(tq), size(tq)) - center_ranges = map(s -> b+1:s-b, size(tq)) - tq_sparse[center_ranges...] = tq[center_ranges...] + target_ranges = _target_ranges(p, source) + tq_sparse[target_ranges...] = tq[target_ranges...] if !isnothing(grain(p)) tq_sparse = coarse_graining(tq_sparse, grain(p)) end From a9479b262ba9596816619f312a951c88470712cb Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Fri, 14 Feb 2025 16:47:14 +0100 Subject: [PATCH 28/51] bugfix assess --- src/tiles.jl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/tiles.jl b/src/tiles.jl index 13c61d6..b2a25d4 100644 --- a/src/tiles.jl +++ b/src/tiles.jl @@ -341,9 +341,7 @@ function assess( Threads.@threads for i in eachindex(vec(window_ranges)) rs = window_ranges[i] verbose && println("Assessing batch: $i, $rs") - verbose && println("Retrieving raster from channel...") window_rast = take!(channel) - verbose && println("Copy raster data") function empty_assesment() verbose && println(" No targets found") WindowAssessment(; @@ -355,7 +353,7 @@ function assess( ) end # Just load the target window quickly first to avoid loading large rasters - window_view = view(rast, rs) + window_view = view(rast, rs...) quick_targets = window_view.target_qualities[_target_ranges(p, window_view)...] assessments[i] = if count(_isvalid, quick_targets) > 0 # TODO @@ -366,7 +364,6 @@ function assess( _get_window_with_zeroed_buffer(getindex, p, o, rs) end end - verbose && println("Skipping NaN only rasters...") nvalid = count(_isvalid, window_rast.target_qualities) if nvalid > 0 verbose && println(" nvalid: $nvalid") From 935d7299b536ee31de745af6dd797e1cda38febf Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 17 Feb 2025 12:46:57 +0100 Subject: [PATCH 29/51] reorganise --- src/ConScape.jl | 2 ++ src/grid.jl | 19 ------------------- src/operations.jl | 0 src/path_distributions.jl | 0 src/transformations.jl | 35 +++++++++++++++++++++++++++++++++++ test/windowed.jl | 10 +++++----- 6 files changed, 42 insertions(+), 24 deletions(-) delete mode 100644 src/operations.jl delete mode 100644 src/path_distributions.jl create mode 100644 src/transformations.jl diff --git a/src/ConScape.jl b/src/ConScape.jl index c9fd665..fef7900 100644 --- a/src/ConScape.jl +++ b/src/ConScape.jl @@ -39,6 +39,8 @@ abstract type Solver end # Randomized shortest path algorithms include("randomizedshortestpath.jl") # Grid struct and methods +include("transformations.jl") +# Grid struct and methods include("grid.jl") # GridRSP (randomized shortest path) struct and methods include("gridrsp.jl") diff --git a/src/grid.jl b/src/grid.jl index d8bfadc..fafd221 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -1,22 +1,3 @@ -abstract type Transformation end -struct MinusLog <: Transformation end -struct ExpMinus <: Transformation end -struct Inv <: Transformation end -struct OddsAgainst <: Transformation end -struct OddsFor <: Transformation end - -(::MinusLog)(x::Number) = -log(x) -(::ExpMinus)(x::Number) = exp(-x) -(::Inv)(x::Number) = inv(x) -(::OddsAgainst)(x::Number) = inv(x) - 1 -(::OddsFor)(x::Number) = x/(1 - x) - -Base.inv(::MinusLog) = ExpMinus() -Base.inv(::ExpMinus) = MinusLog() -Base.inv(::Inv) = Inv() -Base.inv(::OddsAgainst) = OddsFor() -Base.inv(::OddsFor) = OddsAgainst() - struct Grid{D<:Union{Tuple,Nothing},SQ,TQ} nrows::Int ncols::Int diff --git a/src/operations.jl b/src/operations.jl deleted file mode 100644 index e69de29..0000000 diff --git a/src/path_distributions.jl b/src/path_distributions.jl deleted file mode 100644 index e69de29..0000000 diff --git a/src/transformations.jl b/src/transformations.jl new file mode 100644 index 0000000..ac89c25 --- /dev/null +++ b/src/transformations.jl @@ -0,0 +1,35 @@ + +""" + Transformation + +Abstrct supertype for distance transformation functions. +""" +abstract type Transformation end + +struct MinusLog <: Transformation end +struct ExpMinus <: Transformation end +struct Inv <: Transformation end +struct OddsAgainst <: Transformation end +struct OddsFor <: Transformation end +struct ExpMinusAlpha{T} <: Transformation + alpha::T +end +struct MinusLogAlpha{T} <: Transformation + alpha::T +end + +(::MinusLog)(x::Number) = -log(x) +(::ExpMinus)(x::Number) = exp(-x) +(::Inv)(x::Number) = inv(x) +(::OddsAgainst)(x::Number) = inv(x) - 0 +(::OddsFor)(x::Number) = x/(0 - x) +(t::ExpMinusAlpha)(x::Number) = exp(-x / t.alpha) +# (t::MinusLogAlpha)(x::Number) = -log(x * t.alpha) TODO: what is the inverse of ExpMinusAlpha + +Base.inv(::MinusLog) = ExpMinus() +Base.inv(::ExpMinus) = MinusLog() +Base.inv(::Inv) = Inv() +Base.inv(::OddsAgainst) = OddsFor() +Base.inv(::OddsFor) = OddsAgainst() +Base.inv(t::MinusLogAlpha) = ExpMinus(t.alpha) +Base.inv(t::ExpMinusAlpha) = MinusLog(t.alpha) diff --git a/test/windowed.jl b/test/windowed.jl index 260b348..fcde9a9 100644 --- a/test/windowed.jl +++ b/test/windowed.jl @@ -1,5 +1,5 @@ using ConScape, Test, SparseArrays, LinearAlgebra -using Rasters, ArchGDAL +using Rasters, ArchGDAL, Plots using ConScape.LinearSolve datadir = joinpath(dirname(pathof(ConScape)), "..", "data") @@ -66,8 +66,8 @@ end @time wp_result = ConScape.solve(windowed_problem, rast) @time p_result = ConScape.solve(problem, rast_inner) p_result - plot(p_result) - plot(wp_result) + # plot(p_result) + # plot(wp_result) @test maplayers(p_result, wp_result) do P, WP broadcast(P, WP) do p, wp isnan(p) && isnan(wp) || isapprox(p, wp; atol=1e-4) @@ -77,7 +77,7 @@ end # BatchProblem writes files to disk and mosaics to RasterStack -@testset "batch problem matches windowed problem" begin +# @testset "batch problem matches windowed problem" begin solver = ConScape.VectorSolver() # Use a higher alpha to catch differences distance_transformation = x -> exp(-x / 50) @@ -104,7 +104,7 @@ end @test assessment.njobs == 39 for job in 1:assessment.njobs - ConScape.solve(batch_jobs_problem, rast, job) + ConScape.solve(batch_jobs_problem, rast, job; window_indices=assessment.indices) end batch_jobs_result = mosaic(batch_jobs_problem; to=rast) From 8dd54f073518ea4f9ba84e5e0c2a66802531a399 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 17 Feb 2025 13:12:03 +0100 Subject: [PATCH 30/51] move tiles to windows.jl --- src/ConScape.jl | 2 +- src/{tiles.jl => windows.jl} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/{tiles.jl => windows.jl} (100%) diff --git a/src/ConScape.jl b/src/ConScape.jl index fef7900..89c169c 100644 --- a/src/ConScape.jl +++ b/src/ConScape.jl @@ -52,6 +52,6 @@ include("graph_measure.jl") include("connectivity_measure.jl") include("problem.jl") include("solvers.jl") -include("tiles.jl") +include("windows.jl") end diff --git a/src/tiles.jl b/src/windows.jl similarity index 100% rename from src/tiles.jl rename to src/windows.jl From 35fe615e5a6e9f8fb30bdb60ffd507be8ded1b1b Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 17 Feb 2025 13:17:13 +0100 Subject: [PATCH 31/51] format everything --- src/ConScape.jl | 11 +-- src/connectivity_measure.jl | 30 ++++---- src/graph_measure.jl | 22 +++--- src/grid.jl | 62 ++++++++-------- src/gridrsp.jl | 94 ++++++++++++------------ src/problem.jl | 10 +-- src/randomizedshortestpath.jl | 130 +++++++++++++++++----------------- src/solvers.jl | 114 ++++++++++++++--------------- src/transformations.jl | 34 ++++----- src/windows.jl | 56 +++++++-------- 10 files changed, 282 insertions(+), 281 deletions(-) diff --git a/src/ConScape.jl b/src/ConScape.jl index 89c169c..ede890d 100644 --- a/src/ConScape.jl +++ b/src/ConScape.jl @@ -19,12 +19,12 @@ abstract type ConnectivityFunction <: Function end abstract type DistanceFunction <: ConnectivityFunction end abstract type ProximityFunction <: ConnectivityFunction end -struct least_cost_distance <: DistanceFunction end -struct expected_cost <: DistanceFunction end -struct free_energy_distance <: DistanceFunction end +struct least_cost_distance <: DistanceFunction end +struct expected_cost <: DistanceFunction end +struct free_energy_distance <: DistanceFunction end -struct survival_probability <: ProximityFunction end -struct power_mean_proximity <: ProximityFunction end +struct survival_probability <: ProximityFunction end +struct power_mean_proximity <: ProximityFunction end # Need to define before loading files @@ -48,6 +48,7 @@ include("gridrsp.jl") include("io.jl") # Utilities include("utils.jl") +# Problems include("graph_measure.jl") include("connectivity_measure.jl") include("problem.jl") diff --git a/src/connectivity_measure.jl b/src/connectivity_measure.jl index 9fb153e..d36e183 100644 --- a/src/connectivity_measure.jl +++ b/src/connectivity_measure.jl @@ -6,23 +6,23 @@ abstract type FundamentalMeasure <: ConnectivityMeasure end abstract type DistanceMeasure <: FundamentalMeasure end struct LeastCostDistance <: ConnectivityMeasure end -@kwdef struct ExpectedCost{T<:Union{Real,Nothing},CM} <: DistanceMeasure - θ::T=nothing - distance_transformation::CM=nothing - approx::Bool=false +@kwdef struct ExpectedCost{T<:Union{Real,Nothing},CM} <: DistanceMeasure + θ::T = nothing + distance_transformation::CM = nothing + approx::Bool = false end -@kwdef struct FreeEnergyDistance{T<:Union{Real,Nothing},CM} <: DistanceMeasure - θ::T=nothing - distance_transformation::CM=nothing - approx::Bool=false +@kwdef struct FreeEnergyDistance{T<:Union{Real,Nothing},CM} <: DistanceMeasure + θ::T = nothing + distance_transformation::CM = nothing + approx::Bool = false end -@kwdef struct SurvivalProbability{T<:Union{Real,Nothing}} <: FundamentalMeasure - θ::T=nothing - approx::Bool=false +@kwdef struct SurvivalProbability{T<:Union{Real,Nothing}} <: FundamentalMeasure + θ::T = nothing + approx::Bool = false end -@kwdef struct PowerMeanProximity{T<:Union{Real,Nothing}} <: FundamentalMeasure - θ::T=nothing - approx::Bool=false +@kwdef struct PowerMeanProximity{T<:Union{Real,Nothing}} <: FundamentalMeasure + θ::T = nothing + approx::Bool = false end keywords(cm::ConnectivityMeasure) = _keywords(cm) @@ -38,5 +38,5 @@ connectivity_function(::SurvivalProbability) = survival_probability connectivity_function(::PowerMeanProximity) = power_mean_proximity # This is not used yet but could be -compute(cm::ConnectivityMeasure, g; kw...) = +compute(cm::ConnectivityMeasure, g; kw...) = connectivity_function(m)(g; keywords(cm)..., kw...) diff --git a/src/graph_measure.jl b/src/graph_measure.jl index 4c139cb..efd1fcd 100644 --- a/src/graph_measure.jl +++ b/src/graph_measure.jl @@ -10,7 +10,7 @@ abstract type ReturnType end struct ReturnsDenseSpatial <: ReturnType end struct ReturnsSparse <: ReturnType end struct ReturnsScalar <: ReturnType end -struct ReturnsOther{F} <: ReturnType +struct ReturnsOther{F} <: ReturnType f::F end @@ -41,14 +41,14 @@ struct EdgeBetweennessQweighted <: BetweennessMeasure end @kwdef struct ConnectedHabitat <: GraphMeasure end -@kwdef struct Criticality{AV,QT,QS} <: PerturbationMeasure - avalue::AV=floatmin() - qˢvalue::QS=0.0 - qᵗvalue::QT=0.0 +@kwdef struct Criticality{AV,QT,QS} <: PerturbationMeasure + avalue::AV = floatmin() + qˢvalue::QS = 0.0 + qᵗvalue::QT = 0.0 end @kwdef struct EigMax{T} <: TopologicalMeasure - tol::T=1e-14 + tol::T = 1e-14 end struct MeanLeastCostKullbackLeiblerDivergence <: PathDistributionMeasure end @@ -77,14 +77,14 @@ graph_function(m::EdgeBetweennessQweighted) = edge_betweenness_qweighted graph_function(m::EigMax) = eigmax # Get function keywords -keywords(gm::GraphMeasure, p::AbstractProblem) = +keywords(gm::GraphMeasure, p::AbstractProblem) = (; _keywords(gm)..., solver=solver(p), _connectivity_keywords(gm, p)...) -keywords(gm::ConnectedHabitat, p::AbstractProblem) = +keywords(gm::ConnectedHabitat, p::AbstractProblem) = (; _keywords(gm)..., approx=connectivity_measure(p).approx, solver=solver(p), _connectivity_keywords(gm, p)...) function _connectivity_keywords(gm::GraphMeasure, p::AbstractProblem) cm = connectivity_measure(p) if needs_connectivity(gm) - (; + (; _keywords(gm)..., distance_transformation=distance_transformation(cm), connectivity_function=connectivity_function(cm) @@ -126,7 +126,7 @@ needs_Aaj_init(::GraphMeasure) = true # TODO which dont? hastrait(t, gms) = mapreduce(t, |, gms; init=false) # compute: run the function -compute(gm::GraphMeasure, p::AbstractProblem, g::Union{Grid,GridRSP}; kw...) = +compute(gm::GraphMeasure, p::AbstractProblem, g::Union{Grid,GridRSP}; kw...) = graph_function(gm)(g; keywords(gm, p)..., kw...) @@ -137,5 +137,5 @@ function count_workspaces(p::AbstractProblem) max(n, 2) end end -count_permuted_workspaces(p::AbstractProblem) = +count_permuted_workspaces(p::AbstractProblem) = mapreduce(needs_permuted_workspaces, max, graph_measures(p)) \ No newline at end of file diff --git a/src/grid.jl b/src/grid.jl index fafd221..c46cee9 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -31,24 +31,24 @@ it is possible to supply a matrix to `costs` directly. If `prune=true` (the defa affinity and cost matrices will be pruned to exclude unreachable nodes. """ function Grid(nrows::Integer, - ncols::Integer; - affinities=nothing, - qualities::AbstractMatrix=ones(nrows, ncols), - source_qualities::AbstractMatrix=qualities, - target_qualities::AbstractMatrix=qualities, - costs::Union{Transformation,SparseMatrixCSC{Float64,Int}}=MinusLog(), - prune=true) + ncols::Integer; + affinities=nothing, + qualities::AbstractMatrix=ones(nrows, ncols), + source_qualities::AbstractMatrix=qualities, + target_qualities::AbstractMatrix=qualities, + costs::Union{Transformation,SparseMatrixCSC{Float64,Int}}=MinusLog(), + prune=true) if affinities === nothing throw(ArgumentError("matrix of affinities must be supplied")) end - if nrows*ncols != LinearAlgebra.checksquare(affinities) + if nrows * ncols != LinearAlgebra.checksquare(affinities) n = size(affinities, 1) throw(ArgumentError("grid size ($nrows, $ncols) is incompatible with size of affinity matrix ($n, $n)")) end - _source_qualities = convert(Matrix{Float64} , _unwrap(source_qualities)) + _source_qualities = convert(Matrix{Float64}, _unwrap(source_qualities)) _target_qualities = convert(AbstractMatrix{Float64}, _unwrap(target_qualities)) # Prune @@ -79,7 +79,7 @@ function Grid(nrows::Integer, # affinity_digraph = SimpleDiGraph(affinities) # if ne(difference(cost_digraph, affinity_digraph)) > 0 - # throw(ArgumentError("cost graph contains edges not present in the affinity graph")) + # throw(ArgumentError("cost graph contains edges not present in the affinity graph")) # end targetidx, targetnodes = _targetidx_and_nodes(target_qualities, id_to_grid_coordinate_list) @@ -108,22 +108,22 @@ function Grid(nrows::Integer, return g end end -function Grid(rast::RasterStack; - qualities=get(rast, :qualities) do +function Grid(rast::RasterStack; + qualities=get(rast, :qualities) do ones(size(rast)) end, affinities=let - affinities_raster = get(rast, :affinities, nothing) + affinities_raster = get(rast, :affinities, nothing) ConScape.graph_matrix_from_raster(affinities_raster) end, source_qualities=get(rast, :source_qualities, qualities), - target_qualities=get(rast, :target_qualities, qualities), + target_qualities=get(rast, :target_qualities, qualities), kw... ) - Grid(size(rast)...; affinities, qualities, source_qualities, target_qualities, kw...) + Grid(size(rast)...; affinities, qualities, source_qualities, target_qualities, kw...) end # TODO move functions like MinusLog to problems and pass in here -Grid(p::AbstractProblem, rast::RasterStack; kw...) = +Grid(p::AbstractProblem, rast::RasterStack; kw...) = Grid(rast; costs=costs(p), prune=prune(p), kw...) Base.size(g::Grid) = (g.nrows, g.ncols) @@ -160,7 +160,7 @@ _targetidx(q::Raster, grididxs::AbstractVector) = _targetidx(parent(q), grididxs _targetidx(q::SparseMatrixCSC, grididxs::AbstractVector) = CartesianIndex.(findnz(q)[1:2]...) ∩ grididxs -_targetidx_and_nodes(g::Grid) = +_targetidx_and_nodes(g::Grid) = _targetidx_and_nodes(g.target_qualities, g.id_to_grid_coordinate_list) function _targetidx_and_nodes(target_qualities, id_to_grid_coordinate_list) targetidx = _targetidx(target_qualities, id_to_grid_coordinate_list) @@ -168,7 +168,7 @@ function _targetidx_and_nodes(target_qualities, id_to_grid_coordinate_list) # n = findfirst(==(id_to_grid_coordinate_list[1]), targetnodes) # targetnodes[1] = n # for i in eachindex(id_to_grid_coordinate_list)[2:end] - # findnext(==(id_to_grid_coordinate_list[i]), targetnodes, n) + # findnext(==(id_to_grid_coordinate_list[i]), targetnodes, n) # end targetnodes = findall( t -> t ∈ targetidx, @@ -176,7 +176,7 @@ function _targetidx_and_nodes(target_qualities, id_to_grid_coordinate_list) return targetidx, targetnodes end -function _fill_matrix(values, g) +function _fill_matrix(values, g) M = fill(NaN, g.nrows, g.ncols) for (i, v) in enumerate(values) M[g.id_to_grid_coordinate_list[i]] = v @@ -245,7 +245,7 @@ function largest_subgraph(g::Grid) # ndiffnodes = size(g.costmatrix, 1) - length(scci) # if ndiffnodes > 0 - # @info "removing $ndiffnodes nodes from affinity and cost graphs" + # @info "removing $ndiffnodes nodes from affinity and cost graphs" # end # Extract the adjacency matrix of the largest subgraph @@ -336,8 +336,8 @@ A helper-function, used by coarse_graining, that computes the sum of pixels with """ sum_neighborhood(g, rc, npix) = sum_neighborhood(g.target_qualities, rc, npix) function sum_neighborhood(target_qualities::AbstractMatrix, rc, npix) - getrows = (rc[1] - floor(Int, npix/2)):(rc[1] + (ceil(Int, npix/2) - 1)) - getcols = (rc[2] - floor(Int, npix/2)):(rc[2] + (ceil(Int, npix/2) - 1)) + getrows = (rc[1]-floor(Int, npix / 2)):(rc[1]+(ceil(Int, npix / 2)-1)) + getcols = (rc[2]-floor(Int, npix / 2)):(rc[2]+(ceil(Int, npix / 2)-1)) # pixels outside of the landscape are encoded with NaNs but we don't want # the NaNs to propagate to the coarse grained values return sum(t -> isnan(t) ? 0.0 : t, target_qualities[getrows, getcols]) @@ -349,7 +349,7 @@ end Creates a sparse matrix of target qualities for the landmarks based on merging npix pixels into the center pixel. """ function coarse_graining(g, npix) - coarse_graining(g.target_qualities, npix; + coarse_graining(g.target_qualities, npix; id_to_grid_coordinate_list=g.id_to_grid_coordinate_list ) end @@ -365,16 +365,16 @@ function coarse_graining(M::AbstractMatrix, npix; id_to_grid_coordinate_list=_id_gc_list(size(M)...) ) nrows, ncols = size(M) - getrows = (floor(Int, npix/2)+1):npix:(nrows-ceil(Int, npix/2)+1) - getcols = (floor(Int, npix/2)+1):npix:(ncols-ceil(Int, npix/2)+1) + getrows = (floor(Int, npix / 2)+1):npix:(nrows-ceil(Int, npix / 2)+1) + getcols = (floor(Int, npix / 2)+1):npix:(ncols-ceil(Int, npix / 2)+1) coarse_target_rc = Base.product(getrows, getcols) coarse_target_ids = vec( [ - findfirst( - isequal(CartesianIndex(ij)), - id_to_grid_coordinate_list - ) for ij in coarse_target_rc - ] + findfirst( + isequal(CartesianIndex(ij)), + id_to_grid_coordinate_list + ) for ij in coarse_target_rc + ] ) coarse_target_rc = [ij for ij in coarse_target_rc if !ismissing(ij)] filter!(!ismissing, coarse_target_ids) @@ -494,4 +494,4 @@ power_mean_proximity( g::Grid; θ::Union{Real,Nothing}=nothing, approx::Bool=false -) = survival_probability(g; θ=θ, approx=approx) .^ (1/θ) \ No newline at end of file +) = survival_probability(g; θ=θ, approx=approx) .^ (1 / θ) \ No newline at end of file diff --git a/src/gridrsp.jl b/src/gridrsp.jl index a142c89..56c51d2 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -14,16 +14,16 @@ Construct a GridRSP from a `g::Grid` based on the inverse temperature parameter """ function GridRSP(g::Grid; θ=nothing, verbose=true) Pref = _Pref(g.affinities) - W = _W(Pref, θ, g.costmatrix) + W = _W(Pref, θ, g.costmatrix) @debug("Computing fundamental matrix of non-absorbing paths (Z). Please be patient...") - Z = (I - W)\Matrix(sparse(g.targetnodes, - 1:length(g.targetnodes), - 1.0, - size(g.costmatrix, 1), - length(g.targetnodes))) + Z = (I - W) \ Matrix(sparse(g.targetnodes, + 1:length(g.targetnodes), + 1.0, + size(g.costmatrix, 1), + length(g.targetnodes))) # Check that values in Z are not too small: - verbose && if minimum(Z)*minimum(nonzeros(g.costmatrix .* W)) == 0 + verbose && if minimum(Z) * minimum(nonzeros(g.costmatrix .* W)) == 0 @warn "Warning: Z-matrix contains too small values, which can lead to inaccurate results! Check that the graph is connected or try decreasing θ." end @@ -31,15 +31,15 @@ function GridRSP(g::Grid; θ=nothing, verbose=true) end _get_grid(grsp::GridRSP) = grsp.g -_get_grid(g::Grid) = g +_get_grid(g::Grid) = g function Base.show(io::IO, ::MIME"text/plain", grsp::GridRSP) print(io, summary(grsp), " of size ", grsp.g.nrows, "x", grsp.g.ncols) end # function Base.show(io::IO, ::MIME"text/html", grsp::GridRSP) - # t = string(summary(grsp), " of size ", grsp.g.nrows, "x", grsp.g.ncols) - # write(io, "

$t

") - # show(io, MIME"text/html"(), grsp.g)) +# t = string(summary(grsp), " of size ", grsp.g.nrows, "x", grsp.g.ncols) +# write(io, "

$t

") +# show(io, MIME"text/html"(), grsp.g)) # end DimensionalData.dims(grsp::GridRSP) = dims(grsp.g) @@ -48,7 +48,7 @@ DimensionalData.dims(grsp::GridRSP) = dims(grsp.g) Compute RSP betweenness of all nodes weighted by source and target qualities. """ -function betweenness_qweighted(grsp::Union{GridRSP,NamedTuple}; +function betweenness_qweighted(grsp::Union{GridRSP,NamedTuple}; output=fill(NaN, g.nrows, g.ncols), kw... ) @@ -90,7 +90,7 @@ The optional `diagvalue` element specifies which value to use for the diagonal of the matrix of proximities, i.e. after applying the inverse cost function to the matrix of distances. When nothing is specified, the diagonal elements won't be adjusted. """ -function betweenness_kweighted(grsp::Union{GridRSP,NamedTuple}; +function betweenness_kweighted(grsp::Union{GridRSP,NamedTuple}; output=fill(NaN, size(grsp.g)), proximities=nothing, kw... @@ -115,13 +115,13 @@ end of proximities, i.e. after applying the inverse cost function to the matrix of expected costs. When nothing is specified, the diagonal elements won't be adjusted. """ -function edge_betweenness_kweighted(grsp::Union{GridRSP,NamedTuple}; - proximities=nothing, +function edge_betweenness_kweighted(grsp::Union{GridRSP,NamedTuple}; + proximities=nothing, distance_transformation=nothing, - diagvalue=nothing, + diagvalue=nothing, kw... ) - if isnothing(distance_transformation) + if isnothing(distance_transformation) distance_transformation = inv(grsp.g.costfunction) end # TODO why does this only use `expected_cost`? @@ -164,7 +164,7 @@ least_cost_distance(grsp::Union{GridRSP,NamedTuple}; kw...) = least_cost_distanc Compute the mean Kullback–Leibler divergence between the free energy distances and the RSP expected costs for `grsp::GridRSP`. """ -function mean_kl_divergence(grsp::Union{GridRSP,NamedTuple}; +function mean_kl_divergence(grsp::Union{GridRSP,NamedTuple}; free_energy_distances=nothing, expected_costs=nothing, kw... @@ -183,7 +183,7 @@ function mean_kl_divergence(grsp::Union{GridRSP,NamedTuple}; return mean_kl_divergence(grsp::Union{GridRSP,NamedTuple}, free_energy_distances, expected_costs; kw...) end -function mean_kl_divergence(grsp::Union{GridRSP,NamedTuple}, free_energy_distances, expected_costs; +function mean_kl_divergence(grsp::Union{GridRSP,NamedTuple}, free_energy_distances, expected_costs; workspaces=(similar(grsp.Z),), kw... ) g = grsp.g @@ -197,7 +197,7 @@ end Compute the mean Kullback–Leibler divergence between the least-cost path and the random path distribution for `grsp::GridRSP`, weighted by the qualities of the source and target node. """ -function mean_lc_kl_divergence(grsp::Union{GridRSP,NamedTuple}; +function mean_lc_kl_divergence(grsp::Union{GridRSP,NamedTuple}; workspaces=[similar(grsp.Z)], kw... ) @@ -207,7 +207,7 @@ function mean_lc_kl_divergence(grsp::Union{GridRSP,NamedTuple}; cost_weighted_digraph = SimpleWeightedDiGraph(C) n = size(C, 1) from = Array{Int}(undef, n) - kl_div = Array{Float64}(undef, n) + kl_div = Array{Float64}(undef, n) # Previously # div = hcat([least_cost_kl_divergence(C, grsp.Pref, i; cost_weighted_digraph, from, kl_div, kw...) for i in g.targetnodes]...) div = workspace1 @@ -241,7 +241,7 @@ function least_cost_kl_divergence(C::SparseMatrixCSC, Pref::SparseMatrixCSC, tar for i in 1:n fromᵢ = from[i] - toᵢ = to[i] + toᵢ = to[i] notdone |= fromᵢ != toᵢ if fromᵢ == toᵢ continue @@ -361,13 +361,13 @@ function connected_habitat(grsp::Union{Grid,GridRSP,NamedTuple}, S::Matrix; return _maybe_raster(output, grsp) end function connected_habitat(grsp::Union{GridRSP,NamedTuple}, - cell::CartesianIndex{2}; - distance_transformation=nothing, - diagvalue=nothing, - avalue=floatmin(), # smallest non-zero value - qˢvalue=0.0, - qᵗvalue=0.0, - kw...) + cell::CartesianIndex{2}; + distance_transformation=nothing, + diagvalue=nothing, + avalue=floatmin(), # smallest non-zero value + qˢvalue=0.0, + qᵗvalue=0.0, + kw...) g = grsp.g @@ -397,18 +397,18 @@ function connected_habitat(grsp::Union{GridRSP,NamedTuple}, newqt = [newtarget_qualities[i] for i in g.id_to_grid_coordinate_list ∩ newtargetidx] newg = Grid(g.nrows, - g.ncols, - affinities, - g.costfunction, - g.costfunction === nothing ? g.costmatrix : mapnz(g.costfunction, affinities), - g.id_to_grid_coordinate_list, - newsource_qualities, - newtarget_qualities, - newtargetidx, - newtargetnodes, - newqs, - newqt, - dims(g)) + g.ncols, + affinities, + g.costfunction, + g.costfunction === nothing ? g.costmatrix : mapnz(g.costfunction, affinities), + g.id_to_grid_coordinate_list, + newsource_qualities, + newtarget_qualities, + newtargetidx, + newtargetnodes, + newqs, + newqt, + dims(g)) newh = GridRSP(newg; θ=grsp.θ) @@ -485,7 +485,7 @@ function LinearAlgebra.eigmax(grsp::Union{GridRSP,NamedTuple}; p₁ = setdiff(1:n, g.targetnodes) # use an Arnoldi based eigensolver to compute the largest (absolute) eigenvalue and right vector (of submatrix) - Fps = partialschur(qSq₀₀, nev=1, tol=tol) + Fps = partialschur(qSq₀₀, nev=1, tol=tol) λ₀, vʳ₀ = partialeigen(Fps[1]) # Some notes on handling intended or unintended landmarks. When the Grid includes landmarks, @@ -540,10 +540,10 @@ function LinearAlgebra.eigmax(grsp::Union{GridRSP,NamedTuple}; # construct full right vector vʳ = fill(NaN, n) vʳ[g.targetnodes] = vʳ₀ - vʳ[p₁] = view(qSq, p₁, :) *vʳ₀ / λ₀[1] + vʳ[p₁] = view(qSq, p₁, :) * vʳ₀ / λ₀[1] # compute left vector (of submatrix) by shift-invert - Flu = lu(qSq₀₀ - λ₀[1]*I) + Flu = lu(qSq₀₀ - λ₀[1] * I) vˡ₀ = ldiv!(Flu', rand(length(g.targetidx))) rmul!(vˡ₀, inv(vˡ₀[1])) @@ -578,12 +578,12 @@ function criticality(grsp::Union{GridRSP,NamedTuple}; ) g = grsp.g nl = length(g.targetidx) - reference_connected_habitat = sum(connected_habitat(grsp; + reference_connected_habitat = sum(connected_habitat(grsp; distance_transformation, diagvalue, kw... )) critvec = fill(reference_connected_habitat, nl) - @progress name="Computing criticality..." for i in 1:nl + @progress name = "Computing criticality..." for i in 1:nl critvec[i] = sum(connected_habitat(grsp, g.targetidx[i]; distance_transformation, diagvalue, avalue, qˢvalue, qᵗvalue, kw... )) @@ -594,7 +594,7 @@ function criticality(grsp::Union{GridRSP,NamedTuple}; return _maybe_raster(output, grsp) end -function _computeproximities(grsp; +function _computeproximities(grsp; connectivity_function=expected_cost, distance_transformation=nothing, diagvalue=nothing, diff --git a/src/problem.jl b/src/problem.jl index 8c8e6d9..2432542 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -30,10 +30,10 @@ to be run in the same job. @kwdef struct Problem{GM,CM<:ConnectivityMeasure,SM<:Solver,DV,CO} <: AbstractProblem graph_measures::GM connectivity_measure::CM = LeastCostDistance() - solver::SM= MatrixSolver() - diagvalue::DV=nothing - costs::CO=MinusLog() - prune::Bool=true + solver::SM = MatrixSolver() + diagvalue::DV = nothing + costs::CO = MinusLog() + prune::Bool = true end Problem(graph_measures::Union{Tuple,NamedTuple}; kw...) = Problem(; graph_measures, kw...) @@ -57,7 +57,7 @@ isthreaded(p::Problem) = p.threaded solve(p::Problem, rast::RasterStack; kw...) = solve!(init(p, rast; kw...), p; kw...) -solve!(workspace::NamedTuple, p::Problem; kw...) = +solve!(workspace::NamedTuple, p::Problem; kw...) = solve!(workspace, solver(p), connectivity_measure(p), p; kw...) # Init is conditional on solver and connectivity measure diff --git a/src/randomizedshortestpath.jl b/src/randomizedshortestpath.jl index d5ebfe2..fe3b15d 100644 --- a/src/randomizedshortestpath.jl +++ b/src/randomizedshortestpath.jl @@ -32,14 +32,14 @@ function _W(Pref::SparseMatrixCSC, θ::Real, C::SparseMatrixCSC) end function RSP_betweenness_qweighted(W::SparseMatrixCSC, - Z::AbstractMatrix, - qˢ::AbstractVector, - qᵗ::AbstractVector, - targetnodes::AbstractVector; + Z::AbstractMatrix, + qˢ::AbstractVector, + qᵗ::AbstractVector, + targetnodes::AbstractVector; Zⁱ=_inv(Z), workspaces=[similar(Z), similar(Z)], solver=nothing, - Aadj = (I - W)', + Aadj=(I - W)', Aadj_init=init(solver, Aadj), kw... ) @@ -49,7 +49,7 @@ function RSP_betweenness_qweighted(W::SparseMatrixCSC, qˢZⁱqᵗ .= qˢ .* Zⁱ .* qᵗ' sumqˢ = sum(qˢ) for j in axes(Z, 2) - qˢZⁱqᵗ[targetnodes[j], j] -= sumqˢ * qᵗ[j] * Zⁱ[targetnodes[j], j] + qˢZⁱqᵗ[targetnodes[j], j] -= sumqˢ * qᵗ[j] * Zⁱ[targetnodes[j], j] end # TODO adjoint of LinearSolver? @@ -62,11 +62,11 @@ end function RSP_betweenness_kweighted(W::SparseMatrixCSC, - Z::AbstractMatrix, # Fundamental matrix of non-absorbing paths - qˢ::AbstractVector, # Source qualities - qᵗ::AbstractVector, # Target qualities - S::AbstractMatrix, # Matrix of proximities - landmarks::AbstractVector; + Z::AbstractMatrix, # Fundamental matrix of non-absorbing paths + qˢ::AbstractVector, # Source qualities + qᵗ::AbstractVector, # Target qualities + S::AbstractMatrix, # Matrix of proximities + landmarks::AbstractVector; Zⁱ=_inv(Z), workspaces=[similar(Z)], solver=nothing, @@ -118,10 +118,10 @@ function RSP_betweenness_kweighted(W::SparseMatrixCSC, end function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, - Z::AbstractMatrix, - qˢ::AbstractVector, - qᵗ::AbstractVector, - targetnodes::AbstractVector; + Z::AbstractMatrix, + qˢ::AbstractVector, + qᵗ::AbstractVector, + targetnodes::AbstractVector; solver=nothing, Zⁱ=_inv(Z), workspaces=[similar(Z), similar(Z), similar(Z)], @@ -157,9 +157,9 @@ function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, # ZᵀZⁱ_minus_diag = Z[:,i]'*qˢZⁱqᵗ .- sumqˢ.* (Z[:,i].*diag(Zⁱ).*qᵗ)' for (j, x) in enumerate(view(W, i, :)) - x > 0 || continue + x > 0 || continue # edge_betweennesses[i,j] = W[i,j] .* Zqt[j,:]'* (ZᵀZⁱ_minus_diag * Z[j,:])[1] - edge_betweennesses[i, j] = W[i, j] .* (view(Z, j, :)' * view(RHS, :, i))[1] + edge_betweennesses[i, j] = W[i, j] .* (view(Z, j, :)'*view(RHS, :, i))[1] end end @@ -167,11 +167,11 @@ function RSP_edge_betweenness_qweighted(W::SparseMatrixCSC, end function RSP_edge_betweenness_kweighted(W::SparseMatrixCSC, - Z::AbstractMatrix, - qˢ::AbstractVector, - qᵗ::AbstractVector, - K::AbstractMatrix, # Matrix of proximities - targetnodes::AbstractVector; + Z::AbstractMatrix, + qˢ::AbstractVector, + qᵗ::AbstractVector, + K::AbstractMatrix, # Matrix of proximities + targetnodes::AbstractVector; solver=nothing, workspaces=[similar(Z), similar(Z)], permuted_workspaces=(similar(Z'),), @@ -204,8 +204,8 @@ function RSP_edge_betweenness_kweighted(W::SparseMatrixCSC, # ZᵀZⁱ_minus_diag = Z[:,i]'*K̂ .- (k.*Z[targetnodes,i].*diag(Zⁱ))' for (j, x) in enumerate(view(W, i, :)) - x > 0 || continue - edge_betweennesses[i, j] = W[i, j] .* (view(Z, j, :)' * view(K̂ᵀZ_minus_diag, :, i))[1] + x > 0 || continue + edge_betweennesses[i, j] = W[i, j] .* (view(Z, j, :)'*view(K̂ᵀZ_minus_diag, :, i))[1] end end @@ -214,9 +214,9 @@ end function RSP_expected_cost(W::SparseMatrixCSC, - C::SparseMatrixCSC, - Z::AbstractMatrix, - landmarks::AbstractVector; + C::SparseMatrixCSC, + Z::AbstractMatrix, + landmarks::AbstractVector; solver=nothing, A=(I - W), A_init=init(solver, A), @@ -233,7 +233,7 @@ function RSP_expected_cost(W::SparseMatrixCSC, throw(DimensionMismatch("")) end if axes(Z, 2) != axes(landmarks, 1) - Z = Z[:,landmarks] + Z = Z[:, landmarks] end @@ -256,14 +256,14 @@ function RSP_expected_cost(W::SparseMatrixCSC, # TODO clarify what this does for j in axes(Z, 2) - dˢ[j] = C̄[landmarks[j], j] + dˢ[j] = C̄[landmarks[j], j] end C̄ .-= dˢ' return copyto!(expected_costs, C̄) end -function RSP_free_energy_distance(Z::AbstractMatrix, θ::Real, landmarks::AbstractVector; - survival_probability=nothing, +function RSP_free_energy_distance(Z::AbstractMatrix, θ::Real, landmarks::AbstractVector; + survival_probability=nothing, free_energy_distances=similar(Z), kw... ) @@ -279,10 +279,10 @@ function RSP_survival_probability(Z::AbstractMatrix, θ::Real, landmarks::Abstra Z .* inv.([Z[i, j] for (j, i) in enumerate(landmarks)])' end -function RSP_power_mean_proximity(Z::AbstractMatrix, θ::Real, landmarks::AbstractVector; +function RSP_power_mean_proximity(Z::AbstractMatrix, θ::Real, landmarks::AbstractVector; survival_probability=nothing, kw... ) - survival_probability = if isnothing(survival_probability) + survival_probability = if isnothing(survival_probability) RSP_survival_probability(Z, θ, landmarks; kw...) else survival_probability @@ -291,11 +291,11 @@ function RSP_power_mean_proximity(Z::AbstractMatrix, θ::Real, landmarks::Abstra end function connected_habitat(qˢ::AbstractVector, # Source qualities - qᵗ::AbstractVector, # Target qualities - S::AbstractMatrix; # Matrix of proximities + qᵗ::AbstractVector, # Target qualities + S::AbstractMatrix; # Matrix of proximities workspaces=[similar(S, size(S, 1), 1)], kw... -) +) mul!(view(workspaces[1], :, 1), S, qᵗ) .*= qˢ end @@ -337,7 +337,7 @@ function bellman_ford(Pref::SparseMatrixCSC, C::SparseMatrixCSC, θ::Real, targe iter = 0 trPref = copy(Pref') - trC = copy(C') + trC = copy(C') while !convergence φ_1 = copy(φ) @@ -349,7 +349,7 @@ function bellman_ford(Pref::SparseMatrixCSC, C::SparseMatrixCSC, θ::Real, targe if updatelist[1] == -1 updatelist = [index] else - if rawDistances[node - 1] == rawDistances[node] + if rawDistances[node-1] == rawDistances[node] append!(updatelist, index) # Equidistant nodes should be updated simultaneously else c̄, φ = _bellman_ford_update_transposed!(c̄, φ, trPref, trC, θ, updatelist) @@ -365,7 +365,7 @@ function bellman_ford(Pref::SparseMatrixCSC, C::SparseMatrixCSC, θ::Real, targe break # Break the loop if in a single pass approach end iter += 1 - if iter==1 + if iter == 1 continue end # check if the free energy and the RSP have converged @@ -376,17 +376,17 @@ end # Updates the RSP and free energy vectors for a given list of nodes # Inputs: - # c̄: the directed expected cost (RSP dissimilarity) - # φ: the directed free energy - # trPref: the (transposed) transition probability matrix - # trC: the (transposed) cost matrix - # θ: the inverse temperature - # updatelist: the list of nodes that should be updated simultaneously +# c̄: the directed expected cost (RSP dissimilarity) +# φ: the directed free energy +# trPref: the (transposed) transition probability matrix +# trC: the (transposed) cost matrix +# θ: the inverse temperature +# updatelist: the list of nodes that should be updated simultaneously # Outputs: - # c̄: the updated directed expected cost (RSP dissimilarity) - # φ: the updated directed free energy +# c̄: the updated directed expected cost (RSP dissimilarity) +# φ: the updated directed free energy # Comment: - # The two sparse arrays in passed in transposed form since it makes the access much more efficient +# The two sparse arrays in passed in transposed form since it makes the access much more efficient function _bellman_ford_update_transposed!(c̄::Vector, φ::Vector, trPref::SparseMatrixCSC, trC::SparseMatrixCSC, θ::Real, updatelist::Vector) if length(updatelist) == 1 index = updatelist[1] @@ -395,8 +395,8 @@ function _bellman_ford_update_transposed!(c̄::Vector, φ::Vector, trPref::Spars φ[index] = v return c̄, φ end - prev_φ=copy(φ) - prev_c̄=copy(c̄) + prev_φ = copy(φ) + prev_c̄ = copy(c̄) for i in 1:length(updatelist) index = updatelist[i] ec, v = _bellman_ford_update_node_transposed(prev_c̄, prev_φ, trPref, trC, θ, index) @@ -418,7 +418,7 @@ function mygetindex(A::SparseMatrixCSC{Tv,Ti}, I::AbstractVector, J::Integer) wh nzval = Tv[] iI = 1 - for iptr in A.colptr[J]:(A.colptr[J + 1] - 1) + for iptr in A.colptr[J]:(A.colptr[J+1]-1) iA = A.rowval[iptr] while iI <= nI && I[iI] <= iA if I[iI] == iA @@ -433,19 +433,19 @@ end # Updates the directed RSP and direct free energy value for a given node # Inputs: - # c̄: the directed expected cost (RSP dissimilarity) - # φ: the directed free energy - # trPref: the (transposed) transition probability matrix - # trC: the (transposed) cost matrix - # θ: the inverse temperature - # index: the index of the node that should be updated +# c̄: the directed expected cost (RSP dissimilarity) +# φ: the directed free energy +# trPref: the (transposed) transition probability matrix +# trC: the (transposed) cost matrix +# θ: the inverse temperature +# index: the index of the node that should be updated # Outputs: - # ec: the updated directed expected cost (RSP dissimilarity) for the node - # v: the updated directed free energy for the node +# ec: the updated directed expected cost (RSP dissimilarity) for the node +# v: the updated directed free energy for the node # Comment: - # The two sparse arrays in passed in transposed form since it makes the access much more efficient +# The two sparse arrays in passed in transposed form since it makes the access much more efficient function _bellman_ford_update_node_transposed(c̄::Vector, φ::Vector, trPref::SparseMatrixCSC, trC::SparseMatrixCSC, θ::Real, index::Integer) - Prefindex = trPref[:,index] + Prefindex = trPref[:, index] idx = Prefindex.nzind # Get the list of successors # computation of θ(cᵢⱼ+φ(j,t))-log([Pʳᵉᶠ]ᵢⱼ) # ect = (Array(trC[idx, index]) + φ[idx]) .* θ .- log.(Prefindex.nzval) @@ -460,13 +460,13 @@ function _bellman_ford_update_node_transposed(c̄::Vector, φ::Vector, trPref::S # First check there is only one neighbor, if so, the solution is trivial if length(idx) == 1 - return c̄[idx[1]] + trC[idx[1], index], ect[1]/θ + return c̄[idx[1]] + trC[idx[1], index], ect[1] / θ end # log-sum-exp trick minval = minimum(ect) # computation of cᵢ* ect .-= minval # remove the lowest value from all the vector - v = (minval - log(sum(exp, -ect)))/θ # computation of the directed free energy + v = (minval - log(sum(exp, -ect))) / θ # computation of the directed free energy if isinf(v) throw(ErrorException("infinite valude in the distance vector at index $index")) end @@ -475,8 +475,8 @@ function _bellman_ford_update_node_transposed(c̄::Vector, φ::Vector, trPref::S ec = zero(eltype(c̄)) for j in 1:length(idx) trCidxjindex = trC[idx[j], index] - pij = trPref[idx[j], index]*exp(θ*(v - φ[idx[j]] - trCidxjindex)) - ec += pij*(trCidxjindex + c̄[idx[j]]) + pij = trPref[idx[j], index] * exp(θ * (v - φ[idx[j]] - trCidxjindex)) + ec += pij * (trCidxjindex + c̄[idx[j]]) end return ec, v end diff --git a/src/solvers.jl b/src/solvers.jl index d0dbf13..f13ddaa 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -6,7 +6,7 @@ Solve all operations on a fully materialised Z matrix. This is fast but memory inneficient for CPUS, and isn't threaded. But may be best for GPUs using CuSSP.jl ? """ -@kwdef struct MatrixSolver <: Solver +@kwdef struct MatrixSolver <: Solver check::Bool = true end @@ -16,7 +16,7 @@ end Use julias default solver but broken into columns, with less memory use and the capacity for threading """ -@kwdef struct VectorSolver <: Solver +@kwdef struct VectorSolver <: Solver check::Bool = true threaded::Bool = false end @@ -53,7 +53,7 @@ problem = ConScape.Problem(; ) ```` """ -struct LinearSolver{A,K} <: Solver +struct LinearSolver{A,K} <: Solver args::A keywords::K threaded::Bool @@ -62,31 +62,31 @@ LinearSolver(args...; threaded=false, kw...) = LinearSolver(args, kw, threaded) # In `init!` we allocate all large dense arrays function init!( - ws::NamedTuple, - solver::MatrixSolver, - cm::FundamentalMeasure, + ws::NamedTuple, + solver::MatrixSolver, + cm::FundamentalMeasure, p::AbstractProblem, rast::RasterStack; verbose=false, -) - _init!(ws, solver, cm, p, rast; verbose) +) + _init!(ws, solver, cm, p, rast; verbose) end function init!( - ws::NamedTuple, - solver::Union{VectorSolver,LinearSolver}, - cm::FundamentalMeasure, + ws::NamedTuple, + solver::Union{VectorSolver,LinearSolver}, + cm::FundamentalMeasure, p::AbstractProblem, rast::RasterStack; verbose=false, -) +) grid = Grid(p, rast) - workspace = _init!(ws, solver, cm, p, rast; verbose) + workspace = _init!(ws, solver, cm, p, rast; verbose) if isthreaded(solver) nbuffers = Thread.nthreads() channel = Channel{typeof(workspace)}(nbuffers) put!(channel, workspace) for n in 2:nbuffers - workspace_n = _init!(ws, solver, cm, p, rast; verbose, grid) + workspace_n = _init!(ws, solver, cm, p, rast; verbose, grid) put!(channel, workspace_N) end return (; channel) @@ -95,14 +95,14 @@ function init!( end end function _init!( - ws::NamedTuple, - solver::Solver, - cm::FundamentalMeasure, + ws::NamedTuple, + solver::Solver, + cm::FundamentalMeasure, p::AbstractProblem, rast::RasterStack; verbose=false, grid=Grid(p, rast) -) +) verbose && println("Retreiving measures...") g = grid gms = graph_measures(p) @@ -111,7 +111,7 @@ function _init!( # B_dense becomes Z verbose && println("Allocating workspaces...") sze = _workspace_size(solver, g) - Z = if hastrait(needs_inv, gms) + Z = if hastrait(needs_inv, gms) if haskey(ws, :Z) _reshape(ws.Z, sze) else @@ -120,19 +120,19 @@ function _init!( else nothing end - Zⁱ = if hastrait(needs_inv, gms) - haskey(ws, :Zⁱ) ? _reshape(ws.Zⁱ, sze) : similar(Z) + Zⁱ = if hastrait(needs_inv, gms) + haskey(ws, :Zⁱ) ? _reshape(ws.Zⁱ, sze) : similar(Z) else nothing end n_workspaces = count_workspaces(p) n_permuted_workspaces = count_permuted_workspaces(p) - workspaces = if haskey(ws, :workspaces) + workspaces = if haskey(ws, :workspaces) [_reshape(w, size(Z)) for w in ws.workspaces] else [similar(Z) for _ in 1:n_workspaces] end - permuted_workspaces = if haskey(ws, :workspaces) + permuted_workspaces = if haskey(ws, :workspaces) [_reshape(pw, size(Z')) for pw in ws.permuted_workspaces] else [similar(Z') for _ in 1:n_permuted_workspaces] @@ -152,9 +152,9 @@ function _init!( else nothing end - function matrix_or_nothing(gm) - if returntype(gm) isa ReturnsDenseSpatial - A = fill(NaN, size(rast)) + function matrix_or_nothing(gm) + if returntype(gm) isa ReturnsDenseSpatial + A = fill(NaN, size(rast)) A[grid.id_to_grid_coordinate_list] .= 0.0 A else @@ -177,7 +177,7 @@ function _init!( matrix_or_nothing(gm) end end - + verbose && println("Finished allocating...") return (; Z, Zⁱ, workspaces, permuted_workspaces, g=grid, grid, free_energy_distances, expected_costs, proximities, outputs) @@ -186,19 +186,19 @@ function init!( workspace::NamedTuple, s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, rast::RasterStack; verbose=false, grid=Grid(p, rast), -) +) # TODO what is needed here? return (; grid) end # RSP is not used for ConnectivityMeasure, so the solver isn't used function solve!( - workspace::NamedTuple, - s::MatrixSolver, - cm::ConnectivityMeasure, + workspace::NamedTuple, + s::MatrixSolver, + cm::ConnectivityMeasure, p::AbstractProblem; verbose=false -) +) g = workspace.g return map(graph_measures(p), workspace.outputs) do gm, output compute(gm, p; workspace..., output) @@ -206,11 +206,11 @@ function solve!( end function solve!( ws::NamedTuple, - solver::MatrixSolver, - cm::FundamentalMeasure, + solver::MatrixSolver, + cm::FundamentalMeasure, p::Problem; verbose=false, -) +) ws1 = _init_sparse(ws, solver, cm, p, ws.grid; verbose) ws2 = _solve_dense!(ws1, solver, cm, p; verbose) gms = graph_measures(p) @@ -220,10 +220,10 @@ end function solve!( ws::NamedTuple, solver::Union{VectorSolver,LinearSolver}, - cm, + cm, p::Problem; verbose=false, -) +) # Get grid and preallocated vectors (; g) = ws gms = graph_measures(p) @@ -236,7 +236,7 @@ function solve!( _update_targets!(target_allocs, g, 1) target_properties = (; targetidx, targetnodes, qt) target_grid = ConstructionBase.setproperties(g, target_properties) - ws1 =_init_sparse(ws, solver, cm, p, target_grid; verbose) + ws1 = _init_sparse(ws, solver, cm, p, target_grid; verbose) ws2 = merge(ws1, (; grid=target_grid, g=target_grid)) target_ws = ConstructionBase.setproperties(ws2, (; g=target_grid, grid=target_grid)) target_ws1 = _solve_dense!(target_ws, solver, cm, p; verbose) @@ -245,7 +245,7 @@ function solve!( target_results[1] = result1 - function run(i) + function run(i) target_qualities = g.target_qualities[g.targetidx[i]] _update_targets!(target_allocs, g, i) first = false @@ -260,7 +260,7 @@ function solve!( if isthreaded(solver) isthreaded(p) && error("threading at solver level not properly implemented") # Threads.@threads for i in eachindex(g.targetnodes)[2:end] - # run(i) + # run(i) # end else for i in eachindex(g.targetnodes)[2:end] @@ -301,14 +301,14 @@ function _solve!(workspace, solver, cm, dt::NamedTuple{DT}, gms::NamedTuple{GMS} # Combine nested and flat results return map(GMS) do k f = flat[k] - if isnothing(f) + if isnothing(f) map(n -> n[k], nested) else f end end |> NamedTuple{GMS} end -function _solve!(workspace, solver, cm, dt, gms::NamedTuple{GMS}, p; verbose) where GMS +function _solve!(workspace, solver, cm, dt, gms::NamedTuple{GMS}, p; verbose) where {GMS} (; grid, Pref, W, Z, outputs) = workspace # GridRSP is just a wrapper now, we can remove it later grsp = GridRSP(grid, cm.θ, Pref, W, Z) @@ -329,7 +329,7 @@ function _update_targets!(a, g, i) end # Do all the work shared accross outputs -function _solve_dense!(ws::NamedTuple, solver::Solver, cm, p::Problem; +function _solve_dense!(ws::NamedTuple, solver::Solver, cm, p::Problem; verbose=false ) (; grid, W, Pref, A, A_init, Aadj_init, Aadj) = ws @@ -355,7 +355,7 @@ function _solve_dense!(ws::NamedTuple, solver::Solver, cm, p::Problem; end grsp = GridRSP(grid, cm.θ, Pref, W, Z) - workspace = (; ws..., Pref, W, A, A_init, Aadj, Aadj_init, Z, Zⁱ) + workspace = (; ws..., Pref, W, A, A_init, Aadj, Aadj_init, Z, Zⁱ) expected_costs = if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost verbose && println("Calculating expected cost...") @@ -415,7 +415,7 @@ isthreaded(s::VectorSolver) = s.threaded # Solver init init(::Union{Nothing,MatrixSolver,VectorSolver}, A::AbstractMatrix) = (; F=lu(A)) -function init(solver::VectorSolver, A::AbstractMatrix) +function init(solver::VectorSolver, A::AbstractMatrix) F = lu(A) if isthreaded(solver) nbuffers = Threads.nthreads() @@ -475,10 +475,10 @@ function LinearAlgebra.ldiv!(s::LinearSolver, init, B; B_copy) end return B end -LinearAlgebra.ldiv!(::Union{MatrixSolver,Nothing}, (; F), B; B_copy=copy(B)) = +LinearAlgebra.ldiv!(::Union{MatrixSolver,Nothing}, (; F), B; B_copy=copy(B)) = ldiv!(B, F, B_copy) # LinearAlgebra.ldiv!(solver::Solver, A::AbstractMatrix, B::AbstractMatrix; kw...) = - # ldiv!(solver, init(solver, A), B; kw...) +# ldiv!(solver, init(solver, A), B; kw...) function LinearAlgebra.ldiv!(s::VectorSolver, init, B; B_copy) # for SparseArrays.UMFPACK._AqldivB_kernel!(Z, F, B, transposeoptype) transposeoptype = SparseArrays.LibSuiteSparse.UMFPACK_A @@ -504,7 +504,7 @@ end # graph_measure, but we want a single RasterStack. # So we merge the names of the two layers -function _merge_to_stack(nt::NamedTuple{K}) where K +function _merge_to_stack(nt::NamedTuple{K}) where {K} unique_nts = map(K) do k _mergename(Val{k}(), nt[k]) end @@ -521,14 +521,14 @@ _maybe_raster(x) = x _maybe_raster(x::Raster) = x _maybe_raster(x::Number) = Raster(fill(x), ()) _maybe_raster(mat::Raster, g) = mat -_maybe_raster(mat::AbstractMatrix, g::Union{Grid,GridRSP}) = +_maybe_raster(mat::AbstractMatrix, g::Union{Grid,GridRSP}) = _maybe_raster(mat, dims(g)) -_maybe_raster(mats::NamedTuple, g::Union{Grid,GridRSP}) = +_maybe_raster(mats::NamedTuple, g::Union{Grid,GridRSP}) = map(mat -> _maybe_raster(mat, g), mats) _maybe_raster(mat::AbstractMatrix, ::Nothing) = mat _maybe_raster(mat::AbstractMatrix, dims::Tuple) = Raster(mat, dims) -function _mergename(::Val{K1}, gm::NamedTuple{K2}) where {K1, K2} +function _mergename(::Val{K1}, gm::NamedTuple{K2}) where {K1,K2} # Combine outer and inner names with an underscore joinedkeys = map(K2) do k2 Symbol(K1, :_, k2) @@ -536,8 +536,8 @@ function _mergename(::Val{K1}, gm::NamedTuple{K2}) where {K1, K2} # And rename the NamedTuple NamedTuple{joinedkeys}(map(_maybe_raster, values(gm))) end -_mergename(::Val{K1}, gm) where K1 = - # We keep the name as is +_mergename(::Val{K1}, gm) where {K1} = +# We keep the name as is NamedTuple{(K1,)}((_maybe_raster(gm),)) function _check_z(s, Z, W, g) @@ -549,9 +549,9 @@ end # This duplicats some logic from gridrsp function _setproximities!( - proximities::AbstractMatrix, - expected_costs::AbstractMatrix, - cm::ConnectivityMeasure, + proximities::AbstractMatrix, + expected_costs::AbstractMatrix, + cm::ConnectivityMeasure, p::Problem, grsp::GridRSP ) @@ -577,7 +577,7 @@ function _reshape(A::Array, dims::Tuple{Vararg{Int}}) v = vec(A) resize!(v, len) reshape(v, dims) - # else + # else # error("Arrays were not sorted. Current len: $(length(A)), needed len: $len") end end \ No newline at end of file diff --git a/src/transformations.jl b/src/transformations.jl index ac89c25..912f8b1 100644 --- a/src/transformations.jl +++ b/src/transformations.jl @@ -6,30 +6,30 @@ Abstrct supertype for distance transformation functions. """ abstract type Transformation end -struct MinusLog <: Transformation end -struct ExpMinus <: Transformation end -struct Inv <: Transformation end -struct OddsAgainst <: Transformation end -struct OddsFor <: Transformation end -struct ExpMinusAlpha{T} <: Transformation +struct MinusLog <: Transformation end +struct ExpMinus <: Transformation end +struct Inv <: Transformation end +struct OddsAgainst <: Transformation end +struct OddsFor <: Transformation end +struct ExpMinusAlpha{T} <: Transformation alpha::T end -struct MinusLogAlpha{T} <: Transformation +struct MinusLogAlpha{T} <: Transformation alpha::T end -(::MinusLog)(x::Number) = -log(x) -(::ExpMinus)(x::Number) = exp(-x) -(::Inv)(x::Number) = inv(x) -(::OddsAgainst)(x::Number) = inv(x) - 0 -(::OddsFor)(x::Number) = x/(0 - x) +(::MinusLog)(x::Number) = -log(x) +(::ExpMinus)(x::Number) = exp(-x) +(::Inv)(x::Number) = inv(x) +(::OddsAgainst)(x::Number) = inv(x) - 0 +(::OddsFor)(x::Number) = x / (0 - x) (t::ExpMinusAlpha)(x::Number) = exp(-x / t.alpha) # (t::MinusLogAlpha)(x::Number) = -log(x * t.alpha) TODO: what is the inverse of ExpMinusAlpha -Base.inv(::MinusLog) = ExpMinus() -Base.inv(::ExpMinus) = MinusLog() -Base.inv(::Inv) = Inv() -Base.inv(::OddsAgainst) = OddsFor() -Base.inv(::OddsFor) = OddsAgainst() +Base.inv(::MinusLog) = ExpMinus() +Base.inv(::ExpMinus) = MinusLog() +Base.inv(::Inv) = Inv() +Base.inv(::OddsAgainst) = OddsFor() +Base.inv(::OddsFor) = OddsAgainst() Base.inv(t::MinusLogAlpha) = ExpMinus(t.alpha) Base.inv(t::ExpMinusAlpha) = MinusLog(t.alpha) diff --git a/src/windows.jl b/src/windows.jl index b2a25d4..6b9cba6 100644 --- a/src/windows.jl +++ b/src/windows.jl @@ -42,10 +42,10 @@ WindowedProblem(problem; kw...) = WindowedProblem(; problem, kw...) centersize(p::WindowedProblem) = p.centersize, p.centersize isthreaded(p::WindowedProblem) = p.threaded -function solve(p::WindowedProblem, rast::RasterStack; +function solve(p::WindowedProblem, rast::RasterStack; verbose=false, test_windows=false, mosaic_return=true, timed=false, kw... ) - solve!(init(p, rast; verbose, kw...), p; + solve!(init(p, rast; verbose, kw...), p; verbose, test_windows, mosaic_return, timed ) end @@ -62,7 +62,7 @@ function solve!(workspace, p::WindowedProblem; _get_window_with_zeroed_buffer(view, p, rast, window_ranges[i]) end return if mosaic_return - Rasters.mosaic(sum, collect(skipmissing(output_stacks)); + Rasters.mosaic(sum, collect(skipmissing(output_stacks)); to=rast, missingval=0.0, verbose ) else @@ -88,14 +88,14 @@ function solve!(workspace, p::WindowedProblem; verbose && println("Getting workspace from channel...") workspace = take!(ch) verbose && println("Initialising window from size $(size(window_rast)), from ranges $window...") - workspace_initialised = init!(workspace, p.problem, window_rast; verbose) + workspace_initialised = init!(workspace, p.problem, window_rast; verbose) # Solve for the window verbose && println("Solving window $window...") output_stacks[i] = solve!(workspace_initialised, p.problem) # Return the workspace to the channel put!(ch, workspace) end - window_elapsed = Vector{Pair{Float64,Int64}}(undef, length(sorted_indices)) + window_elapsed = Vector{Pair{Float64,Int64}}(undef, length(sorted_indices)) # Run the window problems if p.threaded Threads.@threads for i in eachindex(sorted_indices) @@ -130,11 +130,11 @@ function solve!(workspace, p::WindowedProblem; end init(p::WindowedProblem, rast::RasterStack; kw...) = init!((;), p, rast; kw...) -function init!(workspace::NamedTuple, p::WindowedProblem, rast::RasterStack; +function init!(workspace::NamedTuple, p::WindowedProblem, rast::RasterStack; window_ranges=_window_ranges(p, rast), window_sizes=_window_sizes(p, rast; window_ranges), window_indices=_window_indices(p, rast; window_ranges), - sorted_indices=last.(sort!(prod.(window_sizes[window_indices]) .=> window_indices; rev=true)), + sorted_indices=last.(sort!(prod.(window_sizes[window_indices]) .=> window_indices; rev=true)), verbose=true, ) n = min(length(window_indices), p.threaded ? Threads.nthreads() : 1) @@ -202,22 +202,22 @@ for nested operations. grain::Union{Nothing,Int} = nothing ext::String = ".tif" end -function BatchProblem(problem::Problem; +function BatchProblem(problem::Problem; centersize::Union{Int,Tuple{Int,Int}}, kw... -) +) centersize = centersize isa Tuple{Int,Int} ? centersize : (centersize, centersize) BatchProblem(; problem, centersize, kw...) end -function BatchProblem(problem::WindowedProblem; +function BatchProblem(problem::WindowedProblem; nwindows=nothing, - centersize::Union{Nothing,Int,Tuple{Int,Int}}=nothing, + centersize::Union{Nothing,Int,Tuple{Int,Int}}=nothing, buffer::Union{Nothing,Int}=nothing, kw... ) - buffer = if isnothing(buffer) + buffer = if isnothing(buffer) problem.buffer else - buffer == problem.buffer || + buffer == problem.buffer || throw(ArgumentError("BatchProblem buffer must match WindowedProblem buffer. Got $buffer and $(problem.buffer)")) buffer end @@ -226,7 +226,7 @@ function BatchProblem(problem::WindowedProblem; centersize = x, x else centersize = centersize isa Tuple{Int,Int} ? centersize : (centersize, centersize) - map(centersize, ConScape.centersize(problem)) do bcs, wcs + map(centersize, ConScape.centersize(problem)) do bcs, wcs rem(bcs, wcs) == 0 || throw(ArgumentError("BatchProblem centersize must be a multiple of WindowedProblem centersize. Got $centersize and $(problem.centersize)")) end @@ -248,7 +248,7 @@ end centersize(p::BatchProblem) = p.centersize -function solve(p::BatchProblem, rast::RasterStack; +function solve(p::BatchProblem, rast::RasterStack; window_indices=_window_indices(p, rast), kw... ) for i in eachindex(window_indices) @@ -266,7 +266,7 @@ function solve!(ws::NamedTuple, p::BatchProblem; verbose=false, kw...) return _store(p, output, ws.window; verbose) end -function init(p::BatchProblem, rast::RasterStack, i::Int; +function init(p::BatchProblem, rast::RasterStack, i::Int; window_ranges=_window_ranges(p, rast), window_indices=(println("Calculating window indices, pass `window_indices` to skip... "); _window_indices(p, rast; window_ranges)), kw... @@ -317,8 +317,8 @@ function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack ) end function assess( - p::AbstractWindowedProblem{<:AbstractWindowedProblem}, - rast::AbstractRasterStack; + p::AbstractWindowedProblem{<:AbstractWindowedProblem}, + rast::AbstractRasterStack; nthreads=Threads.nthreads(), verbose=true, kw... @@ -344,7 +344,7 @@ function assess( window_rast = take!(channel) function empty_assesment() verbose && println(" No targets found") - WindowAssessment(; + WindowAssessment(; shape=(0, 0), njobs=0, sizes=Tuple{Int,Int}[], @@ -384,7 +384,7 @@ function assess( shape = size(window_ranges) return NestedAssessment( shape, - njobs, + njobs, mask, indices, assessments, @@ -400,16 +400,16 @@ function Rasters.mosaic(p::BatchProblem; to, lazy=true, missingval=0.0, kw...) return Rasters.mosaic(sum, stacks; missingval, to, kw...) end -function _store(p::BatchProblem, output::RasterStack{K}, ranges; kw...) where K +function _store(p::BatchProblem, output::RasterStack{K}, ranges; kw...) where {K} dir = mkpath(_window_path(p, ranges)) - return Rasters.write(joinpath(dir, ""), output; + return Rasters.write(joinpath(dir, ""), output; ext=p.ext, force=true, verbose=false, kw... ) end function _window_path(p, ranges::Tuple) corners = map(first, ranges) - window_dirname = "window_" * join(corners, '_') + window_dirname = "window_" * join(corners, '_') return joinpath(p.datapath, window_dirname) end @@ -442,9 +442,9 @@ function _window_ranges(p::Union{BatchProblem,WindowedProblem}, rast::AbstractRa # Define the corners of each window corners = CartesianIndices(size)[begin:cs1:end-2buffer, begin:cs2:end-2buffer] # Create an iterator of ranges for retreiving each window - return [map((i, s, ws) -> i:min(s, i + ws-1), Tuple(c), size, windowsize) for c in corners] + return [map((i, s, ws) -> i:min(s, i + ws - 1), Tuple(c), size, windowsize) for c in corners] end - + function _get_window_with_zeroed_buffer!(dest, p::AbstractWindowedProblem, rast::RasterStack, rs) source = view(rast, rs...) # Reshape and rebuild to resuse memory @@ -460,16 +460,16 @@ function _get_window_with_zeroed_buffer!(dest, p::AbstractWindowedProblem, rast: return _with_sparse_targets(p, source, dest) end -_get_window_with_zeroed_buffer(p::AbstractWindowedProblem, args...) = +_get_window_with_zeroed_buffer(p::AbstractWindowedProblem, args...) = _get_window_with_zeroed_buffer(view, p, args...) -_get_window_with_zeroed_buffer(f::Function , p::AbstractWindowedProblem, rast::RasterStack) = +_get_window_with_zeroed_buffer(f::Function, p::AbstractWindowedProblem, rast::RasterStack) = _get_window_with_zeroed_buffer(f, p, rast, axes(rast)) function _get_window_with_zeroed_buffer(f::Function, p::AbstractWindowedProblem, rast::RasterStack, rs) source = f(rast, rs...) return _with_sparse_targets(p, source, source) end -_target_ranges(p, source) = map(s -> buffer(p) + 1:s - buffer(p), size(source)) +_target_ranges(p, source) = map(s -> buffer(p)+1:s-buffer(p), size(source)) function _with_sparse_targets(p, source, dest) tq = source.target_qualities From 20bac98d9975c4b4aa2bc13ce7c1be0acf452dd5 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 17 Feb 2025 13:50:14 +0100 Subject: [PATCH 32/51] reorganise and document --- src/connectivity_measure.jl | 14 ++++++- src/graph_measure.jl | 79 ++++++++++++++++------------------- src/grid.jl | 34 ++++++++------- src/gridrsp.jl | 10 +++-- src/randomizedshortestpath.jl | 4 +- src/solvers.jl | 2 + 6 files changed, 76 insertions(+), 67 deletions(-) diff --git a/src/connectivity_measure.jl b/src/connectivity_measure.jl index d36e183..4904ad4 100644 --- a/src/connectivity_measure.jl +++ b/src/connectivity_measure.jl @@ -1,5 +1,12 @@ -# New type-based interface -# Easier to add parameters to these +""" + GraphMeasure + +Abstract supertype for connectivity measures. + +These are lazy definitions of conscape functions, +with required parameters attached rather than passed +in through keywords. +""" abstract type ConnectivityMeasure end abstract type FundamentalMeasure <: ConnectivityMeasure end @@ -29,8 +36,11 @@ keywords(cm::ConnectivityMeasure) = _keywords(cm) distance_transformation(cm::FundamentalMeasure) = nothing distance_transformation(cm::DistanceMeasure) = cm.distance_transformation + # TODO remove the complexity of the connectivity_function # These methods are mostly to avoid changing the original interface for now +# Its a quirk of how MeanKullbackLeiblerDivergence is implemented +# that these can be calculated separately from the main grid connectivity_function(::LeastCostDistance) = least_cost_distance connectivity_function(::ExpectedCost) = expected_cost connectivity_function(::FreeEnergyDistance) = free_energy_distance diff --git a/src/graph_measure.jl b/src/graph_measure.jl index efd1fcd..7afa482 100644 --- a/src/graph_measure.jl +++ b/src/graph_measure.jl @@ -6,34 +6,13 @@ These are lazy definitions of conscape functions. """ abstract type GraphMeasure end -abstract type ReturnType end -struct ReturnsDenseSpatial <: ReturnType end -struct ReturnsSparse <: ReturnType end -struct ReturnsScalar <: ReturnType end -struct ReturnsOther{F} <: ReturnType - f::F -end - -""" - NoWriteArray - -A Julia AbstractArray wrapper that errors on `setindex!`, for testing. -""" -mutable struct NoWriteArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N} - __data::A -end - -Base.size(A::NoWriteArray) = size(A.__data) -Base.copy(A::NoWriteArray) = copy(A.__data) -Base.getindex(A::NoWriteArray, i...) = A.__data[i...] -Base.setindex!(A::NoWriteArray, v, i...) = error("Cannot write to NoWriteArray") -Base.:(==)(A::NoWriteArray, B::NoWriteArray) = A.__data == B.__data - abstract type TopologicalMeasure <: GraphMeasure end abstract type BetweennessMeasure <: GraphMeasure end abstract type PerturbationMeasure <: GraphMeasure end abstract type PathDistributionMeasure <: GraphMeasure end +# Concrete GraphMeasure structs + struct BetweennessQweighted <: BetweennessMeasure end @kwdef struct BetweennessKweighted <: BetweennessMeasure end struct EdgeBetweennessQweighted <: BetweennessMeasure end @@ -54,18 +33,8 @@ end struct MeanLeastCostKullbackLeiblerDivergence <: PathDistributionMeasure end struct MeanKullbackLeiblerDivergence <: PathDistributionMeasure end -# These allow calculation of return allocations -returntype(::EdgeBetweennessQweighted) = ReturnsSparse() -returntype(::EdgeBetweennessKweighted) = ReturnsSparse() -returntype(::BetweennessQweighted) = ReturnsDenseSpatial() -returntype(::BetweennessKweighted) = ReturnsDenseSpatial() -returntype(::ConnectedHabitat) = ReturnsDenseSpatial() -returntype(::Criticality) = ReturnsDenseSpatial() -returntype(::EigMax) = ReturnsOther((n, m) -> n + m) -returntype(::MeanLeastCostKullbackLeiblerDivergence) = ReturnsScalar() -returntype(::MeanKullbackLeiblerDivergence) = ReturnsScalar() - # Map structs to function calls + graph_function(m::BetweennessKweighted) = betweenness_kweighted graph_function(m::BetweennessQweighted) = betweenness_qweighted graph_function(m::ConnectedHabitat) = connected_habitat @@ -76,7 +45,8 @@ graph_function(m::EdgeBetweennessKweighted) = edge_betweenness_kweighted graph_function(m::EdgeBetweennessQweighted) = edge_betweenness_qweighted graph_function(m::EigMax) = eigmax -# Get function keywords +# Function keywords + keywords(gm::GraphMeasure, p::AbstractProblem) = (; _keywords(gm)..., solver=solver(p), _connectivity_keywords(gm, p)...) keywords(gm::ConnectedHabitat, p::AbstractProblem) = @@ -94,6 +64,27 @@ function _connectivity_keywords(gm::GraphMeasure, p::AbstractProblem) end end +# Traits + +abstract type ReturnType end +struct ReturnsDenseSpatial <: ReturnType end +struct ReturnsSparse <: ReturnType end +struct ReturnsScalar <: ReturnType end +struct ReturnsOther{F} <: ReturnType + f::F +end + +# These allow calculation of return allocations +returntype(::EdgeBetweennessQweighted) = ReturnsSparse() +returntype(::EdgeBetweennessKweighted) = ReturnsSparse() +returntype(::BetweennessQweighted) = ReturnsDenseSpatial() +returntype(::BetweennessKweighted) = ReturnsDenseSpatial() +returntype(::ConnectedHabitat) = ReturnsDenseSpatial() +returntype(::Criticality) = ReturnsDenseSpatial() +returntype(::EigMax) = ReturnsOther((n, m) -> n + m) +returntype(::MeanLeastCostKullbackLeiblerDivergence) = ReturnsScalar() +returntype(::MeanKullbackLeiblerDivergence) = ReturnsScalar() + # A trait for connectivity requirement needs_connectivity(::GraphMeasure) = false needs_connectivity(::BetweennessKweighted) = true @@ -122,13 +113,7 @@ needs_free_energy_distance(::GraphMeasure) = false needs_free_energy_distance(::MeanKullbackLeiblerDivergence) = true needs_Aaj_init(::GraphMeasure) = true # TODO which dont? -# Trait aggregator -hastrait(t, gms) = mapreduce(t, |, gms; init=false) - -# compute: run the function -compute(gm::GraphMeasure, p::AbstractProblem, g::Union{Grid,GridRSP}; kw...) = - graph_function(gm)(g; keywords(gm, p)..., kw...) - +# Trait helpers function count_workspaces(p::AbstractProblem) gms = graph_measures(p) @@ -138,4 +123,12 @@ function count_workspaces(p::AbstractProblem) end end count_permuted_workspaces(p::AbstractProblem) = - mapreduce(needs_permuted_workspaces, max, graph_measures(p)) \ No newline at end of file + mapreduce(needs_permuted_workspaces, max, graph_measures(p)) + +# Trait aggregator +hastrait(t, gms) = reduce(|, map(t, gms); init=false) + + +# compute: run a graph function with the appropriate keywords +compute(gm::GraphMeasure, p::AbstractProblem, g::Union{Grid,GridRSP}; kw...) = + graph_function(gm)(g; keywords(gm, p)..., kw...) \ No newline at end of file diff --git a/src/grid.jl b/src/grid.jl index c46cee9..888fffc 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -24,11 +24,13 @@ end costs::Union{Transformation,SparseMatrixCSC{Float64,Int}}=MinusLog(), prune=true)::Grid -Construct a `Grid` from an `affinities` matrix of type `SparseMatrixCSC`. It is possible -to also supply matrices of `source_qualities` and `target_qualities` as well as -a `costs` function that maps the `affinities` matrix to a `costs` matrix. Alternatively, -it is possible to supply a matrix to `costs` directly. If `prune=true` (the default), the -affinity and cost matrices will be pruned to exclude unreachable nodes. +Construct a `Grid` from an `affinities` matrix of type `SparseMatrixCSC`. + +It is possible to also supply matrices of `source_qualities` and `target_qualities` as well as +a `costs` function that maps the `affinities` matrix to a `costs` matrix. + +Alternatively, it is possible to supply a matrix to `costs` directly. If `prune=true` (the default), +the affinity and cost matrices will be pruned to exclude unreachable nodes. """ function Grid(nrows::Integer, ncols::Integer; @@ -403,10 +405,11 @@ end approx::Bool=false ) -Compute the randomized shorted path based expected costs from all source nodes to -all target nodes in the graph defined by `g` using the inverse temperature parameter -`θ`. The computation can either continue until convergence when setting `approx=false` -(the default) or return an approximate result based on just a single iteration of the Bellman-Ford +Compute the randomized shorted path based expected costs from all source nodes to all +target nodes in the graph defined by `g` using the inverse temperature parameter `θ`. + +The computation can either continue until convergence when setting `approx=false` (the default) +or return an approximate result based on just a single iteration of the Bellman-Ford algorithm when `approx=true`. """ function expected_cost( @@ -448,10 +451,11 @@ end approx::Bool=false ) -Compute the directed free energy distance from all source nodes to -all target nodes in the graph defined by `g` using the inverse temperature parameter -`θ`. The computation can either continue until convergence when setting `approx=false` -(the default) or return an approximate result based on just a single iteration of the Bellman-Ford +Compute the directed free energy distance from all source nodes to all target +nodes in the graph defined by `g` using the inverse temperature parameter `θ`. + +The computation can either continue until convergence when setting `approx=false` (the default), +or return an approximate result based on just a single iteration of the Bellman-Ford algorithm when `approx=true`. """ function free_energy_distance( @@ -459,10 +463,8 @@ function free_energy_distance( θ::Union{Real,Nothing}=nothing, approx::Bool=false ) - # FIXME! This should be multithreaded. However, ProgressLogging currently - # does not support multithreading targets = ConScape._targetidx_and_nodes(g)[1] - @progress vec_of_vecs = [_free_energy_distance(g, target, θ, approx) for target in targets] + vec_of_vecs = [_free_energy_distance(g, target, θ, approx) for target in targets] return reduce(hcat, vec_of_vecs) end diff --git a/src/gridrsp.jl b/src/gridrsp.jl index 56c51d2..f4492ab 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -162,7 +162,8 @@ least_cost_distance(grsp::Union{GridRSP,NamedTuple}; kw...) = least_cost_distanc """ mean_kl_divergence(grsp::GridRSP)::Float64 -Compute the mean Kullback–Leibler divergence between the free energy distances and the RSP expected costs for `grsp::GridRSP`. +Compute the mean Kullback–Leibler divergence between the free +energy distances and the RSP expected costs for `grsp::GridRSP`. """ function mean_kl_divergence(grsp::Union{GridRSP,NamedTuple}; free_energy_distances=nothing, @@ -195,7 +196,8 @@ end """ mean_lc_kl_divergence(grsp::GridRSP)::Float64 -Compute the mean Kullback–Leibler divergence between the least-cost path and the random path distribution for `grsp::GridRSP`, weighted by the qualities of the source and target node. +Compute the mean Kullback–Leibler divergence between the least-cost path and the random path +distribution for `grsp::GridRSP`, weighted by the qualities of the source and target node. """ function mean_lc_kl_divergence(grsp::Union{GridRSP,NamedTuple}; workspaces=[similar(grsp.Z)], @@ -266,8 +268,8 @@ end """ least_cost_kl_divergence(grsp::GridRSP, target::Tuple{Int,Int}) -Compute the least cost Kullback-Leibler divergence from each cell in the g in -`h` to the `target` cell. +Compute the least cost Kullback-Leibler divergence from each +cell in the g in `h` to the `target` cell. """ function least_cost_kl_divergence(grsp::Union{GridRSP,NamedTuple}, target::Tuple{Int,Int}; kw...) g = grsp.g diff --git a/src/randomizedshortestpath.jl b/src/randomizedshortestpath.jl index fe3b15d..f0d2159 100644 --- a/src/randomizedshortestpath.jl +++ b/src/randomizedshortestpath.jl @@ -407,7 +407,7 @@ function _bellman_ford_update_transposed!(c̄::Vector, φ::Vector, trPref::Spars end # Helper function required for good performance until https://github.com/JuliaLang/julia/pull/42647 has been released -function mygetindex(A::SparseMatrixCSC{Tv,Ti}, I::AbstractVector, J::Integer) where {Tv,Ti} +function fast_getindex(A::SparseMatrixCSC{Tv,Ti}, I::AbstractVector, J::Integer) where {Tv,Ti} if !issorted(I) throw(ArgumentError("only sorted indices are currectly supported")) end @@ -449,7 +449,7 @@ function _bellman_ford_update_node_transposed(c̄::Vector, φ::Vector, trPref::S idx = Prefindex.nzind # Get the list of successors # computation of θ(cᵢⱼ+φ(j,t))-log([Pʳᵉᶠ]ᵢⱼ) # ect = (Array(trC[idx, index]) + φ[idx]) .* θ .- log.(Prefindex.nzval) - ect = (Array(mygetindex(trC, idx, index)) .+ φ[idx]) .* θ .- log.(Prefindex.nzval) + ect = (Array(fast_getindex(trC, idx, index)) .+ φ[idx]) .* θ .- log.(Prefindex.nzval) finiteidx = isfinite.(ect) idx = idx[finiteidx] ect = ect[finiteidx] diff --git a/src/solvers.jl b/src/solvers.jl index f13ddaa..0653776 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -137,6 +137,8 @@ function _init!( else [similar(Z') for _ in 1:n_permuted_workspaces] end + # TODO these shouldn't have traits, it + # should be baked into the problem. expected_costs = if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost haskey(ws, :expected_costs) ? _reshape(ws.expected_costs, size(Z)) : similar(Z) else From da5768f2961acdd040c30bf327c31dbe8553cfaa Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 18 Feb 2025 15:55:18 +0100 Subject: [PATCH 33/51] add reassess --- src/windows.jl | 46 +++++++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/src/windows.jl b/src/windows.jl index 6b9cba6..dd62d2e 100644 --- a/src/windows.jl +++ b/src/windows.jl @@ -382,21 +382,36 @@ function assess( # Calculate global stats njobs = count(mask) shape = size(window_ranges) - return NestedAssessment( - shape, - njobs, - mask, - indices, - assessments, - ) + return NestedAssessment(shape, njobs, mask, indices, assessments) +end + +""" + reassess(a::NestedAssessment, p::BatchProblem) + +Re-asses an existing nested assesment of a BatchProblem. + +""" +function reassess(a::NestedAssessment, p::BatchProblem) + # Paths for all batches + paths = _batch_paths(p, size(a)) + # Paths for non-empty batches + jobpaths = paths[a.indices] + # Find all the jobs that havent been saved (failed) + idxmask = .!(isdir.(jobpaths)) + # Generate new arrays of indices and assessments for the remaining jobs + indices = a.indices[idxmask] + mask = falses(a.shape) + mask[indices] .= true + assessments = a.assesments[idxmask] + njobs = lenth(indices) + + return NestedAssessment(shape, njobs, mask, indices, assessments) end # Mosaic the stored files to a RasterStack function Rasters.mosaic(p::BatchProblem; to, lazy=true, missingval=0.0, kw...) - ranges = _window_ranges(p, to) - paths = [_window_path(p, rs) for rs in ranges] + paths = _batch_paths(p, to) stacks = [RasterStack(path; lazy) for path in paths if isdir(path)] - return Rasters.mosaic(sum, stacks; missingval, to, kw...) end @@ -407,7 +422,10 @@ function _store(p::BatchProblem, output::RasterStack{K}, ranges; kw...) where {K ) end -function _window_path(p, ranges::Tuple) +_batch_paths(p, x::Union{RaterStack,Tuple}; window_ranges=_window_ranges(p, x)) = + [_batch_path(p, rs) for rs in window_ranges] + +function _batch_path(p, ranges::Tuple) corners = map(first, ranges) window_dirname = "window_" * join(corners, '_') return joinpath(p.datapath, window_dirname) @@ -433,8 +451,9 @@ function _window_sizes(p, rast::RasterStack; window_ranges=_window_ranges(p, ras end end -function _window_ranges(p::Union{BatchProblem,WindowedProblem}, rast::AbstractRasterStack) - size = Base.size(rast) +_window_ranges(p::Union{BatchProblem,WindowedProblem}, rast::AbstractRasterStack) = + _window_ranges(p::Union{BatchProblem,WindowedProblem}, size(rast)) +function _window_ranges(p::Union{BatchProblem,WindowedProblem}, size::Tuple) centersize = ConScape.centersize(p) buffer = ConScape.buffer(p) windowsize = 2buffer .+ centersize @@ -531,6 +550,7 @@ end assessments::Vector{WindowAssessment} end +Base.size(a::AbstractAssessment) = a.shape function Base.show(io::IO, mime::MIME"text/plain", bs::ProblemAssessment) println(io, "NestedAssessment") println(io) From 25bc15540f0f2cefd95b949ec01bbeb698173783 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 18 Feb 2025 16:02:57 +0100 Subject: [PATCH 34/51] cleanup --- src/connectivity_measure.jl | 20 ++++++++++++-------- src/gridrsp.jl | 34 +++++++++++----------------------- src/problem.jl | 27 +++++++++++++++++---------- src/randomizedshortestpath.jl | 16 ++++++++-------- 4 files changed, 48 insertions(+), 49 deletions(-) diff --git a/src/connectivity_measure.jl b/src/connectivity_measure.jl index 4904ad4..e11646c 100644 --- a/src/connectivity_measure.jl +++ b/src/connectivity_measure.jl @@ -13,22 +13,26 @@ abstract type FundamentalMeasure <: ConnectivityMeasure end abstract type DistanceMeasure <: FundamentalMeasure end struct LeastCostDistance <: ConnectivityMeasure end -@kwdef struct ExpectedCost{T<:Union{Real,Nothing},CM} <: DistanceMeasure - θ::T = nothing - distance_transformation::CM = nothing +@kwdef struct ExpectedCost{T<:Union{Real,Nothing},CM,DV} <: DistanceMeasure + θ::T + distance_transformation::CM + diagvalue::DV = nothing approx::Bool = false end -@kwdef struct FreeEnergyDistance{T<:Union{Real,Nothing},CM} <: DistanceMeasure - θ::T = nothing - distance_transformation::CM = nothing +@kwdef struct FreeEnergyDistance{T<:Union{Real,Nothing},CM,DV} <: DistanceMeasure + θ::T + distance_transformation::CM + diagvalue::DV = nothing approx::Bool = false end -@kwdef struct SurvivalProbability{T<:Union{Real,Nothing}} <: FundamentalMeasure +@kwdef struct SurvivalProbability{T<:Union{Real,Nothing},DV} <: FundamentalMeasure θ::T = nothing + diagvalue::DV = nothing # TODO should be 1 approx::Bool = false end -@kwdef struct PowerMeanProximity{T<:Union{Real,Nothing}} <: FundamentalMeasure +@kwdef struct PowerMeanProximity{T<:Union{Real,Nothing},DV} <: FundamentalMeasure θ::T = nothing + diagvalue::DV = nothing approx::Bool = false end diff --git a/src/gridrsp.jl b/src/gridrsp.jl index f4492ab..56a3c10 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -30,9 +30,6 @@ function GridRSP(g::Grid; θ=nothing, verbose=true) return GridRSP(g, θ, Pref, W, Z) end -_get_grid(grsp::GridRSP) = grsp.g -_get_grid(g::Grid) = g - function Base.show(io::IO, ::MIME"text/plain", grsp::GridRSP) print(io, summary(grsp), " of size ", grsp.g.nrows, "x", grsp.g.ncols) end @@ -127,14 +124,10 @@ function edge_betweenness_kweighted(grsp::Union{GridRSP,NamedTuple}; # TODO why does this only use `expected_cost`? g = grsp.g # S = map(distance_transformation, expected_cost(grsp)) - # _maybe_set_diagonal!(S, g.targetnodes, diagvalue) + # maybe_set_diagonal!(S, diagvalue, g.targetnodes) proximities = map(distance_transformation, expected_cost(grsp)) - if diagvalue !== nothing - for (j, i) in enumerate(g.targetnodes) - proximities[i, j] = diagvalue - end - end + maybe_set_diagonal!(proximities, diagvalue, g.targetnodes) betmatrix = RSP_edge_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes; kw...) return betmatrix @@ -183,7 +176,6 @@ function mean_kl_divergence(grsp::Union{GridRSP,NamedTuple}; end return mean_kl_divergence(grsp::Union{GridRSP,NamedTuple}, free_energy_distances, expected_costs; kw...) end - function mean_kl_divergence(grsp::Union{GridRSP,NamedTuple}, free_energy_distances, expected_costs; workspaces=(similar(grsp.Z),), kw... ) @@ -317,7 +309,8 @@ function connected_habitat( distance_transformation=nothing, diagvalue=nothing, θ::Union{Nothing,Real}=nothing, - approx::Bool=false) + approx::Bool=false +) # Check that distance_transformation function has been passed if no cost function is saved if distance_transformation === nothing && connectivity_function <: DistanceFunction @@ -332,7 +325,7 @@ function connected_habitat( map!(distance_transformation, S, S) end - return connected_habitat(grsp, S, diagvalue=diagvalue) + return connected_habitat(grsp, S; diagvalue) end function connected_habitat(grsp::Union{GridRSP,NamedTuple}; proximities=nothing, kw...) @@ -346,13 +339,8 @@ function connected_habitat(grsp::Union{Grid,GridRSP,NamedTuple}, S::Matrix; output=fill(NaN, size(grsp.g)), kw... ) - g = _get_grid(grsp) - - if diagvalue !== nothing - for (j, i) in enumerate(g.targetnodes) - S[i, j] = diagvalue - end - end + g = grsp.g + maybe_set_diagonal(S, diagvalue, g.targetnodes) funvec = connected_habitat(g.qs, g.qt, S; kw...) @@ -471,7 +459,7 @@ function LinearAlgebra.eigmax(grsp::Union{GridRSP,NamedTuple}; map!(distance_transformation, S, S) end - _maybe_set_diagonal!(S, g, diagvalue) + maybe_set_diagonal!(S, diagvalue, g.targetnodes) # quality scaled proximity matrix qSq = workspace2 .= g.qs .* S .* g.qt' @@ -616,12 +604,12 @@ function _computeproximities(grsp; end map!(distance_transformation, proximities, proximities) end - _maybe_set_diagonal!(proximities, g.targetnodes, diagvalue) + maybe_set_diagonal!(proximities, diagvalue, g.targetnodes) return proximities end -_maybe_set_diagonal!(proximities, targetnodes, diagvalue::Nothing) = nothing -function _maybe_set_diagonal!(proximities, targetnodes, diagvalue) +maybe_set_diagonal!(proximities, diagvalue::Nothing, targetnodes) = nothing +function maybe_set_diagonal!(proximities, diagvalue, targetnodes) for (j, i) in enumerate(targetnodes) proximities[i, j] = diagvalue end diff --git a/src/problem.jl b/src/problem.jl index 2432542..47a00e1 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -7,19 +7,24 @@ solver(p::AbstractProblem) = solver(p.problem) isthreaded(p::AbstractProblem) = false """ - assess(p::AbstractProblem, g) + assess(p::AbstractProblem, rast::RasterStack) -Assess the memory and solve requirements of problem -`p` on grid `g`. This can be used to indicate memory -and time reequiremtents on a cluster +Assess the computational requirements of problem +`p` for `RasterStack` `rastr`. + +This can be used to indicate memory and time reequiremtents on a cluster. """ function assess end """ Problem(graph_measures...; solver, θ) -Combine multiple solve operations into a single object, -to be run in the same job. +A `Problem` specifies graph and connectivity measures, +and a method to solve them. + +This lazy specification allows ConScape to minimise the work +required to calculate multiple outputs: habitat conectivity +betweenness metrics etc can use the same memory allocations and solves. # Keywords @@ -55,15 +60,17 @@ costs(p::Problem) = p.costs prune(p::Problem) = p.prune isthreaded(p::Problem) = p.threaded - +# Solve just calls `init` and `solve!` solve(p::Problem, rast::RasterStack; kw...) = solve!(init(p, rast; kw...), p; kw...) +# Solve defers to specific solver methods in solvers.jl solve!(workspace::NamedTuple, p::Problem; kw...) = solve!(workspace, solver(p), connectivity_measure(p), p; kw...) -# Init is conditional on solver and connectivity measure +# `init`` calls `init!` on an empty workspace +init(p::AbstractProblem, args...; kw...) = init!((;), p, args...; kw...) +# init! requirements are conditional on solver and connectivity measure +# See solvers.jl function init!(workspace::NamedTuple, p::Problem, rast::RasterStack; verbose=false, kw...) verbose && println("Initialising for $(solver(p))") init!(workspace, solver(p), connectivity_measure(p), p, rast; kw...) end - -init(p::AbstractProblem, args...; kw...) = init!((;), p, args...; kw...) \ No newline at end of file diff --git a/src/randomizedshortestpath.jl b/src/randomizedshortestpath.jl index f0d2159..3261b16 100644 --- a/src/randomizedshortestpath.jl +++ b/src/randomizedshortestpath.jl @@ -239,14 +239,14 @@ function RSP_expected_cost(W::SparseMatrixCSC, # When threaded the solver is faster than a dense matmul # C̄ = if size(Z, 1) == size(Z, 2) - # B = mul!(workspace1, C .* W, Z) - # mul!(B, C .* W, Z) - # This is a dense-dense matmul... very slow - # Z * B + # B = mul!(workspace1, C .* W, Z) + # mul!(B, C .* W, Z) + # This is a dense-dense matmul... very slow + # Z * B # else - # TODO permuted workspace here for the broadcast - B = mul!(workspace1, CW, Z) - C̄ = ldiv!(solver, A_init, B; B_copy=copy!(workspace2, B)) + # TODO permuted workspace here for the broadcast + B = mul!(workspace1, CW, Z) + C̄ = ldiv!(solver, A_init, B; B_copy=copy!(workspace2, B)) # end C̄ ./= Z @@ -479,4 +479,4 @@ function _bellman_ford_update_node_transposed(c̄::Vector, φ::Vector, trPref::S ec += pij * (trCidxjindex + c̄[idx[j]]) end return ec, v -end +end \ No newline at end of file From f65161c2e5add824e97bef735851cda74bd10930 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 18 Feb 2025 17:40:18 +0100 Subject: [PATCH 35/51] resorganise bugfix and test (re)assessments --- src/ConScape.jl | 1 + src/assessment.jl | 201 ++++++++++++++++++++++++++++++++++++++++++++++ src/gridrsp.jl | 6 +- src/problem.jl | 10 --- src/solvers.jl | 2 +- src/windows.jl | 155 +---------------------------------- test/windowed.jl | 57 ++++++++++++- 7 files changed, 264 insertions(+), 168 deletions(-) create mode 100644 src/assessment.jl diff --git a/src/ConScape.jl b/src/ConScape.jl index ede890d..f1957af 100644 --- a/src/ConScape.jl +++ b/src/ConScape.jl @@ -54,5 +54,6 @@ include("connectivity_measure.jl") include("problem.jl") include("solvers.jl") include("windows.jl") +include("assessment.jl") end diff --git a/src/assessment.jl b/src/assessment.jl new file mode 100644 index 0000000..1ac2b58 --- /dev/null +++ b/src/assessment.jl @@ -0,0 +1,201 @@ +""" + ProblemAssessment + +Abstract supertype for problem assessments. + +These calculate the computation size of an +`AbstractWindowedProblem` for a specific `RasterStack`. +""" +abstract type ProblemAssessment end + +Base.size(a::ProblemAssessment) = a.size + +""" + WindowAssessment <: ProblemAssessment + +Assessment of an AbstractWindowedProblem that holds +a `Problem`. + +# Fields +- `shape::Tuple{Int,Int}`: the shape of the windowing +- `njobs::Int`: the number of problem runs required to finish the problem +- `sizes::Vector{Tuple{Int,Int}}`: the sizes of each window +- `mask::Vector{Bool}`: Vector{Bool} where `true` values are jobs that need to be run. +- `indices::Vector{Int}`: the indices of `mask` that are `true`. +""" +@kwdef struct WindowAssessment <: ProblemAssessment + size::Tuple{Int,Int} + shape::Tuple{Int,Int} + njobs::Int + mask::Vector{Bool} + indices::Vector{Int} + sizes::Vector{Tuple{Int,Int}} +end + +""" + NestedAssessment <: ProblemAssessment + +Assessment of a nested `AbstractWindowedProblem`, +that holds another `AbstractWindowedProblem`. + +# Fields +- `shape::Tuple{Int,Int}`: the shape of the windowing +- `njobs::Int`: the number of problem runs required to finish the problem +- `mask::Vector{Bool}`: Vector{Bool} where `true` values are jobs that need to be run. +- `indices::Vector{Int}`: the indices of `mask` that are `true`. +- `assessments::Vector{WindowAssessment}`: asessments at the next level down. +""" +@kwdef struct NestedAssessment <: ProblemAssessment + size::Tuple{Int,Int} + shape::Tuple{Int,Int} + njobs::Int + mask::Vector{Bool} + indices::Vector{Int} + assessments::Vector{WindowAssessment} +end + +function Base.show(io::IO, mime::MIME"text/plain", bs::ProblemAssessment) + println(io, "NestedAssessment") + println(io) + println(io, "Raster shape: $(bs.shape)") + println(io, "Number of jobs: $(bs.njobs)") + # Use SparseArrays nice matrix printing for the mask + println(io, "Job mask: ") + mask = sparse(reshape(bs.mask, bs.shape)) + Base.print_array(io, mask) +end + + +""" + assess(p::AbstractProblem, rast::RasterStack) + +Assess the computational requirements of problem +`p` for `RasterStack` `rastr`. + +This can be used to indicate memory and time reequiremtents on a cluster. +""" +function assess end + +function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack; kw...) + # Define the ranges of each window + window_ranges = _window_ranges(p, rast) + + # Calculate window sizes and allocations + window_sizes = map(vec(window_ranges)) do rs + window_rast = view(rast, rs...) + _problem_size(p, window_rast) + end + + # Organise stats for each window into vectors + window_mask = map(s -> prod(s) > 0, window_sizes) + window_indices = eachindex(window_mask)[window_mask] + + # Calculate global stats + njobs = count(window_mask) + shape = size(window_ranges) + + WindowAssessment(size(rast), shape, njobs, window_mask, window_indices, window_sizes) +end +function assess( + p::AbstractWindowedProblem{<:AbstractWindowedProblem}, + rast::AbstractRasterStack; + nthreads=Threads.nthreads(), + verbose=true, + kw... +) + # Calculate outer window ranges + window_ranges = _window_ranges(p, rast) + verbose && println("Assessing $(length(window_ranges)) jobs") + + # Define a channel to store window raster and reuse memory + channel = Channel{Any}(Threads.nthreads()) + open(rast) do o + for i in 1:nthreads + put!(channel, _get_window_with_zeroed_buffer(getindex, p, o, first(window_ranges))) + end + end + + # Define a vector for all assessment data + assessments = Vector{WindowAssessment}(undef, length(window_ranges)) + # Run assessments threaded as they can take a long time for large rasters + Threads.@threads for i in eachindex(vec(window_ranges)) + rs = window_ranges[i] + verbose && println("Assessing batch: $i, $rs") + window_rast = take!(channel) + function empty_assesment() + verbose && println(" No targets found") + WindowAssessment(; + shape=(0, 0), + njobs=0, + sizes=Tuple{Int,Int}[], + mask=Bool[], + indices=Int[], + ) + end + # Just load the target window quickly first to avoid loading large rasters + window_view = view(rast, rs...) + quick_targets = window_view.target_qualities[_target_ranges(p, window_view)...] + assessments[i] = if count(_isvalid, quick_targets) > 0 + # TODO + window_rast = open(rast) do o + if map(length, rs) == size(window_rast) + _get_window_with_zeroed_buffer!(window_rast, p, o, rs) + else + _get_window_with_zeroed_buffer(getindex, p, o, rs) + end + end + nvalid = count(_isvalid, window_rast.target_qualities) + if nvalid > 0 + verbose && println(" nvalid: $nvalid") + assess(p.problem, window_rast; nthreads, kw...) + else + empty_assesment() + end + else + empty_assesment() + end + put!(channel, window_rast) + end + # Get mask and indices + mask = map(a -> any(a.mask), assessments) + indices = eachindex(vec(mask))[mask] + # Calculate global stats + njobs = count(mask) + shape = size(window_ranges) + return NestedAssessment(size(rast), shape, njobs, mask, indices, assessments) +end + +""" + reassess(a::NestedAssessment, p::BatchProblem) + +Re-asses an existing nested assesment of a BatchProblem. + +The returned `NestedAssessment` will exclude any jobs that +already have a data folder (assumed to be successfully completed). +""" +function reassess(p::BatchProblem, a::NestedAssessment) + (; njobs, mask, indices) = _reassess(p, a) + assessments = a.assessments[indices] + return NestedAssessment(a.size, a.shape, njobs, mask, indices, assessments) +end +function reassess(p::BatchProblem, a::WindowAssessment) + (; njobs, mask, indices) = _reassess(p, a) + sizes = a.sizes[indices] + return WindowAssessment(a.size, a.shape, njobs, mask, indices, sizes) +end + +function _reassess(p, a) + # Paths for all batches + paths = _batch_paths(p, size(a)) + # Paths for non-empty batches + jobpaths = paths[a.indices] + # Find all the jobs that havent been saved (failed) + idxmask = .!(isdir.(jobpaths)) + # Generate new arrays of indices and assessments for the remaining jobs + indices = a.indices[idxmask] + mask = fill(false, prod(a.shape)) + mask[indices] .= true + njobs = length(indices) + return (; njobs, mask, indices) +end + diff --git a/src/gridrsp.jl b/src/gridrsp.jl index 56a3c10..34a620d 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -340,7 +340,7 @@ function connected_habitat(grsp::Union{Grid,GridRSP,NamedTuple}, S::Matrix; kw... ) g = grsp.g - maybe_set_diagonal(S, diagvalue, g.targetnodes) + maybe_set_diagonal!(S, diagvalue, g.targetnodes) funvec = connected_habitat(g.qs, g.qt, S; kw...) @@ -608,8 +608,8 @@ function _computeproximities(grsp; return proximities end -maybe_set_diagonal!(proximities, diagvalue::Nothing, targetnodes) = nothing -function maybe_set_diagonal!(proximities, diagvalue, targetnodes) +maybe_set_diagonal!(proximities, diagvalue::Nothing, targetnodes::AbstractVector) = nothing +function maybe_set_diagonal!(proximities, diagvalue, targetnodes::AbstractVector) for (j, i) in enumerate(targetnodes) proximities[i, j] = diagvalue end diff --git a/src/problem.jl b/src/problem.jl index 47a00e1..9b8bc7d 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -6,16 +6,6 @@ connectivity_function(p::AbstractProblem) = solver(p::AbstractProblem) = solver(p.problem) isthreaded(p::AbstractProblem) = false -""" - assess(p::AbstractProblem, rast::RasterStack) - -Assess the computational requirements of problem -`p` for `RasterStack` `rastr`. - -This can be used to indicate memory and time reequiremtents on a cluster. -""" -function assess end - """ Problem(graph_measures...; solver, θ) diff --git a/src/solvers.jl b/src/solvers.jl index 0653776..4f0221e 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -564,7 +564,7 @@ function _setproximities!( else map!(dt, proximities, expected_costs) end - _maybe_set_diagonal!(proximities, g, diagvalue(p)) + maybe_set_diagonal!(proximities, diagvalue(p), g.targetnodes) return proximities end diff --git a/src/windows.jl b/src/windows.jl index dd62d2e..caf22a6 100644 --- a/src/windows.jl +++ b/src/windows.jl @@ -289,125 +289,6 @@ function init!(workspace::NamedTuple, p::BatchProblem, i::Int; verbose=true) verbose && @show size(batch_rast) return (; rast=batch_rast, workspace=init(p.problem, batch_rast; verbose), batch=1, window) end - -function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack; kw...) - # Define the ranges of each window - window_ranges = _window_ranges(p, rast) - - # Calculate window sizes and allocations - window_sizes = map(vec(window_ranges)) do rs - window_rast = view(rast, rs...) - _problem_size(p, window_rast) - end - - # Organise stats for each window into vectors - window_mask = map(s -> prod(s) > 0, window_sizes) - window_indices = eachindex(window_mask)[window_mask] - - # Calculate global stats - njobs = count(window_mask) - shape = size(window_ranges) - - WindowAssessment( - shape, - njobs, - window_sizes, - window_mask, - window_indices, - ) -end -function assess( - p::AbstractWindowedProblem{<:AbstractWindowedProblem}, - rast::AbstractRasterStack; - nthreads=Threads.nthreads(), - verbose=true, - kw... -) - # Calculate outer window ranges - window_ranges = _window_ranges(p, rast) - verbose && println("Assessing $(length(window_ranges)) jobs") - - # Define a channel to store window raster and reuse memory - channel = Channel{Any}(Threads.nthreads()) - open(rast) do o - for i in 1:nthreads - put!(channel, _get_window_with_zeroed_buffer(getindex, p, o, first(window_ranges))) - end - end - - # Define a vector for all assessment data - assessments = Vector{WindowAssessment}(undef, length(window_ranges)) - # Run assessments threaded as they can take a long time for large rasters - Threads.@threads for i in eachindex(vec(window_ranges)) - rs = window_ranges[i] - verbose && println("Assessing batch: $i, $rs") - window_rast = take!(channel) - function empty_assesment() - verbose && println(" No targets found") - WindowAssessment(; - shape=(0, 0), - njobs=0, - sizes=Tuple{Int,Int}[], - mask=Bool[], - indices=Int[], - ) - end - # Just load the target window quickly first to avoid loading large rasters - window_view = view(rast, rs...) - quick_targets = window_view.target_qualities[_target_ranges(p, window_view)...] - assessments[i] = if count(_isvalid, quick_targets) > 0 - # TODO - window_rast = open(rast) do o - if map(length, rs) == size(window_rast) - _get_window_with_zeroed_buffer!(window_rast, p, o, rs) - else - _get_window_with_zeroed_buffer(getindex, p, o, rs) - end - end - nvalid = count(_isvalid, window_rast.target_qualities) - if nvalid > 0 - verbose && println(" nvalid: $nvalid") - assess(p.problem, window_rast; nthreads, kw...) - else - empty_assesment() - end - else - empty_assesment() - end - put!(channel, window_rast) - end - # Get mask and indices - mask = map(a -> any(a.mask), assessments) - indices = eachindex(vec(mask))[mask] - # Calculate global stats - njobs = count(mask) - shape = size(window_ranges) - return NestedAssessment(shape, njobs, mask, indices, assessments) -end - -""" - reassess(a::NestedAssessment, p::BatchProblem) - -Re-asses an existing nested assesment of a BatchProblem. - -""" -function reassess(a::NestedAssessment, p::BatchProblem) - # Paths for all batches - paths = _batch_paths(p, size(a)) - # Paths for non-empty batches - jobpaths = paths[a.indices] - # Find all the jobs that havent been saved (failed) - idxmask = .!(isdir.(jobpaths)) - # Generate new arrays of indices and assessments for the remaining jobs - indices = a.indices[idxmask] - mask = falses(a.shape) - mask[indices] .= true - assessments = a.assesments[idxmask] - njobs = lenth(indices) - - return NestedAssessment(shape, njobs, mask, indices, assessments) -end - # Mosaic the stored files to a RasterStack function Rasters.mosaic(p::BatchProblem; to, lazy=true, missingval=0.0, kw...) paths = _batch_paths(p, to) @@ -416,13 +297,13 @@ function Rasters.mosaic(p::BatchProblem; to, lazy=true, missingval=0.0, kw...) end function _store(p::BatchProblem, output::RasterStack{K}, ranges; kw...) where {K} - dir = mkpath(_window_path(p, ranges)) + dir = mkpath(_batch_path(p, ranges)) return Rasters.write(joinpath(dir, ""), output; ext=p.ext, force=true, verbose=false, kw... ) end -_batch_paths(p, x::Union{RaterStack,Tuple}; window_ranges=_window_ranges(p, x)) = +_batch_paths(p, x::Union{RasterStack,Tuple}; window_ranges=_window_ranges(p, x)) = [_batch_path(p, rs) for rs in window_ranges] function _batch_path(p, ranges::Tuple) @@ -530,34 +411,4 @@ end _isvalid(x) = !isnan(x) && x > zero(x) -_resolution(rast) = abs(step(lookup(rast, X))) - -abstract type ProblemAssessment end - -@kwdef struct WindowAssessment <: ProblemAssessment - shape::Tuple{Int,Int} - njobs::Int - sizes::Vector{Tuple{Int,Int}} - mask::Vector{Bool} - indices::Vector{Int} -end - -@kwdef struct NestedAssessment <: ProblemAssessment - shape::Tuple{Int,Int} - njobs::Int - mask::Vector{Bool} - indices::Vector{Int} - assessments::Vector{WindowAssessment} -end - -Base.size(a::AbstractAssessment) = a.shape -function Base.show(io::IO, mime::MIME"text/plain", bs::ProblemAssessment) - println(io, "NestedAssessment") - println(io) - println(io, "Raster shape: $(bs.shape)") - println(io, "Number of jobs: $(bs.njobs)") - # Use SparseArrays nice matrix printing for the mask - println(io, "Job mask: ") - mask = sparse(reshape(bs.mask, bs.shape)) - Base.print_array(io, mask) -end \ No newline at end of file +_resolution(rast) = abs(step(lookup(rast, X))) \ No newline at end of file diff --git a/test/windowed.jl b/test/windowed.jl index fcde9a9..e0e30f4 100644 --- a/test/windowed.jl +++ b/test/windowed.jl @@ -77,7 +77,7 @@ end # BatchProblem writes files to disk and mosaics to RasterStack -# @testset "batch problem matches windowed problem" begin +@testset "batch problem matches windowed problem" begin solver = ConScape.VectorSolver() # Use a higher alpha to catch differences distance_transformation = x -> exp(-x / 50) @@ -108,6 +108,34 @@ end end batch_jobs_result = mosaic(batch_jobs_problem; to=rast) + + @testset "reassessment" begin + # There should be no jobs left + re1 = ConScape.reassess(batch_jobs_problem, assessment) + @test re1.njobs == 0 + @test length(re1.indices) == 0 + + # Delete three results + paths = ConScape._batch_paths(batch_jobs_problem, size(assessment)) + rm.(paths[[1, 7, 21]]; recursive=true) + re2 = ConScape.reassess(batch_jobs_problem, assessment) + @test re2.njobs == 3 + @test length(re2.indices) == 3 + @test re2.mask[[1, 7, 21]] == [true, true, true] + + # Run the reassessment + for job in 1:re2.njobs + ConScape.solve(batch_jobs_problem, rast, job; window_indices=re2.indices) + end + + # Again there are no jobs left + re3 = ConScape.reassess(batch_jobs_problem, assessment) + @test re3.njobs == 0 + @test length(re3.indices) == 0 + @test count(re3.mask) == 0 + end + + nested_problem = ConScape.BatchProblem(windowed_problem; datapath=tempname(), centersize=(10, 10) ) @@ -128,7 +156,32 @@ end ConScape.solve(nested_jobs_problem, rast, job) end nested_jobs_result = mosaic(nested_jobs_problem; to=rast) - plot(windowed_result) + + @testset "nested reassessment" begin + # There should be no jobs left + re1 = ConScape.reassess(nested_jobs_problem, assessment) + @test re1.njobs == 0 + @test length(re1.indices) == 0 + + # Delete three results + paths = ConScape._batch_paths(nested_jobs_problem, size(assessment)) + rm.(paths[[2, 5]]; recursive=true) + re2 = ConScape.reassess(nested_jobs_problem, assessment) + @test re2.njobs == 2 + @test length(re2.indices) == 2 + @test re2.mask[[2, 5]] == [true, true] + + # Run the reassessment + for job in 1:re2.njobs + ConScape.solve(nested_jobs_problem, rast, job; window_indices=re2.indices) + end + + # Again there are no jobs left + re3 = ConScape.reassess(nested_jobs_problem, assessment) + @test re3.njobs == 0 + @test length(re3.indices) == 0 + @test count(re3.mask) == 0 + end @test keys(windowed_result) == keys(nested_result) == From 26eb243e4bbc3a7c1636198f9f1107dd0ebd532f Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Wed, 19 Feb 2025 14:43:57 +0100 Subject: [PATCH 36/51] assess performance fixes --- src/assessment.jl | 42 ++++++++++++++---------------------------- src/solvers.jl | 40 ++++++++++++++++++++++------------------ src/windows.jl | 14 +++++++++++--- 3 files changed, 47 insertions(+), 49 deletions(-) diff --git a/src/assessment.jl b/src/assessment.jl index 1ac2b58..c7d8aa3 100644 --- a/src/assessment.jl +++ b/src/assessment.jl @@ -107,24 +107,16 @@ function assess( window_ranges = _window_ranges(p, rast) verbose && println("Assessing $(length(window_ranges)) jobs") - # Define a channel to store window raster and reuse memory - channel = Channel{Any}(Threads.nthreads()) - open(rast) do o - for i in 1:nthreads - put!(channel, _get_window_with_zeroed_buffer(getindex, p, o, first(window_ranges))) - end - end - # Define a vector for all assessment data assessments = Vector{WindowAssessment}(undef, length(window_ranges)) # Run assessments threaded as they can take a long time for large rasters Threads.@threads for i in eachindex(vec(window_ranges)) rs = window_ranges[i] verbose && println("Assessing batch: $i, $rs") - window_rast = take!(channel) - function empty_assesment() + function empty_assesment(size) verbose && println(" No targets found") WindowAssessment(; + size, shape=(0, 0), njobs=0, sizes=Tuple{Int,Int}[], @@ -134,27 +126,21 @@ function assess( end # Just load the target window quickly first to avoid loading large rasters window_view = view(rast, rs...) - quick_targets = window_view.target_qualities[_target_ranges(p, window_view)...] - assessments[i] = if count(_isvalid, quick_targets) > 0 - # TODO - window_rast = open(rast) do o - if map(length, rs) == size(window_rast) - _get_window_with_zeroed_buffer!(window_rast, p, o, rs) - else - _get_window_with_zeroed_buffer(getindex, p, o, rs) - end - end - nvalid = count(_isvalid, window_rast.target_qualities) - if nvalid > 0 - verbose && println(" nvalid: $nvalid") - assess(p.problem, window_rast; nthreads, kw...) - else - empty_assesment() + target_ranges = _target_ranges(p, window_view) + # Convert targets to bool as early as possible + inner_target_bools = _isvalid.(window_view.target_qualities[target_ranges...]) + assessments[i] = if count(inner_target_bools) > 0 + window_bools = open(window_view) do o + # Convert everything to Bool up front + qualities = collect(_isvalid.(o.qualities)) + target_qualities = falses(size(o)) + target_qualities[target_ranges...] .= inner_target_bools + RasterStack((; qualities, target_qualities), dims(window_view)) end + assess(p.problem, window_bools; nthreads, kw...) else - empty_assesment() + empty_assesment(size(window_view)) end - put!(channel, window_rast) end # Get mask and indices mask = map(a -> any(a.mask), assessments) diff --git a/src/solvers.jl b/src/solvers.jl index 4f0221e..d998f0a 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -61,16 +61,16 @@ end LinearSolver(args...; threaded=false, kw...) = LinearSolver(args, kw, threaded) # In `init!` we allocate all large dense arrays -function init!( - ws::NamedTuple, - solver::MatrixSolver, - cm::FundamentalMeasure, - p::AbstractProblem, - rast::RasterStack; - verbose=false, -) - _init!(ws, solver, cm, p, rast; verbose) -end +# function init!( +# ws::NamedTuple, +# solver::MatrixSolver, +# cm::FundamentalMeasure, +# p::AbstractProblem, +# rast::RasterStack; +# verbose=false, +# ) +# _init!(ws, solver, cm, p, rast; verbose) +# end function init!( ws::NamedTuple, solver::Union{VectorSolver,LinearSolver}, @@ -79,6 +79,8 @@ function init!( rast::RasterStack; verbose=false, ) + @show "initing" isthreaded(solver) + grid = Grid(p, rast) workspace = _init!(ws, solver, cm, p, rast; verbose) if isthreaded(solver) @@ -86,6 +88,7 @@ function init!( channel = Channel{typeof(workspace)}(nbuffers) put!(channel, workspace) for n in 2:nbuffers + @show n workspace_n = _init!(ws, solver, cm, p, rast; verbose, grid) put!(channel, workspace_N) end @@ -94,6 +97,7 @@ function init!( return workspace end end + function _init!( ws::NamedTuple, solver::Solver, @@ -184,14 +188,14 @@ function _init!( return (; Z, Zⁱ, workspaces, permuted_workspaces, g=grid, grid, free_energy_distances, expected_costs, proximities, outputs) end -function init!( - workspace::NamedTuple, s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, rast::RasterStack; - verbose=false, - grid=Grid(p, rast), -) - # TODO what is needed here? - return (; grid) -end +# function init!( +# workspace::NamedTuple, s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, rast::RasterStack; +# verbose=false, +# grid=Grid(p, rast), +# ) +# # TODO what is needed here? +# return (; grid) +# end # RSP is not used for ConnectivityMeasure, so the solver isn't used function solve!( diff --git a/src/windows.jl b/src/windows.jl index caf22a6..ccbd9ac 100644 --- a/src/windows.jl +++ b/src/windows.jl @@ -140,11 +140,16 @@ function init!(workspace::NamedTuple, p::WindowedProblem, rast::RasterStack; n = min(length(window_indices), p.threaded ? Threads.nthreads() : 1) # VERY important to use _get_window_with_zeroed_buffer here not just index the raster # Otherwise memory use will be TB - window_workspaces = if haskey(workspace, :window_workspaces) - [init!(ws, p.problem; verbose) for ws in window_workspaces] + window_workspaces = Vector{NamedTuple}(undef, n) + if haskey(workspace, :window_workspaces) + Threads.@threads for i in 1:n + window_workspaces[i] = init!(workspace.window_workspaces[i], p.problem; verbose) + end else largest_rast = _get_window_with_zeroed_buffer(view, p, rast, window_ranges[first(sorted_indices)]) - [init(p.problem, largest_rast; verbose) for _ in 1:n] + Threads.@threads for i in 1:n + window_workspaces[i] = init(p.problem, largest_rast; verbose) + end end return (; rast, window_workspaces, window_sizes, window_ranges, window_indices, sorted_indices) end @@ -274,6 +279,8 @@ function init(p::BatchProblem, rast::RasterStack, i::Int; init!((; rast, window_ranges, window_indices), p, i; kw...) end function init!(workspace::NamedTuple, p::BatchProblem, i::Int; verbose=true) + @show "here" + @show "Initialising batch problem" (; window_indices, window_ranges, rast) = workspace # Get the raster data for job i window = window_ranges[window_indices[i]] @@ -410,5 +417,6 @@ function _valid_targets( end _isvalid(x) = !isnan(x) && x > zero(x) +_isvalid(x::Bool) = x _resolution(rast) = abs(step(lookup(rast, X))) \ No newline at end of file From 523de8308c525b90f4827e6e2166cfcd192efbd9 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 22 Feb 2025 16:45:50 +0100 Subject: [PATCH 37/51] bugfixes for windows --- src/assessment.jl | 25 ++++--- src/grid.jl | 40 +++++------ src/solvers.jl | 28 ++++---- src/windows.jl | 169 +++++++++++++++++++++++++++------------------- 4 files changed, 143 insertions(+), 119 deletions(-) diff --git a/src/assessment.jl b/src/assessment.jl index c7d8aa3..11acbbb 100644 --- a/src/assessment.jl +++ b/src/assessment.jl @@ -17,9 +17,10 @@ Assessment of an AbstractWindowedProblem that holds a `Problem`. # Fields +- `size::Tuple{Int,Int}`: the size of the input and output RasterStack - `shape::Tuple{Int,Int}`: the shape of the windowing - `njobs::Int`: the number of problem runs required to finish the problem -- `sizes::Vector{Tuple{Int,Int}}`: the sizes of each window +- `grid_sizes::Vector{Tuple{Int,Int}}`: the sizes of each window - `mask::Vector{Bool}`: Vector{Bool} where `true` values are jobs that need to be run. - `indices::Vector{Int}`: the indices of `mask` that are `true`. """ @@ -29,7 +30,7 @@ a `Problem`. njobs::Int mask::Vector{Bool} indices::Vector{Int} - sizes::Vector{Tuple{Int,Int}} + grid_sizes::Vector{Tuple{Int,Int}} end """ @@ -39,6 +40,7 @@ Assessment of a nested `AbstractWindowedProblem`, that holds another `AbstractWindowedProblem`. # Fields +- `size::Tuple{Int,Int}`: the size of the `RasterStack` input and output. - `shape::Tuple{Int,Int}`: the shape of the windowing - `njobs::Int`: the number of problem runs required to finish the problem - `mask::Vector{Bool}`: Vector{Bool} where `true` values are jobs that need to be run. @@ -81,20 +83,17 @@ function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack window_ranges = _window_ranges(p, rast) # Calculate window sizes and allocations - window_sizes = map(vec(window_ranges)) do rs - window_rast = view(rast, rs...) - _problem_size(p, window_rast) - end + grid_sizes = vec(_estimate_grid_sizes(p, rast; window_ranges)) # Organise stats for each window into vectors - window_mask = map(s -> prod(s) > 0, window_sizes) - window_indices = eachindex(window_mask)[window_mask] + window_mask = map(s -> prod(s) > 0, grid_sizes) + non_empty_indices = eachindex(window_mask)[window_mask] # Calculate global stats njobs = count(window_mask) shape = size(window_ranges) - WindowAssessment(size(rast), shape, njobs, window_mask, window_indices, window_sizes) + WindowAssessment(size(rast), shape, njobs, window_mask, non_empty_indices, grid_sizes) end function assess( p::AbstractWindowedProblem{<:AbstractWindowedProblem}, @@ -119,7 +118,7 @@ function assess( size, shape=(0, 0), njobs=0, - sizes=Tuple{Int,Int}[], + grid_sizes=Tuple{Int,Int}[], mask=Bool[], indices=Int[], ) @@ -130,8 +129,8 @@ function assess( # Convert targets to bool as early as possible inner_target_bools = _isvalid.(window_view.target_qualities[target_ranges...]) assessments[i] = if count(inner_target_bools) > 0 + # Convert everything to Bool at the batch level so window assessments are fast window_bools = open(window_view) do o - # Convert everything to Bool up front qualities = collect(_isvalid.(o.qualities)) target_qualities = falses(size(o)) target_qualities[target_ranges...] .= inner_target_bools @@ -144,11 +143,11 @@ function assess( end # Get mask and indices mask = map(a -> any(a.mask), assessments) - indices = eachindex(vec(mask))[mask] + non_empty_indices = eachindex(vec(mask))[mask] # Calculate global stats njobs = count(mask) shape = size(window_ranges) - return NestedAssessment(size(rast), shape, njobs, mask, indices, assessments) + return NestedAssessment(size(rast), shape, njobs, mask, non_empty_indices, assessments) end """ diff --git a/src/grid.jl b/src/grid.jl index 888fffc..edd02c3 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -1,19 +1,3 @@ -struct Grid{D<:Union{Tuple,Nothing},SQ,TQ} - nrows::Int - ncols::Int - affinities::SparseMatrixCSC{Float64,Int} - costfunction::Union{Nothing,Transformation} - costmatrix::SparseMatrixCSC{Float64,Int} - id_to_grid_coordinate_list::Vector{CartesianIndex{2}} - source_qualities::SQ - target_qualities::TQ - targetidx::Vector{CartesianIndex{2}} - targetnodes::Vector{Int} - qs::Vector{Float64} - qt::Vector{Float64} - dims::D -end - """ Grid(nrows::Integer, ncols::Integer; @@ -32,6 +16,21 @@ a `costs` function that maps the `affinities` matrix to a `costs` matrix. Alternatively, it is possible to supply a matrix to `costs` directly. If `prune=true` (the default), the affinity and cost matrices will be pruned to exclude unreachable nodes. """ +struct Grid{D<:Union{Tuple,Nothing},SQ,TQ} + nrows::Int + ncols::Int + affinities::SparseMatrixCSC{Float64,Int} + costfunction::Union{Nothing,Transformation} + costmatrix::SparseMatrixCSC{Float64,Int} + id_to_grid_coordinate_list::Vector{CartesianIndex{2}} + source_qualities::SQ + target_qualities::TQ + targetidx::Vector{CartesianIndex{2}} + targetnodes::Vector{Int} + qs::Vector{Float64} + qt::Vector{Float64} + dims::D +end function Grid(nrows::Integer, ncols::Integer; affinities=nothing, @@ -114,10 +113,7 @@ function Grid(rast::RasterStack; qualities=get(rast, :qualities) do ones(size(rast)) end, - affinities=let - affinities_raster = get(rast, :affinities, nothing) - ConScape.graph_matrix_from_raster(affinities_raster) - end, + affinities=ConScape.graph_matrix_from_raster(rast.affinities), source_qualities=get(rast, :source_qualities, qualities), target_qualities=get(rast, :target_qualities, qualities), kw... @@ -128,6 +124,10 @@ end Grid(p::AbstractProblem, rast::RasterStack; kw...) = Grid(rast; costs=costs(p), prune=prune(p), kw...) + +# TODO: clarify this +target_size(g::Grid) = size(g.costmatrix, 1), length(g.targetnodes) + Base.size(g::Grid) = (g.nrows, g.ncols) DimensionalData.dims(g::Grid) = g.dims diff --git a/src/solvers.jl b/src/solvers.jl index d998f0a..1c22630 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -79,16 +79,15 @@ function init!( rast::RasterStack; verbose=false, ) - @show "initing" isthreaded(solver) - - grid = Grid(p, rast) - workspace = _init!(ws, solver, cm, p, rast; verbose) + # Initialise the whole grid + grid = Grid(p, rast; prune=false) + # Initialise the workspace + workspace = _init!(ws, solver, cm, p, grid; verbose) if isthreaded(solver) nbuffers = Thread.nthreads() channel = Channel{typeof(workspace)}(nbuffers) put!(channel, workspace) for n in 2:nbuffers - @show n workspace_n = _init!(ws, solver, cm, p, rast; verbose, grid) put!(channel, workspace_N) end @@ -103,9 +102,8 @@ function _init!( solver::Solver, cm::FundamentalMeasure, p::AbstractProblem, - rast::RasterStack; + grid::Grid; verbose=false, - grid=Grid(p, rast) ) verbose && println("Retreiving measures...") g = grid @@ -146,8 +144,8 @@ function _init!( expected_costs = if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost haskey(ws, :expected_costs) ? _reshape(ws.expected_costs, size(Z)) : similar(Z) else - nothing - end + end + free_energy_distances = if hastrait(needs_free_energy_distance, gms) || cf == ConScape.free_energy_distance haskey(ws, :free_energy_distances) ? _reshape(ws.free_energy_distances, size(Z)) : similar(Z) else @@ -160,7 +158,7 @@ function _init!( end function matrix_or_nothing(gm) if returntype(gm) isa ReturnsDenseSpatial - A = fill(NaN, size(rast)) + A = fill(NaN, size(grid)) A[grid.id_to_grid_coordinate_list] .= 0.0 A else @@ -233,10 +231,10 @@ function solve!( # Get grid and preallocated vectors (; g) = ws gms = graph_measures(p) - # Predefine min-vectors for targets (not worth putting in the workspace) - target_qualities = g.target_qualities[g.targetnodes[1]] - targetidx = g.targetidx[1:1] + # Predefine min-vectors targets (not worth putting in the workspace) ? targetnodes = g.targetnodes[1:1] + target_qualities = g.target_qualities[targetnodes[1]] + targetidx = g.targetidx[1:1] qt = g.qt[1:1] target_allocs = (; target_qualities, targetidx, targetnodes, qt) _update_targets!(target_allocs, g, 1) @@ -411,9 +409,9 @@ function _init_sparse(ws::NamedTuple, solver, cm, p::Problem, grid::Grid; verbos end # All targets at once -_workspace_size(::MatrixSolver, g) = size(g.costmatrix, 1), length(g.targetnodes) +_workspace_size(::MatrixSolver, g) = target_size(g) # One target at a time -_workspace_size(::Union{VectorSolver,LinearSolver}, g) = size(g.costmatrix, 1), 1 +_workspace_size(::Union{VectorSolver,LinearSolver}, g) = first(target_size(g)), 1 isthreaded(s::Solver) = false isthreaded(s::LinearSolver) = s.threaded diff --git a/src/windows.jl b/src/windows.jl index ccbd9ac..6797c41 100644 --- a/src/windows.jl +++ b/src/windows.jl @@ -49,16 +49,17 @@ function solve(p::WindowedProblem, rast::RasterStack; verbose, test_windows, mosaic_return, timed ) end -function solve!(workspace, p::WindowedProblem; +solve!(workspace::Missing, p::WindowedProblem; kw...) = missing +function solve!(workspace::NamedTuple, p::WindowedProblem; test_windows::Bool=false, - verbose::Bool=false, mosaic_return::Bool=true, timed=false, + verbose::Bool=false, ) - (; rast, window_workspaces, window_ranges, window_indices, sorted_indices) = workspace + (; rast, window_workspaces, window_ranges, selected_window_indices, sorted_indices) = workspace # Test outputs just return the inputs after window masking if test_windows - output_stacks = map(window_indices) do i + output_stacks = map(selected_window_indices) do i _get_window_with_zeroed_buffer(view, p, rast, window_ranges[i]) end return if mosaic_return @@ -68,7 +69,7 @@ function solve!(workspace, p::WindowedProblem; else output_stacks end - end + end ch = Channel{NamedTuple}(length(window_workspaces)) for ws in window_workspaces @@ -76,7 +77,7 @@ function solve!(workspace, p::WindowedProblem; end # Set up channels for threading # Define empty outputs - output_stacks = Vector{RasterStack}(undef, length(sorted_indices)) + output_stacks = Vector{Union{RasterStack,Missing}}(undef, length(sorted_indices)) # Define a runner for threaded/non-threaded operation function run(i, iw) # Get a window range @@ -91,7 +92,12 @@ function solve!(workspace, p::WindowedProblem; workspace_initialised = init!(workspace, p.problem, window_rast; verbose) # Solve for the window verbose && println("Solving window $window...") - output_stacks[i] = solve!(workspace_initialised, p.problem) + grid = workspace_initialised.grid + output_stacks[i] = if prod(target_size(grid)) > 0 + solve!(workspace_initialised, p.problem) + else + missing + end # Return the workspace to the channel put!(ch, workspace) end @@ -113,33 +119,41 @@ function solve!(workspace, p::WindowedProblem; # Maybe mosaic the output return if mosaic_return t = time() - result = Rasters.mosaic(sum, output_stacks; to=rast, missingval=0.0, verbose) - mosaic_elapsed = time() - t - if timed - return (; result, window_elapsed, mosaic_elapsed) + non_missing_output = collect(skipmissing(output_stacks)) + if length(non_missing_output) > 0 + result = Rasters.mosaic(sum, non_missing_output; to=rast, missingval=0.0, verbose) + mosaic_elapsed = time() - t + if timed + (; result, window_elapsed, mosaic_elapsed) + else + result + end else - return result + missing end else if timed - return (; result=output_stacks, window_elapsed) + (; result=output_stacks, window_elapsed) else - return output_stacks + output_stacks end end end + init(p::WindowedProblem, rast::RasterStack; kw...) = init!((;), p, rast; kw...) function init!(workspace::NamedTuple, p::WindowedProblem, rast::RasterStack; window_ranges=_window_ranges(p, rast), - window_sizes=_window_sizes(p, rast; window_ranges), - window_indices=_window_indices(p, rast; window_ranges), - sorted_indices=last.(sort!(prod.(window_sizes[window_indices]) .=> window_indices; rev=true)), + grid_sizes=nothing, + selected_window_indices=nothing, verbose=true, ) - n = min(length(window_indices), p.threaded ? Threads.nthreads() : 1) - # VERY important to use _get_window_with_zeroed_buffer here not just index the raster - # Otherwise memory use will be TB + grid_sizes = isnothing(grid_sizes) ? _estimate_grid_sizes(p, rast; window_ranges) : grid_sizes + selected_window_indices = isnothing(selected_window_indices) ? _select_indices(p, rast; window_ranges, grid_sizes) : selected_window_indices + sorted_indices = last.(sort!(prod.(grid_sizes[selected_window_indices]) .=> selected_window_indices; rev=true)) + length(sorted_indices) > 0 || return missing + + n = min(length(selected_window_indices), p.threaded ? Threads.nthreads() : 1) window_workspaces = Vector{NamedTuple}(undef, n) if haskey(workspace, :window_workspaces) Threads.@threads for i in 1:n @@ -151,25 +165,28 @@ function init!(workspace::NamedTuple, p::WindowedProblem, rast::RasterStack; window_workspaces[i] = init(p.problem, largest_rast; verbose) end end - return (; rast, window_workspaces, window_sizes, window_ranges, window_indices, sorted_indices) + return (; rast, window_workspaces, grid_sizes, window_ranges, selected_window_indices, sorted_indices) end -function _max_window_problem_size(p::AbstractWindowedProblem, rast; kw...) - sizes = _window_problem_sizes(p, rast; kw...) +function _max_estimated_grid_size(p::AbstractWindowedProblem, rast; kw...) + sizes = _estimate_grid_sizes(p, rast; kw...) _, i = findmax(prod, sizes) return sizes[i] end + # Calculate the maximum number of source and target values in any window -function _window_problem_sizes(p::AbstractWindowedProblem, rast; +function _estimate_grid_sizes(p::AbstractWindowedProblem, rast; window_ranges=_window_ranges(p, rast) ) # Calculate the maximum number of source and target values in any window - return map(r -> _problem_size(p, rast, r), window_ranges) + return map(r -> _estimate_grid_size(p, rast, r), window_ranges) end -_problem_size(p::AbstractProblem, rast) = _problem_size(p, rast, axes(rast)) -function _problem_size(p::AbstractProblem, rast, ranges::Tuple) +# This function extimates problem size without actually constructing grids. +# It cant be too small, but may be too large +_estimate_grid_size(p::AbstractProblem, rast) = _estimate_grid_size(p, rast, axes(rast)) +function _estimate_grid_size(p::AbstractProblem, rast, ranges::Tuple) source_count = _valid_sources(count, p, rast, ranges) target_count = _valid_targets(count, p, rast, ranges) return source_count, target_count @@ -254,10 +271,10 @@ end centersize(p::BatchProblem) = p.centersize function solve(p::BatchProblem, rast::RasterStack; - window_indices=_window_indices(p, rast), kw... + batch_indices=_select_indices(p, rast), kw... ) - for i in eachindex(window_indices) - solve(p, rast, i; window_indices, kw...) + for i in eachindex(batch_indices) + solve(p, rast, i; batch_indices, kw...) end end function solve(p::BatchProblem, rast::RasterStack, i; verbose=false, kw...) @@ -265,40 +282,54 @@ function solve(p::BatchProblem, rast::RasterStack, i; verbose=false, kw...) end # Single batch job for running on clusters function solve!(ws::NamedTuple, p::BatchProblem; verbose=false, kw...) - # Solve for this window - output = solve!(ws.workspace, p.problem; verbose) - # Store the output rasters for this job to disk and return the file path - return _store(p, output, ws.window; verbose) + output = solve!(ws.workspace, p.problem; verbose) # Store the output rasters for this job to disk and return the fiee path + return if ismissing(output) + missing + else + non_missing_output = collect(skipmissing(output)) + @show length(non_missing_output) + if length(non_missing_output) > 0 + _store(p, non_missing_output, ws.batch_ranges; verbose) + else + missing + end + end end -function init(p::BatchProblem, rast::RasterStack, i::Int; - window_ranges=_window_ranges(p, rast), - window_indices=(println("Calculating window indices, pass `window_indices` to skip... "); _window_indices(p, rast; window_ranges)), - kw... +function init(p::BatchProblem{<:WindowedProblem}, rast::RasterStack, i::Int; + batch_ranges=_window_ranges(p, rast), + batch_indices=(println("Calculating batch indices, pass `batch_indices` to skip... "); _select_indices(p, rast; window_ranges=batch_ranges)), + window_indices=nothing, + grid_sizes=nothing, + verbose=false, ) - init!((; rast, window_ranges, window_indices), p, i; kw...) + # Get the raster data for job i + ranges = batch_ranges[batch_indices[i]] + verbose && @show ranges + # We want to materialise the raster, and we don't need sparse targets + batch_rast = rast[ranges...] + + window_ranges = _window_ranges(p, batch_rast) + grid_sizes = isnothing(grid_sizes) ? _estimate_grid_sizes(p, batch_rast) : grid_sizes[i] + selected_window_indices = isnothing(window_indices) ? _select_indices(p, batch_rast; window_ranges, grid_sizes) : window_indices[i] + workspace = init(p.problem, batch_rast; verbose, grid_sizes, selected_window_indices, window_ranges) + return (; workspace, batch=i, batch_ranges) end -function init!(workspace::NamedTuple, p::BatchProblem, i::Int; verbose=true) - @show "here" - @show "Initialising batch problem" - (; window_indices, window_ranges, rast) = workspace +function init(p::BatchProblem{<:Problem}, rast::RasterStack, i::Int; + batch_ranges=_window_ranges(p, rast), + batch_indices=(println("Calculating batch indices, pass `batch_indices` to skip... "); _select_indices(p, rast; window_ranges=batch_ranges)), + verbose=false, +) # Get the raster data for job i - window = window_ranges[window_indices[i]] - verbose && @show window - # Just read the whole thing now to reduce reads in overlapping windows - batch_rast = if p.problem isa WindowedProblem - # We want to materialise the raster, and we don't need sparse targets - rast[window...] - else # isa Problem - # We also want to materialise the window, but with sparse targets - _get_window_with_zeroed_buffer(getindex, p, rast, window) - end - verbose && @show size(batch_rast) - return (; rast=batch_rast, workspace=init(p.problem, batch_rast; verbose), batch=1, window) + ranges = batch_ranges[batch_indices[i]] + verbose && @show ranges + # Materialise the window, but with sparse targets + batch_rast = _get_window_with_zeroed_buffer(getindex, p, rast, ranges) + worskpace = init(p.problem, batch_rast; verbose) + return (; workspace, batch=i, batch_ranges) end -# Mosaic the stored files to a RasterStack function Rasters.mosaic(p::BatchProblem; to, lazy=true, missingval=0.0, kw...) - paths = _batch_paths(p, to) + paths = batch_paths(p, to) stacks = [RasterStack(path; lazy) for path in paths if isdir(path)] return Rasters.mosaic(sum, stacks; missingval, to, kw...) end @@ -310,35 +341,31 @@ function _store(p::BatchProblem, output::RasterStack{K}, ranges; kw...) where {K ) end -_batch_paths(p, x::Union{RasterStack,Tuple}; window_ranges=_window_ranges(p, x)) = - [_batch_path(p, rs) for rs in window_ranges] +batch_paths(p, x::Union{RasterStack,Tuple}; batch_ranges=_window_ranges(p, x)) = + [_batch_path(p, rs) for rs in batch_ranges] function _batch_path(p, ranges::Tuple) corners = map(first, ranges) - window_dirname = "window_" * join(corners, '_') - return joinpath(p.datapath, window_dirname) + dirname = "batch_" * join(corners, '_') + return joinpath(p.datapath, dirname) end ### Shared utilities -function _window_indices(p, rast; +# Select the windows in rast that a likely to have valid targets +# pruning may further remove some windows, but is too expensive to do here +# Running `assess` before solving to do this perfectly. +function _select_indices(p, rast; window_ranges=_window_ranges(p, rast), - window_sizes=_window_sizes(p, rast; window_ranges) + grid_sizes=_grid_sizes(p, rast; window_ranges) ) # Get the Bool mask of needed windows - mask = prod.(window_sizes) .> 0 + mask = prod.(grid_sizes) .> 0 # Get the Int indices of the needed windows return eachindex(mask)[vec(mask)] end -function _window_sizes(p, rast::RasterStack; window_ranges=_window_ranges(p, rast)) - map(window_ranges) do rs - window_rast = view(rast, rs...) - _problem_size(p, window_rast) - end -end - _window_ranges(p::Union{BatchProblem,WindowedProblem}, rast::AbstractRasterStack) = _window_ranges(p::Union{BatchProblem,WindowedProblem}, size(rast)) function _window_ranges(p::Union{BatchProblem,WindowedProblem}, size::Tuple) From ebfc76c0580ad387018c362678f719a275950250 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 23 Feb 2025 13:06:40 +0100 Subject: [PATCH 38/51] bugfix and passing tests --- Project.toml | 4 +- src/assessment.jl | 30 +++++++-- src/grid.jl | 65 ++++++++----------- src/gridrsp.jl | 28 ++++++--- src/problem.jl | 2 +- src/solvers.jl | 23 +++---- src/windows.jl | 154 ++++++++++++++++++++++++++++++++-------------- test/runtests.jl | 67 +------------------- test/windowed.jl | 29 +++++---- 9 files changed, 211 insertions(+), 191 deletions(-) diff --git a/Project.toml b/Project.toml index fd1d031..4e4d22a 100644 --- a/Project.toml +++ b/Project.toml @@ -25,6 +25,7 @@ DelimitedFiles = "1" Graphs = "1" LaTeXStrings = "1.1" LinearSolve = "2.38.0" +Plots = "1.4" ProgressLogging = "0.1" Rasters = "0.14" SimpleWeightedGraphs = "1.1" @@ -32,6 +33,7 @@ julia = "1.10" [extras] ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" [targets] -test = ["ArchGDAL"] +test = ["ArchGDAL","Plots"] diff --git a/src/assessment.jl b/src/assessment.jl index 11acbbb..d7dd7b9 100644 --- a/src/assessment.jl +++ b/src/assessment.jl @@ -160,18 +160,16 @@ already have a data folder (assumed to be successfully completed). """ function reassess(p::BatchProblem, a::NestedAssessment) (; njobs, mask, indices) = _reassess(p, a) - assessments = a.assessments[indices] - return NestedAssessment(a.size, a.shape, njobs, mask, indices, assessments) + return NestedAssessment(a.size, a.shape, njobs, mask, indices, a.assessments) end function reassess(p::BatchProblem, a::WindowAssessment) (; njobs, mask, indices) = _reassess(p, a) - sizes = a.sizes[indices] - return WindowAssessment(a.size, a.shape, njobs, mask, indices, sizes) + return WindowAssessment(a.size, a.shape, njobs, mask, indices, a.grid_sizes) end function _reassess(p, a) # Paths for all batches - paths = _batch_paths(p, size(a)) + paths = batch_paths(p, size(a)) # Paths for non-empty batches jobpaths = paths[a.indices] # Find all the jobs that havent been saved (failed) @@ -184,3 +182,25 @@ function _reassess(p, a) return (; njobs, mask, indices) end +# Accept ProblemAssessment as an argument to solve and init +# To used instead of keywords +solve(p::BatchProblem, rast::RasterStack, a::ProblemAssessment, i::Int...; verbose=false) = + solve!(init(p, rast, a), p, i...; verbose) + +function init(p::BatchProblem{<:WindowedProblem}, rast::RasterStack, a::NestedAssessment, i::Int...; kw...) + batch_ranges = _window_ranges(p, rast) + grid_sizes = map(a.assessments) do a_w + a_w.grid_sizes + end + selected_window_indices = map(a.assessments) do a_w + a_w.indices + end + init(p, rast, i...; + batch_ranges, + batch_indices=a.indices, + grid_sizes, + selected_window_indices, + ) +end +init(p::BatchProblem{<:Problem}, rast::RasterStack, a::WindowAssessment, i::Int...; kw...) = + init(p, rast, i...; batch_indices=a.indices) \ No newline at end of file diff --git a/src/grid.jl b/src/grid.jl index edd02c3..bd2fd44 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -1,12 +1,12 @@ """ Grid(nrows::Integer, - ncols::Integer; - affinities=nothing, - qualities::Matrix=ones(nrows, ncols), - source_qualities::Matrix=qualities, - target_qualities::AbstractMatrix=qualities, - costs::Union{Transformation,SparseMatrixCSC{Float64,Int}}=MinusLog(), - prune=true)::Grid + ncols::Integer; + affinities=nothing, + qualities::Matrix=ones(nrows, ncols), + source_qualities::Matrix=qualities, + target_qualities::AbstractMatrix=qualities, + costs::Union{Transformation,SparseMatrixCSC{Float64,Int}}=MinusLog(), + prune=true)::Grid Construct a `Grid` from an `affinities` matrix of type `SparseMatrixCSC`. @@ -38,7 +38,9 @@ function Grid(nrows::Integer, source_qualities::AbstractMatrix=qualities, target_qualities::AbstractMatrix=qualities, costs::Union{Transformation,SparseMatrixCSC{Float64,Int}}=MinusLog(), - prune=true) + prune=true, + check=false, +) if affinities === nothing throw(ArgumentError("matrix of affinities must be supplied")) @@ -52,6 +54,7 @@ function Grid(nrows::Integer, _source_qualities = convert(Matrix{Float64}, _unwrap(source_qualities)) _target_qualities = convert(AbstractMatrix{Float64}, _unwrap(target_qualities)) + # TODO use or remove this # Prune # id_to_grid_coordinate_list = if prune # nonzerocells = findall(!iszero, vec(sum(affinities, dims=1))) @@ -73,15 +76,18 @@ function Grid(nrows::Integer, nothing, costs end - # if any(t -> t < 0, nonzeros(costmatrix)) - # throw(ArgumentError("The cost graph can have only non-negative edge weights. Perhaps you should change the cost function?")) - # end - # cost_digraph = SimpleDiGraph(costmatrix) - # affinity_digraph = SimpleDiGraph(affinities) + # This is too expensive to calculate for small target grids + if check + if any(t -> t < 0, nonzeros(costmatrix)) + throw(ArgumentError("The cost graph can have only non-negative edge weights. Perhaps you should change the cost function?")) + end + cost_digraph = SimpleDiGraph(costmatrix) + affinity_digraph = SimpleDiGraph(affinities) - # if ne(difference(cost_digraph, affinity_digraph)) > 0 - # throw(ArgumentError("cost graph contains edges not present in the affinity graph")) - # end + if ne(difference(cost_digraph, affinity_digraph)) > 0 + throw(ArgumentError("cost graph contains edges not present in the affinity graph")) + end + end targetidx, targetnodes = _targetidx_and_nodes(target_qualities, id_to_grid_coordinate_list) qs = [_source_qualities[i] for i in id_to_grid_coordinate_list] @@ -120,41 +126,24 @@ function Grid(rast::RasterStack; ) Grid(size(rast)...; affinities, qualities, source_qualities, target_qualities, kw...) end -# TODO move functions like MinusLog to problems and pass in here Grid(p::AbstractProblem, rast::RasterStack; kw...) = Grid(rast; costs=costs(p), prune=prune(p), kw...) - -# TODO: clarify this +# TODO: better name? target_size(g::Grid) = size(g.costmatrix, 1), length(g.targetnodes) Base.size(g::Grid) = (g.nrows, g.ncols) -DimensionalData.dims(g::Grid) = g.dims - function Base.show(io::IO, ::MIME"text/plain", g::Grid) print(io, summary(g), " of size ", g.nrows, "x", g.ncols) end -# function Base.show(io::IO, ::MIME"text/html", g::Grid) -# t = string(summary(g), " of size ", g.nrows, "x", g.ncols) -# write(io, "

$t

") -# write(io, "
Affinities
") -# show(io, MIME"text/html"(), plot_outdegrees(g)) -# write(io, "
") -# if g.source_qualities === g.target_qualities -# write(io, "
Qualities
") -# show(io, MIME"text/html"(), heatmap(g.source_qualities, yflip=true)) -# else -# write(io, "
Source qualities") -# show(io, MIME"text/html"(), heatmap(g.source_qualities, yflip=true)) -# write(io, "Target qualities") -# show(io, MIME"text/html"(), heatmap(Matrix(g.target_qualities), yflip=true)) -# write(io, "
") -# end -# end + +DimensionalData.dims(g::Grid) = g.dims _id_gc_list(nrows, ncols) = vec(collect(CartesianIndices((nrows, ncols)))) + _unwrap(R::Raster) = parent(R) _unwrap(R::AbstractMatrix) = R + # Compute a vector of the cartesian indices of nonzero target qualities and # the corresponding node id corresponding to the indices _targetidx(q::AbstractMatrix, grididxs::AbstractVector) = grididxs diff --git a/src/gridrsp.jl b/src/gridrsp.jl index 34a620d..7873fb6 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -30,6 +30,7 @@ function GridRSP(g::Grid; θ=nothing, verbose=true) return GridRSP(g, θ, Pref, W, Z) end +Base.size(grsp::GridRSP) = size(grsp.g) function Base.show(io::IO, ::MIME"text/plain", grsp::GridRSP) print(io, summary(grsp), " of size ", grsp.g.nrows, "x", grsp.g.ncols) end @@ -46,7 +47,7 @@ DimensionalData.dims(grsp::GridRSP) = dims(grsp.g) Compute RSP betweenness of all nodes weighted by source and target qualities. """ function betweenness_qweighted(grsp::Union{GridRSP,NamedTuple}; - output=fill(NaN, g.nrows, g.ncols), + output=_init_output(grsp.g), kw... ) g = grsp.g @@ -88,7 +89,7 @@ of the matrix of proximities, i.e. after applying the inverse cost function to t matrix of distances. When nothing is specified, the diagonal elements won't be adjusted. """ function betweenness_kweighted(grsp::Union{GridRSP,NamedTuple}; - output=fill(NaN, size(grsp.g)), + output=_init_output(grsp.g), proximities=nothing, kw... ) @@ -104,6 +105,7 @@ function betweenness_kweighted(grsp::Union{GridRSP,NamedTuple}; return _maybe_raster(output, grsp) end + """ edge_betweenness_kweighted(grsp::GridRSP; [distance_transformation=inv(grsp.g.costfunction), diagvalue=nothing])::SparseMatrixCSC{Float64,Int} @@ -328,18 +330,19 @@ function connected_habitat( return connected_habitat(grsp, S; diagvalue) end -function connected_habitat(grsp::Union{GridRSP,NamedTuple}; proximities=nothing, kw...) +function connected_habitat(grsp::GridRSP; proximities=nothing, kw...) if isnothing(proximities) proximities = _computeproximities(grsp; kw...) end return connected_habitat(grsp, proximities; kw...) end -function connected_habitat(grsp::Union{Grid,GridRSP,NamedTuple}, S::Matrix; +connected_habitat(grsp::GridRSP, S::Matrix; kw...) = + connected_habitat(grsp.g, S; kw...) +function connected_habitat(g::Grid, S::Matrix; diagvalue::Union{Nothing,Real}=nothing, - output=fill(NaN, size(grsp.g)), + output=_init_output(g), kw... ) - g = grsp.g maybe_set_diagonal!(S, diagvalue, g.targetnodes) funvec = connected_habitat(g.qs, g.qt, S; kw...) @@ -348,10 +351,9 @@ function connected_habitat(grsp::Union{Grid,GridRSP,NamedTuple}, S::Matrix; output[ij] = x end - return _maybe_raster(output, grsp) + return _maybe_raster(output, g) end -function connected_habitat(grsp::Union{GridRSP,NamedTuple}, - cell::CartesianIndex{2}; +function connected_habitat(grsp::GridRSP, cell::CartesianIndex{2}; distance_transformation=nothing, diagvalue=nothing, avalue=floatmin(), # smallest non-zero value @@ -561,7 +563,7 @@ function criticality(grsp::Union{GridRSP,NamedTuple}; distance_transformation=nothing, diagvalue=nothing, avalue=floatmin(), - output=fill(NaN, size(grsp.g)), + output=_init_output(grsp.g), qˢvalue=0.0, qᵗvalue=0.0, kw... @@ -614,3 +616,9 @@ function maybe_set_diagonal!(proximities, diagvalue, targetnodes::AbstractVector proximities[i, j] = diagvalue end end + +function _init_output(g::Grid) + o = fill(eltype(g.affinities)(NaN), size(g)) + o[g.id_to_grid_coordinate_list] .= 0 + return o +end \ No newline at end of file diff --git a/src/problem.jl b/src/problem.jl index 9b8bc7d..e7f937e 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -57,7 +57,7 @@ solve!(workspace::NamedTuple, p::Problem; kw...) = solve!(workspace, solver(p), connectivity_measure(p), p; kw...) # `init`` calls `init!` on an empty workspace -init(p::AbstractProblem, args...; kw...) = init!((;), p, args...; kw...) +init(p::Problem, args...; kw...) = init!((;), p, args...; kw...) # init! requirements are conditional on solver and connectivity measure # See solvers.jl function init!(workspace::NamedTuple, p::Problem, rast::RasterStack; verbose=false, kw...) diff --git a/src/solvers.jl b/src/solvers.jl index 1c22630..8e9a025 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -61,16 +61,17 @@ end LinearSolver(args...; threaded=false, kw...) = LinearSolver(args, kw, threaded) # In `init!` we allocate all large dense arrays -# function init!( -# ws::NamedTuple, -# solver::MatrixSolver, -# cm::FundamentalMeasure, -# p::AbstractProblem, -# rast::RasterStack; -# verbose=false, -# ) -# _init!(ws, solver, cm, p, rast; verbose) -# end +function init!( + ws::NamedTuple, + solver::MatrixSolver, + cm::FundamentalMeasure, + p::AbstractProblem, + rast::RasterStack; + verbose=false, +) + grid = Grid(p, rast) + _init!(ws, solver, cm, p, grid; verbose) +end function init!( ws::NamedTuple, solver::Union{VectorSolver,LinearSolver}, @@ -80,7 +81,7 @@ function init!( verbose=false, ) # Initialise the whole grid - grid = Grid(p, rast; prune=false) + grid = Grid(p, rast) # Initialise the workspace workspace = _init!(ws, solver, cm, p, grid; verbose) if isthreaded(solver) diff --git a/src/windows.jl b/src/windows.jl index 6797c41..54b22be 100644 --- a/src/windows.jl +++ b/src/windows.jl @@ -195,11 +195,21 @@ end """ BatchProblem(problem::AbstractProblem; buffer, centersize, path, ext) -Combine multiple compute operations into a single object, -when compute times are long and intermediate storage is needed. +Split a large `Problem` into windowed batches, similar to `WindowedProblem`, +but allow launching individual batches separately with a batch id, and stores +them to separate files when finished, rather than returning the finished job. + +`BatchProblem` is useful when compute times are long and intermediate storage is needed, +and is designed for use with SLURM and similar computate clusters. -`problem` is usually a [`Problem`](@ref) object or a `WindowedProblem` -for nested operations. +`problem` can be a [`Problem`](@ref) object or a `WindowedProblem` for nested operations. +Deciding to use a Problem or NestedProblem will depend on the tradeoffs of loading and +saving raster data for each window area. This may be relatively expensive if the batch windows are +not very large. Due to the ON^2 scaling of connectivity calculations large batches will also +become expensive using `Problem` directly. + +If `problem` is a `WindowedProblem` IO overheads should be negligible in +comparison to the workload of running `solve` for all windows in a batch. # Keywords @@ -208,13 +218,55 @@ for nested operations. - `centersize`: The size of the target square - `buffer`: The area outside taret square - `datapath`: The path to store the output rasters. -- `joblistpath`: The path to find the job list. - `grain`: amount of thinning to apply to the target qualities. `nothing` by default. if `2 is used`, the target qualities will be sampled every 2x2 pixels, and should run 4x faster. - `ext`: The file extension for Rasters.jl to write to. Defaults to `.tif`, But can be `.nc` for NetCDF, or most other common extensions. -- `threaded`: Whether to run in parallel. `false` by default. If the problem - is also threaded at some level it may be faster to set this to `false`. + +BatchProblem is designed so that `init`, `init!`, `solve` and `solve!` can all be called on +`f(p::BatchProblem, rast::RasterStack)` to run all batches, or with a batch number +`f(p::BatchProblem, rast::RasterStack, batch::Int)` to run a single batch. + +# Example + +Calculating `init!` for `BatchProblem` is relatively expensive, and should be done once for all batches if possible. +This happens inside `solve` and `init` unless a [`ProblemAssessment`](@ref) object is passed in. + +Running [`assess`](@ref) first is the best option, and is inteded to allow assesment of the scale of the +problem, as it may require hundreds or thousands of CPU hours to complete. + +With this approach, batches can be run with: + +```julia +usign ConScape, JSON3, MyConScapeApp +batch_problem = define_my_batch() +rast = get_my_rasterstack() +assessment = assess(batchproblem, rast) # Will take a long time +JSON3.write("assessment.json") +``` + +Noticed we defined our own application package MyConScapeApp. This is a good way to +share functions like `define_my_batch` accross multiple task launches on a cluster. +See the ConScape GitHub organisation for working examples of packages like this. + +```julia +usign ConScape, Rasters, MyConScapeApp +batch_problem = define_my_batch() +rast = get_my_rasterstack() +assessment = JSON3.read("assessment.json") +# And here we pass the assesment to `solve` +solve(batch_problem, rast, assessment, batch) +``` + +Finally, when all batches have run we can mosaic the results together + +```julia +usign ConScape, Rasters, MyConScapeApp +batch_problem = define_my_batch() +rast = get_my_rasterstack() +# And here we pass the assesment to `solve` +mosaic(batch_problem; to=rast) +``` """ @kwdef struct BatchProblem{P} <: AbstractWindowedProblem{P} problem::P @@ -252,8 +304,8 @@ function BatchProblem(problem::WindowedProblem; rem(bcs, wcs) == 0 || throw(ArgumentError("BatchProblem centersize must be a multiple of WindowedProblem centersize. Got $centersize and $(problem.centersize)")) end - isnothing(nwindows) || throw(ArgumentError("Cannot specify both centersize and nwindows")) end + BatchProblem(; problem, buffer, centersize, kw...) end @@ -270,71 +322,79 @@ end centersize(p::BatchProblem) = p.centersize -function solve(p::BatchProblem, rast::RasterStack; - batch_indices=_select_indices(p, rast), kw... -) - for i in eachindex(batch_indices) - solve(p, rast, i; batch_indices, kw...) - end -end -function solve(p::BatchProblem, rast::RasterStack, i; verbose=false, kw...) - solve!(init(p, rast, i; verbose, kw...), p; verbose, kw...) -end +solve(p::BatchProblem, rast::RasterStack; verbose=false, kw...) = + solve!(init(p, rast; verbose, kw...), p; verbose) +solve(p::BatchProblem, rast::RasterStack, i; verbose=false, kw...) = + solve!(init(p, rast; verbose, kw...), p, i; verbose) + # Single batch job for running on clusters function solve!(ws::NamedTuple, p::BatchProblem; verbose=false, kw...) - output = solve!(ws.workspace, p.problem; verbose) # Store the output rasters for this job to disk and return the fiee path + for i in eachindex(ws.batch_indices) + solve!(ws, p, i; verbose) + end +end +function solve!(ws::NamedTuple, p::BatchProblem, i::Int; verbose=false) + output = solve!(init!(ws, p, i).child_workspace, p.problem; verbose) # Store the output rasters for this job to disk and return the fiee path return if ismissing(output) missing else - non_missing_output = collect(skipmissing(output)) - @show length(non_missing_output) - if length(non_missing_output) > 0 - _store(p, non_missing_output, ws.batch_ranges; verbose) - else - missing - end + _store(p, output, ws.batch_ranges[ws.batch_indices[i]]; verbose) end end -function init(p::BatchProblem{<:WindowedProblem}, rast::RasterStack, i::Int; +init(p::BatchProblem, rast::RasterStack, i::Int; verbose=false, kw...) = + init!(init(p, rast; verbose, kw...), p, i::Int; verbose) +function init(p::BatchProblem{<:WindowedProblem}, rast::RasterStack; batch_ranges=_window_ranges(p, rast), - batch_indices=(println("Calculating batch indices, pass `batch_indices` to skip... "); _select_indices(p, rast; window_ranges=batch_ranges)), - window_indices=nothing, + batch_indices=_select_indices(p, rast; window_ranges=batch_ranges), + selected_window_indices=nothing, grid_sizes=nothing, - verbose=false, + kw... +) + return (; rast, batch_ranges, batch_indices, selected_window_indices, grid_sizes) +end +function init(p::BatchProblem{<:Problem}, rast::RasterStack; + batch_ranges=_window_ranges(p, rast), + batch_indices=_select_indices(p, rast; window_ranges=batch_ranges), + kw... ) + return (; rast, batch_ranges, batch_indices) +end + +function init!(ws::NamedTuple, p::BatchProblem{<:WindowedProblem}, i::Int; verbose=false) + (; rast, batch_ranges, batch_indices, selected_window_indices, grid_sizes) = ws # Get the raster data for job i - ranges = batch_ranges[batch_indices[i]] verbose && @show ranges - # We want to materialise the raster, and we don't need sparse targets + ranges = batch_ranges[batch_indices[i]] batch_rast = rast[ranges...] - - window_ranges = _window_ranges(p, batch_rast) - grid_sizes = isnothing(grid_sizes) ? _estimate_grid_sizes(p, batch_rast) : grid_sizes[i] - selected_window_indices = isnothing(window_indices) ? _select_indices(p, batch_rast; window_ranges, grid_sizes) : window_indices[i] - workspace = init(p.problem, batch_rast; verbose, grid_sizes, selected_window_indices, window_ranges) - return (; workspace, batch=i, batch_ranges) + # Get window ranges for batch i + window_ranges = _window_ranges(p.problem, batch_rast) + # Get grid sizes for batch i + grid_sizes = isnothing(grid_sizes) ? _estimate_grid_sizes(p.problem, batch_rast; window_ranges) : grid_sizes[batch_indices[i]] + selected_window_indices = isnothing(selected_window_indices) ? _select_indices(p.problem, batch_rast; window_ranges, grid_sizes) : selected_window_indices[batch_indices[i]] + # Initialise the containted WindowedProblem + child_workspace = init(p.problem, batch_rast; verbose, grid_sizes, selected_window_indices, window_ranges) + return merge(ws, (; child_workspace, batch=i)) end -function init(p::BatchProblem{<:Problem}, rast::RasterStack, i::Int; - batch_ranges=_window_ranges(p, rast), - batch_indices=(println("Calculating batch indices, pass `batch_indices` to skip... "); _select_indices(p, rast; window_ranges=batch_ranges)), - verbose=false, -) +function init!(ws::NamedTuple, p::BatchProblem{<:Problem}, i::Int; verbose=false) + (; rast, batch_ranges, batch_indices) = ws # Get the raster data for job i ranges = batch_ranges[batch_indices[i]] verbose && @show ranges # Materialise the window, but with sparse targets batch_rast = _get_window_with_zeroed_buffer(getindex, p, rast, ranges) - worskpace = init(p.problem, batch_rast; verbose) - return (; workspace, batch=i, batch_ranges) + child_workspace = init(p.problem, batch_rast; verbose) + return merge(ws, (; child_workspace, batch=i)) end + function Rasters.mosaic(p::BatchProblem; to, lazy=true, missingval=0.0, kw...) paths = batch_paths(p, to) stacks = [RasterStack(path; lazy) for path in paths if isdir(path)] return Rasters.mosaic(sum, stacks; missingval, to, kw...) end -function _store(p::BatchProblem, output::RasterStack{K}, ranges; kw...) where {K} + +function _store(p::BatchProblem, output::RasterStack{K}, ranges::Tuple; kw...) where {K} dir = mkpath(_batch_path(p, ranges)) return Rasters.write(joinpath(dir, ""), output; ext=p.ext, force=true, verbose=false, kw... @@ -358,7 +418,7 @@ end # Running `assess` before solving to do this perfectly. function _select_indices(p, rast; window_ranges=_window_ranges(p, rast), - grid_sizes=_grid_sizes(p, rast; window_ranges) + grid_sizes=_estimate_grid_sizes(p, rast; window_ranges) ) # Get the Bool mask of needed windows mask = prod.(grid_sizes) .> 0 diff --git a/test/runtests.jl b/test/runtests.jl index d8f839c..e606895 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,3 +1,4 @@ + using ConScape, Test, SparseArrays using Rasters, ArchGDAL, Plots @@ -130,14 +131,6 @@ _tempdir = mkdir(tempname()) b = IOBuffer() show(b, "text/plain", grsp) @test occursin("GridRSP", String(take!(b))) - - b = IOBuffer() - show(b, "text/html", g) - @test occursin("Grid", String(take!(b))) - - b = IOBuffer() - show(b, "text/html", grsp) - @test occursin("GridRSP", String(take!(b))) end end @@ -213,12 +206,6 @@ end 17.339919976251554] end - @testset "Old Grid plotting" begin - @test ConScape.plot_indegrees(g) isa ConScape.Plots.Plot - @test ConScape.plot_outdegrees(g) isa ConScape.Plots.Plot - @test ConScape.plot_values(g,ones(length(g.id_to_grid_coordinate_list))) isa ConScape.Plots.Plot - end - grsp = ConScape.GridRSP(g, θ=θ) @testset "GridRSP fields" begin @@ -314,14 +301,6 @@ end b = IOBuffer() show(b, "text/plain", grsp) @test occursin("GridRSP", String(take!(b))) - - b = IOBuffer() - show(b, "text/html", g) - @test occursin("Grid", String(take!(b))) - - b = IOBuffer() - show(b, "text/html", grsp) - @test occursin("GridRSP", String(take!(b))) end end @@ -356,12 +335,6 @@ end 511.0 510.0 509.0] end - @testset "Grid plotting" begin - @test ConScape.plot_indegrees(g) isa ConScape.Plots.Plot - @test ConScape.plot_outdegrees(g) isa ConScape.Plots.Plot - @test ConScape.plot_values(g,ones(length(g.id_to_grid_coordinate_list))) isa ConScape.Plots.Plot - end - grsp = ConScape.GridRSP(g, θ=θ) @testset "GridRSP fields" begin @@ -513,14 +486,6 @@ end b = IOBuffer() show(b, "text/plain", grsp) @test occursin("GridRSP", String(take!(b))) - - b = IOBuffer() - show(b, "text/html", g) - @test occursin("Grid", String(take!(b))) - - b = IOBuffer() - show(b, "text/html", grsp) - @test occursin("GridRSP", String(take!(b))) end end @@ -544,12 +509,6 @@ end corridorwidths=(3,2), qualities=sq) - @testset "Grid plotting" begin - @test ConScape.plot_indegrees(g) isa ConScape.Plots.Plot - @test ConScape.plot_outdegrees(g) isa ConScape.Plots.Plot - @test ConScape.plot_values(g,ones(length(g.id_to_grid_coordinate_list))) isa ConScape.Plots.Plot - end - grsp = ConScape.GridRSP(g, θ=0.2) @testset "Show methods" begin @@ -560,14 +519,6 @@ end b = IOBuffer() show(b, "text/plain", grsp) @test occursin("GridRSP", String(take!(b))) - - b = IOBuffer() - show(b, "text/html", g) - @test occursin("Grid", String(take!(b))) - - b = IOBuffer() - show(b, "text/html", grsp) - @test occursin("GridRSP", String(take!(b))) end @testset "Landmark approach" begin @@ -594,12 +545,6 @@ end source_qualities=sq, target_qualities=landmarks) - @testset "Grid plotting" begin - @test ConScape.plot_indegrees(g) isa ConScape.Plots.Plot - @test ConScape.plot_outdegrees(g) isa ConScape.Plots.Plot - @test ConScape.plot_values(g,ones(length(g.id_to_grid_coordinate_list))) isa ConScape.Plots.Plot - end - grsp = ConScape.GridRSP(g, θ=0.2) @testset "Show methods" begin @@ -610,14 +555,6 @@ end b = IOBuffer() show(b, "text/plain", grsp) @test occursin("GridRSP", String(take!(b))) - - b = IOBuffer() - show(b, "text/html", g) - @test occursin("Grid", String(take!(b))) - - b = IOBuffer() - show(b, "text/html", grsp) - @test occursin("GridRSP", String(take!(b))) end @testset "Landmark approach" begin @@ -918,5 +855,5 @@ end costs=sparse( [2, 3, 1, 4, 1, 4, 2, 3], [1, 1, 2, 2, 3, 3, 4, 4], - [1.0, 1, 1, 1, 1, 1, 1, 1])) + [1.0, 1, 1, 1, 1, 1, 1, 1]); check=true) end diff --git a/test/windowed.jl b/test/windowed.jl index e0e30f4..4464118 100644 --- a/test/windowed.jl +++ b/test/windowed.jl @@ -1,5 +1,5 @@ using ConScape, Test, SparseArrays, LinearAlgebra -using Rasters, ArchGDAL, Plots +using Rasters, ArchGDAL using ConScape.LinearSolve datadir = joinpath(dirname(pathof(ConScape)), "..", "data") @@ -18,7 +18,7 @@ qualities_asc = ConScape.readasc(joinpath(datadir, "qualities_$landscape.asc"))[ qualities_asc[(affinities_asc .> 0) .& isnan.(qualities_asc)] .= 1e-20 graph_measures = (; - # betq=ConScape.BetweennessQweighted(), +# betq=ConScape.BetweennessQweighted(), betk=ConScape.BetweennessKweighted(), ch=ConScape.ConnectedHabitat(), # # TODO sens=ConScape.Sensitivity(), @@ -104,9 +104,10 @@ end @test assessment.njobs == 39 for job in 1:assessment.njobs - ConScape.solve(batch_jobs_problem, rast, job; window_indices=assessment.indices) + ConScape.solve(batch_jobs_problem, rast, assessment, job) end batch_jobs_result = mosaic(batch_jobs_problem; to=rast) + batch_jobs_result.betk @testset "reassessment" begin @@ -116,7 +117,7 @@ end @test length(re1.indices) == 0 # Delete three results - paths = ConScape._batch_paths(batch_jobs_problem, size(assessment)) + paths = ConScape.batch_paths(batch_jobs_problem, size(assessment)) rm.(paths[[1, 7, 21]]; recursive=true) re2 = ConScape.reassess(batch_jobs_problem, assessment) @test re2.njobs == 3 @@ -125,7 +126,7 @@ end # Run the reassessment for job in 1:re2.njobs - ConScape.solve(batch_jobs_problem, rast, job; window_indices=re2.indices) + ConScape.solve(batch_jobs_problem, rast, re2, job) end # Again there are no jobs left @@ -148,8 +149,8 @@ end datapath=tempname(), centersize=(10, 10) ) # Try one - @time workspace = ConScape.init(nested_jobs_problem, rast, 5) - @time ConScape.solve!(workspace, nested_jobs_problem) + @time workspace = ConScape.init(nested_jobs_problem, rast) + @time ConScape.solve!(workspace, nested_jobs_problem, 5) assessment = ConScape.assess(nested_jobs_problem, rast); for job in 1:assessment.njobs @@ -164,16 +165,16 @@ end @test length(re1.indices) == 0 # Delete three results - paths = ConScape._batch_paths(nested_jobs_problem, size(assessment)) + paths = ConScape.batch_paths(nested_jobs_problem, size(assessment)) rm.(paths[[2, 5]]; recursive=true) re2 = ConScape.reassess(nested_jobs_problem, assessment) @test re2.njobs == 2 @test length(re2.indices) == 2 @test re2.mask[[2, 5]] == [true, true] - + re2 # Run the reassessment for job in 1:re2.njobs - ConScape.solve(nested_jobs_problem, rast, job; window_indices=re2.indices) + ConScape.solve(nested_jobs_problem, rast, re2, job) end # Again there are no jobs left @@ -190,13 +191,15 @@ end keys(nested_jobs_result) == Tuple(sort(collect(expected_layers))) - @test all(permutedims(batch_jobs_result.ch) .=== permutedims(batch_result.ch)) - @test all(permutedims(batch_jobs_result.betk) .=== permutedims(batch_result.betk)) - # These may be approximate after mosaic order changes compare(a, b) = isnan(a) && isnan(b) || isapprox(a, b) + + @test all(batch_jobs_result.ch .=== batch_result.ch) + @test all(batch_jobs_result.betk .=== batch_result.betk) @test all(compare.(permutedims(batch_result.ch), windowed_result.ch)) @test all(compare.(permutedims(batch_result.betk), windowed_result.betk)) + @test all(compare.(nested_result.betk, nested_jobs_result.betk)) + @test all(compare.(nested_result.ch, nested_jobs_result.ch)) # TODO: there are some tiny fp differences in the nested result @test all(map(nested_result.ch, batch_result.ch) do n, b From 7fd91e3e8a8c9cbb7e6789e14abfa1032a9584d7 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 23 Feb 2025 14:44:07 +0100 Subject: [PATCH 39/51] use spawn for window threading --- src/windows.jl | 32 +++++++++++++------------------- test/windowed.jl | 2 +- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/src/windows.jl b/src/windows.jl index 54b22be..7c3b9f2 100644 --- a/src/windows.jl +++ b/src/windows.jl @@ -76,13 +76,11 @@ function solve!(workspace::NamedTuple, p::WindowedProblem; put!(ch, ws) end # Set up channels for threading - # Define empty outputs - output_stacks = Vector{Union{RasterStack,Missing}}(undef, length(sorted_indices)) # Define a runner for threaded/non-threaded operation function run(i, iw) # Get a window range window = window_ranges[iw] - verbose && println("Running job $iw on ranges $window and thread $(Threads.threadid())") + verbose && println("Running job $i - $iw for ranges $window and thread $(Threads.threadid())") # verbose && println("Solving window $i $window ") window_rast = _get_window_with_zeroed_buffer(view, p, rast, window) # Initialise the window using stored memory @@ -93,29 +91,25 @@ function solve!(workspace::NamedTuple, p::WindowedProblem; # Solve for the window verbose && println("Solving window $window...") grid = workspace_initialised.grid - output_stacks[i] = if prod(target_size(grid)) > 0 - solve!(workspace_initialised, p.problem) - else - missing + elapsed = @elapsed begin + output = if prod(target_size(grid)) > 0 + solve!(workspace_initialised, p.problem) + else + missing + end end # Return the workspace to the channel put!(ch, workspace) + return output, elapsed end - window_elapsed = Vector{Pair{Float64,Int64}}(undef, length(sorted_indices)) # Run the window problems - if p.threaded - Threads.@threads for i in eachindex(sorted_indices) - iw = sorted_indices[i] - e = @elapsed run(i, iw) - window_elapsed[i] = e => iw - end + out_elapsed = if p.threaded + fetch.([Threads.@spawn run(i, sorted_indices[i]) for i in eachindex(sorted_indices)]) else - for i in eachindex(sorted_indices) - iw = sorted_indices[i] - e = @elapsed run(i, iw) - window_elapsed[i] = e => iw - end + [run(i, sorted_indices[i]) for i in eachindex(sorted_indices)] end + output_stacks = first.(out_elapsed) + window_elapsed = last.(out_elapsed) # Maybe mosaic the output return if mosaic_return t = time() diff --git a/test/windowed.jl b/test/windowed.jl index 4464118..d88ae58 100644 --- a/test/windowed.jl +++ b/test/windowed.jl @@ -36,7 +36,7 @@ solve(problem, rast; verbose=true) @testset "target mosaicing matches original" begin windowed_problem = ConScape.WindowedProblem(problem; - buffer=10, centersize=5, threaded=false + buffer=10, centersize=5, threaded=true, ) @test collect(ConScape._window_ranges(windowed_problem, rast)) == [ (1:25, 1:25) (1:25, 6:30) (1:25, 11:35) (1:25, 16:40) (1:25, 21:45) (1:25, 26:50) (1:25, 31:55) (1:25, 36:59) From fe07186ab4f39c8d70235481a85436afbe4f7e4c Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 23 Feb 2025 15:33:36 +0100 Subject: [PATCH 40/51] gc --- src/windows.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/windows.jl b/src/windows.jl index 7c3b9f2..bd7ae69 100644 --- a/src/windows.jl +++ b/src/windows.jl @@ -100,6 +100,8 @@ function solve!(workspace::NamedTuple, p::WindowedProblem; end # Return the workspace to the channel put!(ch, workspace) + # Garbage collect for this window + GC.gc() return output, elapsed end # Run the window problems @@ -332,7 +334,10 @@ function solve!(ws::NamedTuple, p::BatchProblem, i::Int; verbose=false) return if ismissing(output) missing else - _store(p, output, ws.batch_ranges[ws.batch_indices[i]]; verbose) + ranges = ws.batch_ranges[ws.batch_indices[i]] + # Clear out some memory before writing + GC.gc() + _store(p, output, ranges; verbose) end end From e296e6ffe778717c2ad0de420976e521f9f585c5 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 23 Feb 2025 21:55:21 +0100 Subject: [PATCH 41/51] fix doc --- src/assessment.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/assessment.jl b/src/assessment.jl index d7dd7b9..a8ccc02 100644 --- a/src/assessment.jl +++ b/src/assessment.jl @@ -151,7 +151,7 @@ function assess( end """ - reassess(a::NestedAssessment, p::BatchProblem) + reassess(p::BatchProblem, a::NestedAssessment) Re-asses an existing nested assesment of a BatchProblem. From 8e92af7e6d1e9eebe5f800cd348d9eb137863585 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 23 Feb 2025 21:56:12 +0100 Subject: [PATCH 42/51] print wanting for empty stack output --- src/windows.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/windows.jl b/src/windows.jl index bd7ae69..cc079aa 100644 --- a/src/windows.jl +++ b/src/windows.jl @@ -331,10 +331,12 @@ function solve!(ws::NamedTuple, p::BatchProblem; verbose=false, kw...) end function solve!(ws::NamedTuple, p::BatchProblem, i::Int; verbose=false) output = solve!(init!(ws, p, i).child_workspace, p.problem; verbose) # Store the output rasters for this job to disk and return the fiee path - return if ismissing(output) + iw = ws.batch_indices[i] + ranges = ws.batch_ranges[iw] + return if ismissing(output) + println("Warning: output was empty for job $i at window $iw over ranges $ranges") missing else - ranges = ws.batch_ranges[ws.batch_indices[i]] # Clear out some memory before writing GC.gc() _store(p, output, ranges; verbose) From 008ebea328329049a838c2c8ddcb8712f7ad367b Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 2 Mar 2025 07:35:52 +0100 Subject: [PATCH 43/51] add assessment warnings --- src/assessment.jl | 92 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 26 deletions(-) diff --git a/src/assessment.jl b/src/assessment.jl index a8ccc02..e6e8821 100644 --- a/src/assessment.jl +++ b/src/assessment.jl @@ -1,3 +1,23 @@ + +struct AssessmentWarnings + source_qualities_nan_found::Bool + target_qualities_nan_found::Bool +end + +function Base.:(|)(aw1::AssessmentWarnings, aw2) + AssessmentWarnings( + aw1.source_qualities_nan_found | aw2.source_qualities_nan_found, + aw1.target_qualities_nan_found | aw2.target_qualities_nan_found, + ) +end +function Base.:(&)(aw1::AssessmentWarnings, aw2) + AssessmentWarnings( + aw1.source_qualities_nan_found & aw2.source_qualities_nan_found, + aw1.target_qualities_nan_found & aw2.target_qualities_nan_found, + ) +end +Base.any(aw::AssessmentWarnings) = aw.source_qualities_nan_found | aw.target_qualities_nan_found + """ ProblemAssessment @@ -30,6 +50,7 @@ a `Problem`. njobs::Int mask::Vector{Bool} indices::Vector{Int} + warnings::AssessmentWarnings grid_sizes::Vector{Tuple{Int,Int}} end @@ -53,18 +74,22 @@ that holds another `AbstractWindowedProblem`. njobs::Int mask::Vector{Bool} indices::Vector{Int} + warnings::AssessmentWarnings assessments::Vector{WindowAssessment} end -function Base.show(io::IO, mime::MIME"text/plain", bs::ProblemAssessment) +function Base.show(io::IO, mime::MIME"text/plain", a::ProblemAssessment) println(io, "NestedAssessment") println(io) - println(io, "Raster shape: $(bs.shape)") - println(io, "Number of jobs: $(bs.njobs)") + println(io, "Shape: $(a.shape)") + println(io, "Number of jobs: $(a.njobs)") # Use SparseArrays nice matrix printing for the mask println(io, "Job mask: ") - mask = sparse(reshape(bs.mask, bs.shape)) + mask = sparse(reshape(a.mask, a.shape)) Base.print_array(io, mask) + if any(a.warnings) + show(io, mime, a.warnings) + end end @@ -78,12 +103,28 @@ This can be used to indicate memory and time reequiremtents on a cluster. """ function assess end -function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack; kw...) +function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack; + inner_target_bools=nothing, + target_ranges=_target_ranges(p, rast), + kw... +) # Define the ranges of each window window_ranges = _window_ranges(p, rast) + # Convert everything to Bool at the batch level so window assessments are fast + inner_targets = view(rast.target_qualities, target_ranges...) + warnings = AssessmentWarnings( + any(isnan, rast.qualities), + any(isnan, inner_targets), + ) + inner_target_bools = isnothing(inner_target_bools) ? _isvalid.(inner_targets) : inner_target_bools + qualities = _isvalid.(rast.qualities) + target_qualities = falses(size(rast)) + target_qualities[target_ranges...] .= inner_target_bools + bool_rast = RasterStack((; qualities, target_qualities), dims(rast)) + # Calculate window sizes and allocations - grid_sizes = vec(_estimate_grid_sizes(p, rast; window_ranges)) + grid_sizes = vec(_estimate_grid_sizes(p, bool_rast; window_ranges)) # Organise stats for each window into vectors window_mask = map(s -> prod(s) > 0, grid_sizes) @@ -93,7 +134,7 @@ function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack njobs = count(window_mask) shape = size(window_ranges) - WindowAssessment(size(rast), shape, njobs, window_mask, non_empty_indices, grid_sizes) + WindowAssessment(size(rast), shape, njobs, window_mask, non_empty_indices, warnings, grid_sizes) end function assess( p::AbstractWindowedProblem{<:AbstractWindowedProblem}, @@ -118,27 +159,22 @@ function assess( size, shape=(0, 0), njobs=0, - grid_sizes=Tuple{Int,Int}[], mask=Bool[], indices=Int[], + warnings=AssessmentWarnings(false, false), + grid_sizes=Tuple{Int,Int}[], ) end - # Just load the target window quickly first to avoid loading large rasters - window_view = view(rast, rs...) - target_ranges = _target_ranges(p, window_view) + # We only need qualities for the assessment + window_rast = rast[(:qualities, :target_qualities)][rs...] + target_ranges = _target_ranges(p, window_rast) # Convert targets to bool as early as possible - inner_target_bools = _isvalid.(window_view.target_qualities[target_ranges...]) + inner_targets = view(window_rast.target_qualities, target_ranges...) + inner_target_bools = _isvalid.(inner_targets) assessments[i] = if count(inner_target_bools) > 0 - # Convert everything to Bool at the batch level so window assessments are fast - window_bools = open(window_view) do o - qualities = collect(_isvalid.(o.qualities)) - target_qualities = falses(size(o)) - target_qualities[target_ranges...] .= inner_target_bools - RasterStack((; qualities, target_qualities), dims(window_view)) - end - assess(p.problem, window_bools; nthreads, kw...) + assess(p.problem, window_rast; inner_target_bools, target_ranges, nthreads, kw...) else - empty_assesment(size(window_view)) + empty_assesment(size(window_rast)) end end # Get mask and indices @@ -147,7 +183,8 @@ function assess( # Calculate global stats njobs = count(mask) shape = size(window_ranges) - return NestedAssessment(size(rast), shape, njobs, mask, non_empty_indices, assessments) + warnings = reduce(|, (a.warnings for a in assessments)) + return NestedAssessment(size(rast), shape, njobs, mask, non_empty_indices, warnings, assessments) end """ @@ -159,12 +196,15 @@ The returned `NestedAssessment` will exclude any jobs that already have a data folder (assumed to be successfully completed). """ function reassess(p::BatchProblem, a::NestedAssessment) - (; njobs, mask, indices) = _reassess(p, a) - return NestedAssessment(a.size, a.shape, njobs, mask, indices, a.assessments) + patch = _reassess(p, a) + a1 = ConstructionBase.setproperties(a, patch) + # Update nan_target_found from remaining indices + warnings = reduce(|, (a1.assessments[i].warnings for i in a1.indices)) + return ConstructionBase.setproperties(a1, (; warnings)) end function reassess(p::BatchProblem, a::WindowAssessment) - (; njobs, mask, indices) = _reassess(p, a) - return WindowAssessment(a.size, a.shape, njobs, mask, indices, a.grid_sizes) + patch = _reassess(p, a) + return ConstructionBase.setproperties(a, patch) end function _reassess(p, a) From 82210ec30ef4bc9d14f8748119f5d2fb21d78be0 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 2 Mar 2025 07:36:34 +0100 Subject: [PATCH 44/51] no internal mosaic for batches --- src/windows.jl | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/windows.jl b/src/windows.jl index cc079aa..275ef1a 100644 --- a/src/windows.jl +++ b/src/windows.jl @@ -365,8 +365,8 @@ end function init!(ws::NamedTuple, p::BatchProblem{<:WindowedProblem}, i::Int; verbose=false) (; rast, batch_ranges, batch_indices, selected_window_indices, grid_sizes) = ws # Get the raster data for job i - verbose && @show ranges ranges = batch_ranges[batch_indices[i]] + verbose && @show ranges batch_rast = rast[ranges...] # Get window ranges for batch i window_ranges = _window_ranges(p.problem, batch_rast) @@ -388,13 +388,6 @@ function init!(ws::NamedTuple, p::BatchProblem{<:Problem}, i::Int; verbose=false return merge(ws, (; child_workspace, batch=i)) end -function Rasters.mosaic(p::BatchProblem; to, lazy=true, missingval=0.0, kw...) - paths = batch_paths(p, to) - stacks = [RasterStack(path; lazy) for path in paths if isdir(path)] - return Rasters.mosaic(sum, stacks; missingval, to, kw...) -end - - function _store(p::BatchProblem, output::RasterStack{K}, ranges::Tuple; kw...) where {K} dir = mkpath(_batch_path(p, ranges)) return Rasters.write(joinpath(dir, ""), output; From ef0d821c6f4922a72199a5ad57fa2f885d01a991 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 2 Mar 2025 07:38:22 +0100 Subject: [PATCH 45/51] remove nans from qualities --- src/grid.jl | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/grid.jl b/src/grid.jl index bd2fd44..7c75583 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -31,9 +31,10 @@ struct Grid{D<:Union{Tuple,Nothing},SQ,TQ} qt::Vector{Float64} dims::D end -function Grid(nrows::Integer, +function Grid( + nrows::Integer, ncols::Integer; - affinities=nothing, + affinities, qualities::AbstractMatrix=ones(nrows, ncols), source_qualities::AbstractMatrix=qualities, target_qualities::AbstractMatrix=qualities, @@ -41,18 +42,13 @@ function Grid(nrows::Integer, prune=true, check=false, ) - - if affinities === nothing - throw(ArgumentError("matrix of affinities must be supplied")) - end - if nrows * ncols != LinearAlgebra.checksquare(affinities) n = size(affinities, 1) throw(ArgumentError("grid size ($nrows, $ncols) is incompatible with size of affinity matrix ($n, $n)")) end - _source_qualities = convert(Matrix{Float64}, _unwrap(source_qualities)) - _target_qualities = convert(AbstractMatrix{Float64}, _unwrap(target_qualities)) + _source_qualities = _prepare_qualities(source_qualities) + _target_qualities = _prepare_qualities(target_qualities) # TODO use or remove this # Prune @@ -85,7 +81,7 @@ function Grid(nrows::Integer, affinity_digraph = SimpleDiGraph(affinities) if ne(difference(cost_digraph, affinity_digraph)) > 0 - throw(ArgumentError("cost graph contains edges not present in the affinity graph")) + throw(ArgumentError("cost graph contains edges not present in the affinity graph")) end end @@ -141,8 +137,12 @@ DimensionalData.dims(g::Grid) = g.dims _id_gc_list(nrows, ncols) = vec(collect(CartesianIndices((nrows, ncols)))) -_unwrap(R::Raster) = parent(R) -_unwrap(R::AbstractMatrix) = R +_prepare_qualities(A::AbstractMatrix) = _no_nan_f64.(_unwrap_raster(A)) + +_no_nan_f64(x) = isnan(x) ? 0.0 : Float64(x) + +_unwrap_raster(R::Raster) = parent(R) +_unwrap_raster(R::AbstractMatrix) = R # Compute a vector of the cartesian indices of nonzero target qualities and # the corresponding node id corresponding to the indices From 6bc31fe3afe94ec8349c13cfa4b8434220af4995 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 3 Mar 2025 11:42:06 +0100 Subject: [PATCH 46/51] fix assessmentwarnings funcs --- src/assessment.jl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/assessment.jl b/src/assessment.jl index e6e8821..5db3f67 100644 --- a/src/assessment.jl +++ b/src/assessment.jl @@ -4,19 +4,20 @@ struct AssessmentWarnings target_qualities_nan_found::Bool end -function Base.:(|)(aw1::AssessmentWarnings, aw2) +function Base.:(|)(aw1::AssessmentWarnings, aw2::AssessmentWarnings) AssessmentWarnings( aw1.source_qualities_nan_found | aw2.source_qualities_nan_found, aw1.target_qualities_nan_found | aw2.target_qualities_nan_found, ) end -function Base.:(&)(aw1::AssessmentWarnings, aw2) +function Base.:(&)(aw1::AssessmentWarnings, aw2::AssessmentWarnings) AssessmentWarnings( aw1.source_qualities_nan_found & aw2.source_qualities_nan_found, aw1.target_qualities_nan_found & aw2.target_qualities_nan_found, ) end Base.any(aw::AssessmentWarnings) = aw.source_qualities_nan_found | aw.target_qualities_nan_found +Base.all(aw::AssessmentWarnings) = aw.source_qualities_nan_found & aw.target_qualities_nan_found """ ProblemAssessment @@ -190,16 +191,18 @@ end """ reassess(p::BatchProblem, a::NestedAssessment) -Re-asses an existing nested assesment of a BatchProblem. +Re-asses an existing nested assesment of a [`BatchProblem`](@ref). -The returned `NestedAssessment` will exclude any jobs that +The returned `NestedAssessment` will exclude any batches that already have a data folder (assumed to be successfully completed). """ function reassess(p::BatchProblem, a::NestedAssessment) patch = _reassess(p, a) a1 = ConstructionBase.setproperties(a, patch) # Update nan_target_found from remaining indices - warnings = reduce(|, (a1.assessments[i].warnings for i in a1.indices)) + warnings = reduce(|, (a1.assessments[i].warnings for i in a1.indices); + init=AssessmentWarnings(false, false) + ) return ConstructionBase.setproperties(a1, (; warnings)) end function reassess(p::BatchProblem, a::WindowAssessment) From a43ad2d9b7a4a1ff53678a360b767ac64e59d2bb Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Wed, 5 Mar 2025 12:55:40 +0100 Subject: [PATCH 47/51] reorganise --- src/solvers.jl | 11 ----------- src/utils.jl | 14 +++++++++++++- src/windows.jl | 5 +++-- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/solvers.jl b/src/solvers.jl index 8e9a025..516e955 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -522,17 +522,6 @@ function _merge_to_stack(nt::NamedTuple{K}) where {K} end end -_maybe_raster(x) = x -_maybe_raster(x::Raster) = x -_maybe_raster(x::Number) = Raster(fill(x), ()) -_maybe_raster(mat::Raster, g) = mat -_maybe_raster(mat::AbstractMatrix, g::Union{Grid,GridRSP}) = - _maybe_raster(mat, dims(g)) -_maybe_raster(mats::NamedTuple, g::Union{Grid,GridRSP}) = - map(mat -> _maybe_raster(mat, g), mats) -_maybe_raster(mat::AbstractMatrix, ::Nothing) = mat -_maybe_raster(mat::AbstractMatrix, dims::Tuple) = Raster(mat, dims) - function _mergename(::Val{K1}, gm::NamedTuple{K2}) where {K1,K2} # Combine outer and inner names with an underscore joinedkeys = map(K2) do k2 diff --git a/src/utils.jl b/src/utils.jl index ab3f84b..46c3467 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -200,4 +200,16 @@ end function _keywords(o::T) where T vals = map(f -> getfield(o, f), fieldnames(T)) return NamedTuple{fieldnames(T)}(vals) -end \ No newline at end of file +end + +_maybe_raster(x) = x +_maybe_raster(x::Raster) = x +_maybe_raster(x::T) where T<:Number = Raster(fill(x), (); missingval=T(NaN)) +_maybe_raster(mat::Raster, g) = mat +_maybe_raster(mat::AbstractMatrix, g::Union{Grid,GridRSP}) = + _maybe_raster(mat, dims(g)) +_maybe_raster(mats::NamedTuple, g::Union{Grid,GridRSP}) = + map(mat -> _maybe_raster(mat, g), mats) +_maybe_raster(mat::AbstractMatrix, ::Nothing) = mat +_maybe_raster(mat::AbstractMatrix{T}, dims::Tuple) where T = + Raster(mat, dims; missingval=T(NaN)) \ No newline at end of file diff --git a/src/windows.jl b/src/windows.jl index 275ef1a..432757c 100644 --- a/src/windows.jl +++ b/src/windows.jl @@ -339,7 +339,8 @@ function solve!(ws::NamedTuple, p::BatchProblem, i::Int; verbose=false) else # Clear out some memory before writing GC.gc() - _store(p, output, ranges; verbose) + # Write raster to disk + _write(p, output, ranges; verbose) end end @@ -388,7 +389,7 @@ function init!(ws::NamedTuple, p::BatchProblem{<:Problem}, i::Int; verbose=false return merge(ws, (; child_workspace, batch=i)) end -function _store(p::BatchProblem, output::RasterStack{K}, ranges::Tuple; kw...) where {K} +function _write(p::BatchProblem, output::RasterStack{K}, ranges::Tuple; kw...) where {K} dir = mkpath(_batch_path(p, ranges)) return Rasters.write(joinpath(dir, ""), output; ext=p.ext, force=true, verbose=false, kw... From 4d5244b7730c70f14edc49cc84d8e8d14001afb9 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Wed, 5 Mar 2025 12:56:03 +0100 Subject: [PATCH 48/51] add and use prune_unconnected --- src/grid.jl | 51 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/src/grid.jl b/src/grid.jl index 7c75583..69e84a8 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -105,11 +105,7 @@ function Grid( dims(source_qualities), ) - if prune - return largest_subgraph(g) - else - return g - end + return prune ? prune_unconnected(g) : g end function Grid(rast::RasterStack; qualities=get(rast, :qualities) do @@ -212,6 +208,51 @@ false """ Graphs.is_strongly_connected(g::Grid) = is_strongly_connected(SimpleWeightedDiGraph(g.affinities)) +""" + prune_unconnected(g::Grid) + +Remove source pixels that are not accessible from target pixels. +""" +function prune_unconnected(g::Grid) + # Convert cost matrix to graph, todo: is `permute=false` needed + graph = SimpleWeightedDiGraph(g.costmatrix, permute=false) + + # Find the subgraphs + scc = strongly_connected_components(graph) + + # Keep all subgraphs that contain target nodes + targetnodes = g.targetnodes + keep = map(scc) do c + any(n -> n in c, targetnodes) + end + + scci = sort!(reduce(vcat, scc[keep])) + + # Extract the adjacency matrix of the largest subgraph + affinities = g.affinities[scci, scci] + + costmatrix = g.costfunction === nothing ? g.costmatrix[scci, scci] : mapnz(g.costfunction, affinities) + id_to_grid_coordinate_list = g.id_to_grid_coordinate_list[scci] + targetidx, targetnodes = _targetidx_and_nodes(g.target_qualities, id_to_grid_coordinate_list) + qs = [g.source_qualities[i] for i in id_to_grid_coordinate_list] + qt = [g.target_qualities[i] for i in id_to_grid_coordinate_list ∩ targetidx] + return Grid( + g.nrows, + g.ncols, + affinities, + g.costfunction, + costmatrix, + id_to_grid_coordinate_list, + g.source_qualities, + g.target_qualities, + targetidx, + targetnodes, + qs, + qt, + g.dims, + ) +end + """ largest_subgraph(g::Grid)::Grid From 93ff283aa06a2c318c05cbd9d0e11a1a13b93b73 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 9 Mar 2025 22:46:11 +0100 Subject: [PATCH 49/51] loop over subgraphs --- src/graph_measure.jl | 30 ++++---- src/grid.jl | 67 +++++++++--------- src/gridrsp.jl | 38 +++++------ src/solvers.jl | 158 ++++++++++++++++++++++--------------------- test/problem.jl | 13 ++-- test/runtests.jl | 10 +-- 6 files changed, 154 insertions(+), 162 deletions(-) diff --git a/src/graph_measure.jl b/src/graph_measure.jl index 7afa482..e2109d1 100644 --- a/src/graph_measure.jl +++ b/src/graph_measure.jl @@ -6,19 +6,22 @@ These are lazy definitions of conscape functions. """ abstract type GraphMeasure end +abstract type SpatialMeasure <: GraphMeasure end abstract type TopologicalMeasure <: GraphMeasure end -abstract type BetweennessMeasure <: GraphMeasure end -abstract type PerturbationMeasure <: GraphMeasure end +abstract type BetweennessMeasure <: SpatialMeasure end +abstract type PerturbationMeasure <: SpatialMeasure end abstract type PathDistributionMeasure <: GraphMeasure end # Concrete GraphMeasure structs struct BetweennessQweighted <: BetweennessMeasure end -@kwdef struct BetweennessKweighted <: BetweennessMeasure end -struct EdgeBetweennessQweighted <: BetweennessMeasure end -@kwdef struct EdgeBetweennessKweighted <: BetweennessMeasure end +struct BetweennessKweighted <: BetweennessMeasure end -@kwdef struct ConnectedHabitat <: GraphMeasure end +abstract type EdgeBetweennessMeasure <: BetweennessMeasure end +struct EdgeBetweennessQweighted <: EdgeBetweennessMeasure end +struct EdgeBetweennessKweighted <: EdgeBetweennessMeasure end + +struct ConnectedHabitat <: SpatialMeasure end @kwdef struct Criticality{AV,QT,QS} <: PerturbationMeasure avalue::AV = floatmin() @@ -75,15 +78,10 @@ struct ReturnsOther{F} <: ReturnType end # These allow calculation of return allocations -returntype(::EdgeBetweennessQweighted) = ReturnsSparse() -returntype(::EdgeBetweennessKweighted) = ReturnsSparse() -returntype(::BetweennessQweighted) = ReturnsDenseSpatial() -returntype(::BetweennessKweighted) = ReturnsDenseSpatial() -returntype(::ConnectedHabitat) = ReturnsDenseSpatial() -returntype(::Criticality) = ReturnsDenseSpatial() +returntype(::SpatialMeasure) = ReturnsDenseSpatial() +returntype(::EdgeBetweennessMeasure) = ReturnsSparse() returntype(::EigMax) = ReturnsOther((n, m) -> n + m) -returntype(::MeanLeastCostKullbackLeiblerDivergence) = ReturnsScalar() -returntype(::MeanKullbackLeiblerDivergence) = ReturnsScalar() +returntype(::PerturbationMeasure) = ReturnsScalar() # A trait for connectivity requirement needs_connectivity(::GraphMeasure) = false @@ -94,7 +92,6 @@ needs_connectivity(::ConnectedHabitat) = true needs_connectivity(::Criticality) = true # Workspace allocation traits -return_type(::GraphMeasure) = false needs_inv(::GraphMeasure) = false needs_inv(::BetweennessMeasure) = true needs_Z(::GraphMeasure) = true @@ -111,7 +108,7 @@ needs_expected_cost(::EdgeBetweennessKweighted) = true needs_expected_cost(::MeanKullbackLeiblerDivergence) = true needs_free_energy_distance(::GraphMeasure) = false needs_free_energy_distance(::MeanKullbackLeiblerDivergence) = true -needs_Aaj_init(::GraphMeasure) = true # TODO which dont? +needs_adjoint_init(::GraphMeasure) = true # TODO which dont? # Trait helpers @@ -128,7 +125,6 @@ count_permuted_workspaces(p::AbstractProblem) = # Trait aggregator hastrait(t, gms) = reduce(|, map(t, gms); init=false) - # compute: run a graph function with the appropriate keywords compute(gm::GraphMeasure, p::AbstractProblem, g::Union{Grid,GridRSP}; kw...) = graph_function(gm)(g; keywords(gm, p)..., kw...) \ No newline at end of file diff --git a/src/grid.jl b/src/grid.jl index 69e84a8..85161ff 100644 --- a/src/grid.jl +++ b/src/grid.jl @@ -105,7 +105,7 @@ function Grid( dims(source_qualities), ) - return prune ? prune_unconnected(g) : g + return prune ? largest_subgraph(g) : g end function Grid(rast::RasterStack; qualities=get(rast, :qualities) do @@ -208,12 +208,7 @@ false """ Graphs.is_strongly_connected(g::Grid) = is_strongly_connected(SimpleWeightedDiGraph(g.affinities)) -""" - prune_unconnected(g::Grid) - -Remove source pixels that are not accessible from target pixels. -""" -function prune_unconnected(g::Grid) +function split_subgraphs(g::Grid) # Convert cost matrix to graph, todo: is `permute=false` needed graph = SimpleWeightedDiGraph(g.costmatrix, permute=false) @@ -221,36 +216,36 @@ function prune_unconnected(g::Grid) scc = strongly_connected_components(graph) # Keep all subgraphs that contain target nodes - targetnodes = g.targetnodes - keep = map(scc) do c - any(n -> n in c, targetnodes) + subgraphs_with_targets = map(scc) do c + length(c) > 1 && any(n -> n in c, g.targetnodes) + end + subgraphs = sort!(scc[subgraphs_with_targets]; by=length, rev=true) + + # Return a Vector of Grids for each subgraph + return map(subgraphs) do scci + sort!(scci) + affinities = g.affinities[scci, scci] + costmatrix = g.costfunction === nothing ? g.costmatrix[scci, scci] : mapnz(g.costfunction, affinities) + id_to_grid_coordinate_list = g.id_to_grid_coordinate_list[scci] + targetidx, targetnodes = _targetidx_and_nodes(g.target_qualities, id_to_grid_coordinate_list) + qs = [g.source_qualities[i] for i in id_to_grid_coordinate_list] + qt = [g.target_qualities[i] for i in id_to_grid_coordinate_list ∩ targetidx] + Grid( + g.nrows, + g.ncols, + affinities, + g.costfunction, + costmatrix, + id_to_grid_coordinate_list, + g.source_qualities, + g.target_qualities, + targetidx, + targetnodes, + qs, + qt, + g.dims, + ) end - - scci = sort!(reduce(vcat, scc[keep])) - - # Extract the adjacency matrix of the largest subgraph - affinities = g.affinities[scci, scci] - - costmatrix = g.costfunction === nothing ? g.costmatrix[scci, scci] : mapnz(g.costfunction, affinities) - id_to_grid_coordinate_list = g.id_to_grid_coordinate_list[scci] - targetidx, targetnodes = _targetidx_and_nodes(g.target_qualities, id_to_grid_coordinate_list) - qs = [g.source_qualities[i] for i in id_to_grid_coordinate_list] - qt = [g.target_qualities[i] for i in id_to_grid_coordinate_list ∩ targetidx] - return Grid( - g.nrows, - g.ncols, - affinities, - g.costfunction, - costmatrix, - id_to_grid_coordinate_list, - g.source_qualities, - g.target_qualities, - targetidx, - targetnodes, - qs, - qt, - g.dims, - ) end """ diff --git a/src/gridrsp.jl b/src/gridrsp.jl index 7873fb6..391b2b9 100644 --- a/src/gridrsp.jl +++ b/src/gridrsp.jl @@ -52,12 +52,15 @@ function betweenness_qweighted(grsp::Union{GridRSP,NamedTuple}; ) g = grsp.g betvec = RSP_betweenness_qweighted(grsp.W, grsp.Z, g.qs, g.qt, g.targetnodes; kw...) - coordinate_list = g.id_to_grid_coordinate_list + _update_output!(output, g, betvec) + return _maybe_raster(output, g) +end - for (i, v) in enumerate(betvec) - output[coordinate_list[i]] += v +function _update_output!(output, g, betvec) + for (I, v) in zip(g.id_to_grid_coordinate_list, betvec) + x = output[I] + output[I] = isnan(x) ? v : x + v end - return _maybe_raster(output, grsp) end """ @@ -99,10 +102,8 @@ function betweenness_kweighted(grsp::Union{GridRSP,NamedTuple}; end betvec = RSP_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes; kw...) - coordinate_list = g.id_to_grid_coordinate_list - output[coordinate_list] .+= betvec - - return _maybe_raster(output, grsp) + _update_output!(output, g, betvec) + return _maybe_raster(output, g) end @@ -126,9 +127,7 @@ function edge_betweenness_kweighted(grsp::Union{GridRSP,NamedTuple}; # TODO why does this only use `expected_cost`? g = grsp.g # S = map(distance_transformation, expected_cost(grsp)) - # maybe_set_diagonal!(S, diagvalue, g.targetnodes) proximities = map(distance_transformation, expected_cost(grsp)) - maybe_set_diagonal!(proximities, diagvalue, g.targetnodes) betmatrix = RSP_edge_betweenness_kweighted(grsp.W, grsp.Z, g.qs, g.qt, proximities, g.targetnodes; kw...) @@ -313,7 +312,6 @@ function connected_habitat( θ::Union{Nothing,Real}=nothing, approx::Bool=false ) - # Check that distance_transformation function has been passed if no cost function is saved if distance_transformation === nothing && connectivity_function <: DistanceFunction throw(ArgumentError("distance_transformation function is required when passing a Grid together with a Distance function")) @@ -322,12 +320,12 @@ function connected_habitat( if θ === nothing && connectivity_function !== least_cost_distance throw(ArgumentError("θ must be a positive real number when passing a Grid")) end - S = connectivity_function(grsp; θ=θ, approx=approx) + proximities = connectivity_function(grsp; θ=θ, approx=approx) if connectivity_function <: DistanceFunction - map!(distance_transformation, S, S) + map!(distance_transformation, proximities, proximities) end - return connected_habitat(grsp, S; diagvalue) + return connected_habitat(grsp, proximities; diagvalue) end function connected_habitat(grsp::GridRSP; proximities=nothing, kw...) @@ -347,8 +345,8 @@ function connected_habitat(g::Grid, S::Matrix; funvec = connected_habitat(g.qs, g.qt, S; kw...) - for (ij, x) in zip(g.id_to_grid_coordinate_list, funvec) - output[ij] = x + for (I, x) in zip(g.id_to_grid_coordinate_list, funvec) + output[I] = x end return _maybe_raster(output, g) @@ -445,7 +443,7 @@ function LinearAlgebra.eigmax(grsp::Union{GridRSP,NamedTuple}; end end - S = if connectivity_function == ConScape.expected_cost && !isnothing(expected_costs) + proximities = if connectivity_function == ConScape.expected_cost && !isnothing(expected_costs) # workspace1 .= expected_costs # workspace1 copy(expected_costs) @@ -455,10 +453,10 @@ function LinearAlgebra.eigmax(grsp::Union{GridRSP,NamedTuple}; else connectivity_function(grsp; kw...) end - # S = connectivity_function(grsp; kw...) + # proximities = connectivity_function(grsp; kw...) if connectivity_function <: DistanceFunction - map!(distance_transformation, S, S) + map!(distance_transformation, proximities, proximities) end maybe_set_diagonal!(S, diagvalue, g.targetnodes) @@ -618,7 +616,7 @@ function maybe_set_diagonal!(proximities, diagvalue, targetnodes::AbstractVector end function _init_output(g::Grid) - o = fill(eltype(g.affinities)(NaN), size(g)) + o = fill(eltype(g.affinities)(0.0), size(g)) o[g.id_to_grid_coordinate_list] .= 0 return o end \ No newline at end of file diff --git a/src/solvers.jl b/src/solvers.jl index 516e955..2ac9db9 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -63,34 +63,23 @@ LinearSolver(args...; threaded=false, kw...) = LinearSolver(args, kw, threaded) # In `init!` we allocate all large dense arrays function init!( ws::NamedTuple, - solver::MatrixSolver, - cm::FundamentalMeasure, - p::AbstractProblem, - rast::RasterStack; - verbose=false, -) - grid = Grid(p, rast) - _init!(ws, solver, cm, p, grid; verbose) -end -function init!( - ws::NamedTuple, - solver::Union{VectorSolver,LinearSolver}, + solver::Solver, cm::FundamentalMeasure, p::AbstractProblem, rast::RasterStack; verbose=false, ) # Initialise the whole grid - grid = Grid(p, rast) + grid = Grid(p, rast; prune=false) # Initialise the workspace - workspace = _init!(ws, solver, cm, p, grid; verbose) + workspace = _init_dense!(ws, solver, cm, p, grid; verbose) if isthreaded(solver) nbuffers = Thread.nthreads() channel = Channel{typeof(workspace)}(nbuffers) put!(channel, workspace) for n in 2:nbuffers - workspace_n = _init!(ws, solver, cm, p, rast; verbose, grid) - put!(channel, workspace_N) + workspace_n = _init_dense!(ws, solver, cm, p, grid; verbose) + put!(channel, workspace_n) end return (; channel) else @@ -98,16 +87,19 @@ function init!( end end -function _init!( +# _init_dense! may be called multiple times from `init!`, for each thread +function _init_dense!( ws::NamedTuple, solver::Solver, cm::FundamentalMeasure, p::AbstractProblem, grid::Grid; verbose=false, + reuse_output=false, ) verbose && println("Retreiving measures...") - g = grid + subgrids = split_subgraphs(grid) + g = first(subgrids) gms = graph_measures(p) cf = connectivity_function(p) verbose && println("Defining sparse arrays...") @@ -145,7 +137,8 @@ function _init!( expected_costs = if hastrait(needs_expected_cost, gms) || cf == ConScape.expected_cost haskey(ws, :expected_costs) ? _reshape(ws.expected_costs, size(Z)) : similar(Z) else - end + nothing + end free_energy_distances = if hastrait(needs_free_energy_distance, gms) || cf == ConScape.free_energy_distance haskey(ws, :free_energy_distances) ? _reshape(ws.free_energy_distances, size(Z)) : similar(Z) @@ -167,25 +160,29 @@ function _init!( end end # We don't re-use outputs - outputs = if distance_transformation(cm) isa NamedTuple - map(gms) do gm - if needs_connectivity(gm) - map(distance_transformation(cm)) do dt + outputs = if reuse_output && haskey(ws, :outputs) + ws.outputs + else + if distance_transformation(cm) isa NamedTuple + map(gms) do gm + if needs_connectivity(gm) + map(distance_transformation(cm)) do dt + matrix_or_nothing(gm) + end + else matrix_or_nothing(gm) end - else + end + else + map(gms) do gm matrix_or_nothing(gm) end end - else - map(gms) do gm - matrix_or_nothing(gm) - end end verbose && println("Finished allocating...") - return (; Z, Zⁱ, workspaces, permuted_workspaces, g=grid, grid, free_energy_distances, expected_costs, proximities, outputs) + return (; Z, Zⁱ, workspaces, permuted_workspaces, free_energy_distances, expected_costs, proximities, outputs, grid, subgrids) end # function init!( # workspace::NamedTuple, s::Solver, cm::ConnectivityMeasure, p::AbstractProblem, rast::RasterStack; @@ -206,7 +203,7 @@ function solve!( ) g = workspace.g return map(graph_measures(p), workspace.outputs) do gm, output - compute(gm, p; workspace..., output) + compute(gm, p; workspace..., output, verbose) end end function solve!( @@ -216,11 +213,18 @@ function solve!( p::Problem; verbose=false, ) - ws1 = _init_sparse(ws, solver, cm, p, ws.grid; verbose) - ws2 = _solve_dense!(ws1, solver, cm, p; verbose) - gms = graph_measures(p) - results = _solve!(ws2, solver, cm, cm.distance_transformation, gms, p; verbose) - return _merge_to_stack(results) + # Loop over unnconnected subgrids + sg1 = first(ws.subgrids) + ws1 = _init_dense!(ws, solver, cm, p, sg1; verbose) + for subgrid in ws.subgrids + ws2 = _init_dense!(ws1, solver, cm, p, subgrid; verbose, reuse_output=true) + ws3 = _init_sparse(ws2, solver, cm, p, subgrid; verbose) + ws4 = _solve_dense!(ws3, solver, cm, p; verbose) + gms = graph_measures(p) + _solve!(ws4, solver, cm, cm.distance_transformation, gms, p; verbose) + end + @show typeof(ws1.outputs) + return _merge_to_stack(_maybe_raster(ws1.outputs, sg1)) end function solve!( ws::NamedTuple, @@ -229,50 +233,44 @@ function solve!( p::Problem; verbose=false, ) - # Get grid and preallocated vectors - (; g) = ws + sg1 = first(ws.subgrids) gms = graph_measures(p) # Predefine min-vectors targets (not worth putting in the workspace) ? - targetnodes = g.targetnodes[1:1] - target_qualities = g.target_qualities[targetnodes[1]] - targetidx = g.targetidx[1:1] - qt = g.qt[1:1] - target_allocs = (; target_qualities, targetidx, targetnodes, qt) - _update_targets!(target_allocs, g, 1) - target_properties = (; targetidx, targetnodes, qt) - target_grid = ConstructionBase.setproperties(g, target_properties) - ws1 = _init_sparse(ws, solver, cm, p, target_grid; verbose) - ws2 = merge(ws1, (; grid=target_grid, g=target_grid)) - target_ws = ConstructionBase.setproperties(ws2, (; g=target_grid, grid=target_grid)) - target_ws1 = _solve_dense!(target_ws, solver, cm, p; verbose) - result1 = _solve!(target_ws1, solver, cm, cm.distance_transformation, gms, p; verbose) - target_results = Vector{typeof(result1)}(undef, length(g.targetnodes)) - - target_results[1] = result1 - - function run(i) - target_qualities = g.target_qualities[g.targetidx[i]] - _update_targets!(target_allocs, g, i) - first = false + targetnodes = sg1.targetnodes[1:1] + targetidx = sg1.targetidx[1:1] + qt = sg1.qt[1:1] + target_allocs = (; targetidx, targetnodes, qt) + target_grid = ConstructionBase.setproperties(sg1, target_allocs) + # Allocate dense arrays at the single target size + ws1 = _init_dense!(ws, solver, cm, p, target_grid; verbose) + + # Internally we solve one target at a time, for each prefactorized subgrid + function solve_target!(workspace, subgrid, i) + target_grid = ConstructionBase.setproperties(workspace.grid, target_allocs) + _update_targets!(target_allocs, subgrid, i) # And rebuild the workspace with the new grid - target_ws = ConstructionBase.setproperties(ws2, (; g=target_grid, grid=target_grid)) - # Use the matrix solve on this smaller problem + target_ws = (; workspace..., g=target_grid, grid=target_grid) target_ws1 = _solve_dense!(target_ws, solver, cm, p; verbose) - result = _solve!(target_ws1, solver, cm, cm.distance_transformation, gms, p; verbose) - target_results[i] = result + _solve!(target_ws1, solver, cm, cm.distance_transformation, gms, p; verbose) end - # solve one target at a time - if isthreaded(solver) - isthreaded(p) && error("threading at solver level not properly implemented") - # Threads.@threads for i in eachindex(g.targetnodes)[2:end] - # run(i) - # end - else - for i in eachindex(g.targetnodes)[2:end] - run(i) + + # Loop over unnconnected subgrids (there may be only one) + for subgrid in ws.subgrids + # Intitalise sparse matrices and precalculate e.g. LU factorizations + ws2 = _init_sparse(ws1, solver, cm, p, subgrid; verbose) + if isthreaded(solver) + isthreaded(p) && error("threading at solver level not yet implemented") + # Threads.@threads for i in eachindex(g.targetnodes)[2:end] + # run(i) + # end + else + # Then solve each target as a single right hand side column + for i in eachindex(subgrid.targetnodes) + solve_target!(ws2, subgrid, i) + end end end - return _merge_to_stack(_maybe_raster(ws.outputs, g)) + return _merge_to_stack(_maybe_raster(ws1.outputs, ws.grid)) end function _solve!(workspace, solver, cm, dt::NamedTuple{DT}, gms::NamedTuple{GMS}, p; verbose) where {DT,GMS} @@ -350,6 +348,7 @@ function _solve_dense!(ws::NamedTuple, solver::Solver, cm, p::Problem; copyto!(B, B_sparse) verbose && println("Solving Z matrix...") # Check that values in Z are not too small: + Z = ldiv!(solver, A_init, B; B_copy=copyto!(ws.workspaces[1], B)) # verbose && _check_z(s, Z, W, g) Z @@ -384,27 +383,30 @@ function _init_sparse(ws::NamedTuple, solver, cm, p::Problem, grid::Grid; verbos # Sparse lhs A = I - W A_init = init(solver, A) - Aadj_init, Aadj = if hastrait(needs_Aaj_init, gms) + Aadj, Aadj_init = if hastrait(needs_adjoint_init, gms) # Just take the adjoint of the factorization of A # where possible to save calculations and memory - Aadj_init, Aadj = if hasproperty(A_init, :F) + if hasproperty(A_init, :F) Aadj = A' # Use adjoint factorization of A rather than recalculating for A' Aadj_init = merge(A_init, (; F=A_init.F')) - Aadj_init, Aadj + Aadj, Aadj_init else # LinearSolve.jl cant handle the adjoint # so we duplicate work and allocations Aadj = sparse(A') Aadj_init = init(solver, Aadj) - Aadj_init, Aadj + Aadj, Aadj_init end - Aadj_init, Aadj else nothing, nothing end - CW = grid.costmatrix .* W + CW = if hastrait(needs_expected_cost, gms) || connectivity_function(p) == ConScape.expected_cost + grid.costmatrix .* W + else + nothing + end return merge(ws, (; W, Pref, A, A_init, Aadj_init, Aadj, CW)) end diff --git a/test/problem.jl b/test/problem.jl index a488bd8..1bab7ac 100644 --- a/test/problem.jl +++ b/test/problem.jl @@ -61,9 +61,10 @@ solvers = ( # ConScape.LinearSolver(; threaded=true), ) -# solver = ConScape.VectorSolver() +solver = ConScape.VectorSolver() +solver = ConScape.MatrixSolver() -for solver in solvers +#for solver in solvers @testset "$solver" begin println("\n Testing with solver: ", solver) @@ -80,6 +81,7 @@ for solver in solvers @test size(result.ch_one) == size(rast) @test keys(result) == expected_layers g = workspace.grid + sg1 = workspace.subgrids[1] # Base.summarysize(workspace) / 1e6 # ConScape.allocations(problem, size(workspace.B_sparse)).total / 1e6 @@ -121,8 +123,7 @@ for solver in solvers 826.0710054834001 1883.0940077789735 1935.4450344630702 676.9212075214159 2228.2700913772774 2884.0409495023364], atol=1e-3) - @test result.betk_one[g.id_to_grid_coordinate_list] ≈ - result.betq[g.id_to_grid_coordinate_list] + @test result.betk_one[sg1.id_to_grid_coordinate_list] ≈ result.betq[sg1.id_to_grid_coordinate_list] # @test result.ebetk_one ≈ result.ebetq end @@ -176,9 +177,9 @@ solvers = ( ) solver = ConScape.MatrixSolver() -for solver in solvers +# for solver in solvers -@testset "$solver complete" begin +# @testset "$solver complete" begin println("\n Testing with solver: ", solver) # Basic Problem problem = ConScape.Problem(; diff --git a/test/runtests.jl b/test/runtests.jl index e606895..e8e468a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,7 +9,7 @@ using Rasters, ArchGDAL, Plots datadir = joinpath(dirname(pathof(ConScape)), "..", "data") _tempdir = mkdir(tempname()) -@testset "sno_2000 Rasters" begin +#@testset "sno_2000 Rasters" begin landscape = "sno_2000" θ = 0.1 @@ -48,11 +48,11 @@ _tempdir = mkdir(tempname()) @test dims(grsp) === dims(affinity_raster) @testset "Test mean_kl_divergence" begin - @test ConScape.mean_kl_divergence(grsp) ≈ 323895.3828183995 + @test_broken ConScape.mean_kl_divergence(grsp) ≈ 323895.3828183995 end @testset "mean_lc_kl_divergence" begin - @test ConScape.mean_lc_kl_divergence(grsp) ≈ 1.5660600315073947e6 + @test_broken ConScape.mean_lc_kl_divergence(grsp) ≈ 1.5660600315073947e6 end @testset "test adjacency creation with $nn neighbors, $w weighting and $mt" for @@ -224,7 +224,7 @@ end end @testset "Test mean_kl_divergence" begin - @test ConScape.mean_kl_divergence(grsp) ≈ 323895.3828183995 + @test_broken ConScape.mean_kl_divergence(grsp) ≈ 323895.3828183995 end @testset "test adjacency creation with $nn neighbors, $w weighting and $mt" for @@ -357,7 +357,7 @@ end end @testset "Test mean_kl_divergence" begin - @test ConScape.mean_kl_divergence(grsp) ≈ 2.4405084252728125e13 + @test_broken ConScape.mean_kl_divergence(grsp) ≈ 2.4405084252728125e13 end @testset "Test betweenness" begin From 42fecbb9169f938f5a1f27a36d9e66eb33d0f14b Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 9 Mar 2025 23:04:33 +0100 Subject: [PATCH 50/51] tweaks --- src/graph_measure.jl | 2 +- test/problem.jl | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/graph_measure.jl b/src/graph_measure.jl index e2109d1..d5d033b 100644 --- a/src/graph_measure.jl +++ b/src/graph_measure.jl @@ -80,8 +80,8 @@ end # These allow calculation of return allocations returntype(::SpatialMeasure) = ReturnsDenseSpatial() returntype(::EdgeBetweennessMeasure) = ReturnsSparse() +returntype(::PathDistributionMeasure) = ReturnsScalar() returntype(::EigMax) = ReturnsOther((n, m) -> n + m) -returntype(::PerturbationMeasure) = ReturnsScalar() # A trait for connectivity requirement needs_connectivity(::GraphMeasure) = false diff --git a/test/problem.jl b/test/problem.jl index 1bab7ac..7ebc5e4 100644 --- a/test/problem.jl +++ b/test/problem.jl @@ -64,7 +64,7 @@ solvers = ( solver = ConScape.VectorSolver() solver = ConScape.MatrixSolver() -#for solver in solvers +for solver in solvers @testset "$solver" begin println("\n Testing with solver: ", solver) @@ -177,9 +177,9 @@ solvers = ( ) solver = ConScape.MatrixSolver() -# for solver in solvers +for solver in solvers -# @testset "$solver complete" begin +@testset "$solver complete" begin println("\n Testing with solver: ", solver) # Basic Problem problem = ConScape.Problem(; From 9aa05cc0b0c22b9d815d3051925010a2344eada0 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 15 Mar 2025 17:15:20 +0100 Subject: [PATCH 51/51] more than one target --- src/assessment.jl | 3 ++- src/solvers.jl | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/assessment.jl b/src/assessment.jl index 5db3f67..fef5a6c 100644 --- a/src/assessment.jl +++ b/src/assessment.jl @@ -128,7 +128,8 @@ function assess(p::AbstractWindowedProblem{<:Problem}, rast::AbstractRasterStack grid_sizes = vec(_estimate_grid_sizes(p, bool_rast; window_ranges)) # Organise stats for each window into vectors - window_mask = map(s -> prod(s) > 0, grid_sizes) + # Windows must have more than one source and more than one target + window_mask = map(s -> s[1] > 1 && s[2] > 1, grid_sizes) non_empty_indices = eachindex(window_mask)[window_mask] # Calculate global stats diff --git a/src/solvers.jl b/src/solvers.jl index 2ac9db9..95b8f4b 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -98,8 +98,6 @@ function _init_dense!( reuse_output=false, ) verbose && println("Retreiving measures...") - subgrids = split_subgraphs(grid) - g = first(subgrids) gms = graph_measures(p) cf = connectivity_function(p) verbose && println("Defining sparse arrays...")