From fcb860eff399b6f0b2b6ab2a784e231a4a4350e6 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Tue, 20 Aug 2024 11:06:09 +0200 Subject: [PATCH 01/19] Add algorithm --- Project.toml | 8 +- src/L2Penalty_alg.jl | 322 +++++++++++++++++++++++++++++++++ src/RegularizedOptimization.jl | 5 +- test/runtests.jl | 22 ++- 4 files changed, 351 insertions(+), 6 deletions(-) create mode 100644 src/L2Penalty_alg.jl diff --git a/Project.toml b/Project.toml index d5ba1185..13d9c3ed 100644 --- a/Project.toml +++ b/Project.toml @@ -4,26 +4,30 @@ author = ["Robert Baraldi and Dominique Orban cons!(nlp, x, c), + (j, x) -> jac_coord!(nlp, x, j.vals), + A, + b + ) + + # Allocate sub_ψ = ||c(x)|| to solve min f(x) + τ||c(x)|| + sub_ψ = CompositeNormL2(1.0, + (c, x) -> cons!(nlp, x, c), + (j, x) -> jac_coord!(nlp, x, j.vals), + A, + b + ) + sub_nlp = RegularizedNLPModel(nlp, sub_ψ) + sub_stats = GenericExecutionStats(nlp) + + return L2PenaltySolver( + x, + s, + s0, + ψ, + sub_ψ, + sub_solver(sub_nlp), + sub_stats + ) +end + + +""" + L2Penalty(nlp; kwargs…) + +An exact ℓ₂-penalty method for the problem + + min f(x) s.t c(x) = 0 + +where f: ℝⁿ → ℝ and c: ℝⁿ → ℝᵐ respectively have a Lipschitz-continuous gradient and Jacobian. + +At each iteration k, an iterate is computed as + + xₖ ∈ argmin f(x) + τₖ‖c(x)‖₂ + +where τₖ is some penalty parameter. +This nonsmooth problem is solved using `R2` (see `R2` for more information) with the first order model ψ(s;x) = τₖ‖c(x) + J(x)s‖₂ + +For advanced usage, first define a solver "L2PenaltySolver" to preallocate the memory used in the algorithm, and then call `solve!`: + + solver = L2PenaltySolver(nlp) + solve!(solver, nlp) + + stats = GenericExecutionStats(nlp) + solver = L2PenaltySolver(nlp) + solve!(solver, nlp, stats) + +# Arguments +* `nlp::AbstractNLPModel{T, V}`: the problem to solve, see `RegularizedProblems.jl`, `NLPModels.jl`. + +# Keyword arguments +- `x::V = nlp.meta.x0`: the initial guess; +- `atol::T = √eps(T)`: absolute tolerance; +- `rtol::T = √eps(T)`: relative tolerance; +- `neg_tol::T = eps(T)^(1 / 4)`: negative tolerance +- `ktol::T = eps(T)^(1 / 4)`: the initial tolerance sent to the subsolver +- `max_eval::Int = -1`: maximum number of evaluation of the objective function (negative number means unlimited); +- `sub_max_eval::Int = -1`: maximum number of evaluation for the subsolver (negative number means unlimited); +- `max_time::Float64 = 30.0`: maximum time limit in seconds; +- `max_iter::Int = 10000`: maximum number of iterations; +- `sub_max_iter::Int = 10000`: maximum number of iterations for the subsolver; +- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration; +- `sub_verbose::Int = 0`: if > 0, display subsolver iteration details every `verbose` iteration; +- `τ::T = T(100)`: initial penalty parameter; +- `β1::T = τ`: penalty update parameter: τₖ <- τₖ + β1; +- `β2::T = T(0.1)`: tolerance decreasing factor, at each iteration, ktol <- β2*ktol; +- `β3::T = 1/τ`: initial regularization parameter σ₀ = β3/τₖ at each iteration; +- `β4::T = eps(T)`: minimal regularization parameter σ for `R2`; +other 'kwargs' are passed to `R2` (see `R2` for more information). + +The algorithm stops either when `√θₖ < atol + rtol*√θ₀ ` or `θₖ < 0` and `√(-θₖ) < neg_tol` where θₖ := ‖c(xₖ)‖₂ - ‖c(xₖ) + J(xₖ)sₖ‖₂, and √θₖ is a stationarity measure. + +# Output +The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. + +# Callback +The callback is called at each iteration. +The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. +Changing any of the input arguments will affect the subsequent iterations. +In particular, setting `stats.status = :user` will stop the algorithm. +All relevant information should be available in `nlp` and `solver`. +Notably, you can access, and modify, the following: +- `solver.x`: current iterate; +- `solver.sub_solver`: a `R2Solver` structure holding relevant information on the subsolver state, see `R2` for more information; +- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. + - `stats.elapsed_time`: elapsed time in seconds. +You can also use the `sub_callback` keyword argument which has exactly the same structure and in sent to `R2`. +""" +function L2Penalty( + nlp::AbstractNLPModel{T, V}; + kwargs...) where{ T <: Real, V } + if !equality_constrained(nlp) + error("L2Penalty: This algorithm only works for equality contrained problems.") + end + solver = L2PenaltySolver(nlp) + stats = GenericExecutionStats(nlp) + solve!( + solver, + nlp, + stats; + kwargs... + ) + return stats +end + +function SolverCore.solve!( + solver::L2PenaltySolver{T, V}, + nlp::AbstractNLPModel{T, V}, + stats::GenericExecutionStats{T, V}; + callback = (args...) -> nothing, + sub_callback = (args...) -> nothing, + x::V = nlp.meta.x0, + atol::T = √eps(T), + rtol::T = √eps(T), + neg_tol = eps(T)^(1/4), + ktol::T = eps(T)^(1/4), + max_iter::Int = 10000, + sub_max_iter::Int = 10000, + max_time::T = T(30.0), + max_eval::Int = -1, + sub_max_eval::Int = -1, + verbose = 0, + sub_verbose = 0, + τ::T = T(100), + β1::T = τ, + β2::T = T(0.1), + β3::T = 1/τ, + β4::T = eps(T), + kwargs..., + ) where {T, V} + + reset!(stats) + + # Retrieve workspace + h = NormL2(1.0) + ψ = solver.ψ + sub_ψ = solver.sub_ψ + sub_ψ.h = NormL2(τ) + solver.sub_solver.ψ.h = NormL2(τ) + + x = solver.x .= x + s = solver.s + s0 = solver.s0 + shift!(ψ, x) + fx = obj(nlp, x) + hx = h(ψ.b) + + if verbose > 0 + @info log_header( + [:iter, :sub_iter, :fx, :hx, :theta, :xi, :epsk, :tau, :normx], + [Int, Int, Float64, Float64, Float64, Float64, Float64, Float64, Float64], + hdr_override = Dict{Symbol,String}( # TODO: Add this as constant dict elsewhere + :iter => "outer", + :sub_iter => "inner", + :fx => "f(x)", + :hx => "h(x)", + :theta => "√θ", + :xi => "√(ξ/ν)", + :epsk => "ϵₖ", + :tau => "τ", + :normx => "‖x‖" + ), + colsep = 1, + ) + end + + set_iter!(stats, 0) + rem_eval = max_eval + start_time = time() + set_time!(stats, 0.0) + set_objective!(stats,fx + hx) + set_solver_specific!(stats,:smooth_obj,fx) + set_solver_specific!(stats,:nonsmooth_obj, hx) + + local θ::T + prox!(s, ψ, s0, 1.0) + θ = hx - ψ(s) + θ ≥ 0 || error("L2Penalty: prox-gradient step should produce a decrease but θ = $(θ)") + sqrt_θ = sqrt(θ) + + atol += rtol * sqrt_θ # make stopping test absolute and relative + ktol = max(ktol,atol) # Keep ϵ₀ ≥ ϵ + tol_init = ktol # store value of ϵ₀ + + done = false + + while !done + model = RegularizedNLPModel(nlp, sub_ψ) + solve!( + solver.sub_solver, + model, + solver.sub_stats; + callback = sub_callback, + x = x, + atol = ktol, + rtol = T(0), + neg_tol = neg_tol, + verbose = sub_verbose, + max_iter = sub_max_iter, + max_time = max_time - stats.elapsed_time, + max_eval = min(rem_eval,sub_max_eval), + σmin = β4, + ν = 1/max(β4,β3*τ), + kwargs..., + ) + + x .= solver.sub_stats.solution + fx = solver.sub_stats.solver_specific[:smooth_obj] + hx = solver.sub_stats.solver_specific[:nonsmooth_obj]/τ + sqrt_ξ_νInv = solver.sub_stats.solver_specific[:xi] + + shift!(ψ, x) + prox!(s, ψ, s0, 1.0) + + θ = hx - ψ(s) + sqrt_θ = sqrt(θ) + + if sqrt_θ > ktol + τ = τ + β1 + sub_ψ.h = NormL2(τ) + solver.sub_solver.ψ.h = NormL2(τ) + else + ktol = max(β2^(ceil(log(β2,sqrt_ξ_νInv/tol_init)))*ktol,atol) #the β^... allows to directly jump to a sufficiently small ϵₖ + end + + solved = (sqrt_θ ≤ atol && solver.sub_stats.status == :first_order) || (θ < 0 && sqrt_θ ≤ neg_tol && solver.sub_stats.status == :first_order) + (θ < 0 && sqrt_θ > neg_tol) && error("L2Penalty: prox-gradient step should produce a decrease but θ = $(θ)") + + verbose > 0 && + stats.iter % verbose == 0 && + @info log_row(Any[stats.iter, solver.sub_stats.iter, fx, hx ,sqrt_θ, sqrt_ξ_νInv, ktol, τ, norm(x)], colsep = 1) + + set_iter!(stats, stats.iter + 1) + rem_eval = max_eval - neval_obj(nlp) + set_time!(stats, time() - start_time) + set_objective!(stats,fx + hx) + set_solver_specific!(stats,:smooth_obj,fx) + set_solver_specific!(stats,:nonsmooth_obj, hx) + set_solver_specific!(stats, :theta, sqrt_θ) + + set_status!( + stats, + get_status( + nlp, + elapsed_time = stats.elapsed_time, + iter = stats.iter, + optimal = solved, + max_eval = max_eval, + max_time = max_time, + max_iter = max_iter + ), + ) + + callback(nlp, solver, stats) + + done = stats.status != :unknown + end + +end + +function get_status( + nlp::M; + elapsed_time = 0.0, + iter = 0, + optimal = false, + max_eval = Inf, + max_time = Inf, + max_iter = Inf, +) where{ M <: AbstractNLPModel } + if optimal + :first_order + elseif iter > max_iter + :max_iter + elseif elapsed_time > max_time + :max_time + elseif neval_obj(nlp) > max_eval && max_eval > -1 + :max_eval + else + :unknown + end +end \ No newline at end of file diff --git a/src/RegularizedOptimization.jl b/src/RegularizedOptimization.jl index e92f21da..b6415997 100644 --- a/src/RegularizedOptimization.jl +++ b/src/RegularizedOptimization.jl @@ -7,7 +7,7 @@ using LinearAlgebra, Logging, Printf using ProximalOperators, TSVD # dependencies from us -using LinearOperators, NLPModels, NLPModelsModifiers, RegularizedProblems, ShiftedProximalOperators, SolverCore +using LinearOperators, NLPModels, NLPModelsModifiers, RegularizedProblems, ShiftedProximalOperators, SolverCore, SparseMatricesCOO include("utils.jl") include("input_struct.jl") @@ -19,5 +19,6 @@ include("TRDH_alg.jl") include("R2_alg.jl") include("LM_alg.jl") include("LMTR_alg.jl") +include("L2Penalty_alg.jl") -end # module RegularizedOptimization +end # module RegularizedOptimization \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 28e91086..d719b54a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,7 +1,7 @@ using LinearAlgebra: length using LinearAlgebra, Random, Test using ProximalOperators -using NLPModels, NLPModelsModifiers, RegularizedProblems, RegularizedOptimization +using NLPModels, NLPModelsModifiers, RegularizedProblems, RegularizedOptimization, RegularizedOptimization, OptimizationProblems, ADNLPModels, OptimizationProblems.ADNLPProblems const global compound = 1 const global nz = 10 * compound @@ -10,6 +10,24 @@ const global bpdn, bpdn_nls, sol = bpdn_model(compound) const global bpdn2, bpdn_nls2, sol2 = bpdn_model(compound, bounds = true) const global λ = norm(grad(bpdn, zeros(bpdn.meta.nvar)), Inf) / 10 +meta = OptimizationProblems.meta +problem_list = meta[(meta.has_equalities_only .== 1) .& (meta.has_bounds.==0) .& (meta.has_fixed_variables.==0) .& (meta.variable_nvar .== 0), :] + +for problem ∈ eachrow(problem_list) + for (nlp,subsolver_name) ∈ ((eval(Meta.parse(problem.name))(),"R2"),) + @testset "Optimization Problems - $(problem.name) - L2Penalty - $(subsolver_name)" begin + out = L2Penalty( + nlp, + verbose = 1, + ) + @test typeof(out.solution) == typeof(nlp.meta.x0) + @test length(out.solution) == nlp.meta.nvar + @test typeof(out.dual_feas) == eltype(out.solution) + @test out.status == :first_order + end + end +end + for (mod, mod_name) ∈ ((x -> x, "exact"), (LSR1Model, "lsr1"), (LBFGSModel, "lbfgs")) for (h, h_name) ∈ ((NormL0(λ), "l0"), (NormL1(λ), "l1"), (IndBallL0(10 * compound), "B0")) for solver_sym ∈ (:R2, :TR) @@ -134,4 +152,4 @@ for (h, h_name) ∈ ((NormL1(λ), "l1"),) end end -include("test_bounds.jl") +include("test_bounds.jl") \ No newline at end of file From e6497aa9979d916df0f016f8de27ce99ea0aa172 Mon Sep 17 00:00:00 2001 From: MohamedLaghdafHABIBOULLAH Date: Thu, 12 Sep 2024 11:58:07 -0400 Subject: [PATCH 02/19] Add R2N, R2DH and update LM --- src/LM_alg.jl | 20 ++- src/R2DH.jl | 297 +++++++++++++++++++++++++++++++++ src/R2N.jl | 292 ++++++++++++++++++++++++++++++++ src/RegularizedOptimization.jl | 2 + 4 files changed, 604 insertions(+), 7 deletions(-) create mode 100644 src/R2DH.jl create mode 100644 src/R2N.jl diff --git a/src/LM_alg.jl b/src/LM_alg.jl index 56bc5356..a23973a4 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -104,13 +104,15 @@ function LM( k = 0 Fobj_hist = zeros(maxIter) Hobj_hist = zeros(maxIter) + R = eltype(xk) + sqrt_ξ1_νInv = one(R) Complex_hist = zeros(Int, maxIter) Grad_hist = zeros(Int, maxIter) Resid_hist = zeros(Int, maxIter) if verbose > 0 #! format: off - @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "‖Jₖ‖²" "reg" + @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√(ξ1/ν)" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "‖Jₖ‖²" "reg" #! format: on end @@ -176,14 +178,15 @@ function LM( prox!(s, ψ, ∇fk, ν) ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps() # TODO: isn't mk(s) returned by subsolver? ξ1 > 0 || error("LM: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + sqrt_ξ1_νInv = sqrt(ξ1 * νInv) if ξ1 ≥ 0 && k == 1 - ϵ_increment = ϵr * sqrt(ξ1) + ϵ_increment = ϵr * sqrt_ξ1_νInv ϵ += ϵ_increment # make stopping test absolute and relative ϵ_subsolver += ϵ_increment end - if sqrt(ξ1) < ϵ + if sqrt_ξ1_νInv < ϵ # the current xk is approximately first-order stationary optimal = true continue @@ -191,7 +194,8 @@ function LM( subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) subsolver_options.ν = ν - subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : () + #subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : () + subsolver_args = subsolver == R2DH ? (SpectralGradient(1., nls.meta.nvar),) : () @debug "setting inner stopping tolerance to" subsolver_options.optTol s, iter, _ = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) @@ -221,7 +225,7 @@ function LM( if (verbose > 0) && (k % ptf == 0) #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk σk norm(xk) norm(s) νInv σ_stat + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ) ρk σk norm(xk) norm(s) νInv σ_stat #! format: off end @@ -244,6 +248,7 @@ function LM( jtprod_residual!(nls, xk, Fk, ∇fk) σmax = opnorm(Jk) + Complex_hist[k] += 1 end @@ -252,6 +257,7 @@ function LM( σk = σk * γ end νInv = (1 + θ) * (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ + tired = k ≥ maxIter || elapsed_time > maxTime end @@ -260,9 +266,9 @@ function LM( @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk elseif optimal #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" σk norm(xk) norm(s) νInv + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) νInv #! format: on - @info "LM: terminating with √ξ1 = $(sqrt(ξ1))" + @info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" end end status = if optimal diff --git a/src/R2DH.jl b/src/R2DH.jl new file mode 100644 index 00000000..0e7157b0 --- /dev/null +++ b/src/R2DH.jl @@ -0,0 +1,297 @@ +export R2DH + +""" + R2DH(nlp, h, options) + R2DH(f, ∇f!, h, options, x0) + +A first-order quadratic regularization method for the problem + + min f(x) + h(x) + +where f: ℝⁿ → ℝ has a Lipschitz-continuous gradient, and h: ℝⁿ → ℝ is +lower semi-continuous, proper and prox-bounded. + +About each iterate xₖ, a step sₖ is computed as a solution of + + min φ(s; xₖ) + ψ(s; xₖ) + +where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ (σₖ+Dₖ) s (if `summation = true`) and φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ (σₖ+Dₖ) s (if `summation = false`) is a quadratic approximation of f about xₖ, +ψ(s; xₖ) = h(xₖ + s), ‖⋅‖ is a user-defined norm, Dₖ is a diagonal Hessian approximation +and σₖ > 0 is the regularization parameter. + +### Arguments + +* `nlp::AbstractDiagonalQNModel`: a smooth optimization problem +* `h`: a regularizer such as those defined in ProximalOperators +* `options::ROSolverOptions`: a structure containing algorithmic parameters +* `x0::AbstractVector`: an initial guess (in the second calling form) + +### Keyword Arguments + +* `x0::AbstractVector`: an initial guess (in the first calling form: default = `nlp.meta.x0`) +* `selected::AbstractVector{<:Integer}`: (default `1:length(x0)`). +* `Bk`: initial diagonal Hessian approximation (default: `(one(R) / options.ν) * I`). +* `summation`: boolean used to choose between the two versions of R2DH (see above, default : `true`). + +The objective and gradient of `nlp` will be accessed. + +In the second form, instead of `nlp`, the user may pass in + +* `f` a function such that `f(x)` returns the value of f at x +* `∇f!` a function to evaluate the gradient in place, i.e., such that `∇f!(g, x)` store ∇f(x) in `g` + +### Return values + +* `xk`: the final iterate +* `Fobj_hist`: an array with the history of values of the smooth objective +* `Hobj_hist`: an array with the history of values of the nonsmooth objective +* `Complex_hist`: an array with the history of number of inner iterations. +""" +function R2DH( + nlp::AbstractDiagonalQNModel{R, S}, + h, + options::ROSolverOptions{R}; + kwargs..., + ) where {R <: Real, S} + kwargs_dict = Dict(kwargs...) + x0 = pop!(kwargs_dict, :x0, nlp.meta.x0) + xk, k, outdict = R2DH( + x -> obj(nlp, x), + (g, x) -> grad!(nlp, x, g), + h, + hess_op(nlp, x0), + options, + x0; + kwargs..., + ) + ξ = outdict[:ξ] + stats = GenericExecutionStats(nlp) + set_status!(stats, outdict[:status]) + set_solution!(stats, xk) + set_objective!(stats, outdict[:fk] + outdict[:hk]) + set_residuals!(stats, zero(eltype(xk)), ξ) + set_iter!(stats, k) + set_time!(stats, outdict[:elapsed_time]) + set_solver_specific!(stats, :Fhist, outdict[:Fhist]) + set_solver_specific!(stats, :Hhist, outdict[:Hhist]) + set_solver_specific!(stats, :NonSmooth, outdict[:NonSmooth]) + set_solver_specific!(stats, :SubsolverCounter, outdict[:Chist]) + return stats + end + +function R2DH( + f::F, + ∇f!::G, + h::H, + D::DQN, + options::ROSolverOptions{R}, + x0::AbstractVector{R}; + Mmonotone::Int = 5, + selected::AbstractVector{<:Integer} = 1:length(x0), + summation::Bool = true, + kwargs..., +) where {F <: Function, G <: Function, H, R <: Real, DQN <: AbstractDiagonalQuasiNewtonOperator} + start_time = time() + elapsed_time = 0.0 + ϵ = options.ϵa + ϵr = options.ϵr + neg_tol = options.neg_tol + verbose = options.verbose + maxIter = options.maxIter + maxTime = options.maxTime + σmin = options.σmin + η1 = options.η1 + η2 = options.η2 + ν = options.ν + γ = options.γ + + local l_bound, u_bound + has_bnds = false + for (key, val) in kwargs + if key == :l_bound + l_bound = val + has_bnds = has_bnds || any(l_bound .!= R(-Inf)) + elseif key == :u_bound + u_bound = val + has_bnds = has_bnds || any(u_bound .!= R(Inf)) + end + end + + if verbose == 0 + ptf = Inf + elseif verbose == 1 + ptf = round(maxIter / 10) + elseif verbose == 2 + ptf = round(maxIter / 100) + else + ptf = 1 + end + + # initialize parameters + xk = copy(x0) + hk = h(xk[selected]) + if hk == Inf + verbose > 0 && @info "R2DH: finding initial guess where nonsmooth term is finite" + prox!(xk, h, x0, one(eltype(x0))) + hk = h(xk[selected]) + hk < Inf || error("prox computation must be erroneous") + verbose > 0 && @debug "R2DH: found point where h has value" hk + end + hk == -Inf && error("nonsmooth term is not proper") + + xkn = similar(xk) + s = zero(xk) + ψ = has_bnds ? shifted(h, xk, l_bound - xk, u_bound - xk, selected) : shifted(h, xk) + + Fobj_hist = zeros(maxIter) + Hobj_hist = zeros(maxIter) + FHobj_hist = fill!(Vector{R}(undef, Mmonotone), R(-Inf)) + Complex_hist = zeros(Int, maxIter) + if verbose > 0 + #! format: off + @info @sprintf "%6s %8s %8s %7s %8s %7s %7s %7s %1s" "iter" "f(x)" "h(x)" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "" + #! format: off + end + + local ξ + k = 0 + σk = summation ? σmin : max(1 / ν, σmin) + + fk = f(xk) + ∇fk = similar(xk) + ∇f!(∇fk, xk) + ∇fk⁻ = copy(∇fk) + spectral_test = isa(D, SpectralGradient) + D.d .= summation ? D.d .+ σk : D.d .* σk + DNorm = norm(D.d, Inf) + + + ν = 1 / DNorm + mν∇fk = -ν * ∇fk + sqrt_ξ_νInv = one(R) + + optimal = false + tired = maxIter > 0 && k ≥ maxIter || elapsed_time > maxTime + + while !(optimal || tired) + k = k + 1 + elapsed_time = time() - start_time + Fobj_hist[k] = fk + Hobj_hist[k] = hk + Mmonotone > 0 && (FHobj_hist[mod(k-1, Mmonotone) + 1] = fk + hk) + + D.d .= max.(D.d, eps(R)) + + + # model with diagonal hessian + φ(d) = ∇fk' * d + (d' * (D.d .* d)) / 2 + mk(d) = φ(d) + ψ(d) + + if spectral_test + prox!(s, ψ, mν∇fk, ν) + else + iprox!(s, ψ, ∇fk, D) + end + + # iprox!(s, ψ, ∇fk, D) + + Complex_hist[k] += 1 + xkn .= xk .+ s + fkn = f(xkn) + hkn = h(xkn[selected]) + hkn == -Inf && error("nonsmooth term is not proper") + + fhmax = Mmonotone > 0 ? maximum(FHobj_hist) : fk + hk + Δobj = fhmax - (fkn + hkn) + max(1, abs(fhmax)) * 10 * eps() + Δmod = fhmax - (fk + mk(s)) + max(1, abs(hk)) * 10 * eps() + ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() + sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν) : sqrt(-ξ / ν) + + if ξ ≥ 0 && k == 1 + ϵ += ϵr * sqrt_ξ_νInv # make stopping test absolute and relative + end + + if (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv < ϵ) + # the current xk is approximately first-order stationary + optimal = true + continue + end + + if (ξ ≤ 0 || isnan(ξ)) + error("R2DH: failed to compute a step: ξ = $ξ") + end + + ρk = Δobj / Δmod + + σ_stat = (η2 ≤ ρk < Inf) ? "↘" : (ρk < η1 ? "↗" : "=") + + if (verbose > 0) && (k % ptf == 0) + #! format: off + @info @sprintf "%6d %8.1e %8.1e %7.1e %8.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt_ξ_νInv ρk σk norm(xk) norm(s) σ_stat + #! format: on + end + + if η2 ≤ ρk < Inf + σk = max(σk / γ, σmin) + end + + if η1 ≤ ρk < Inf + xk .= xkn + has_bnds && set_bounds!(ψ, l_bound - xk, u_bound - xk) + fk = fkn + hk = hkn + shift!(ψ, xk) + ∇f!(∇fk, xk) + push!(D, s, ∇fk - ∇fk⁻) # update QN operator + DNorm = norm(D.d, Inf) + ∇fk⁻ .= ∇fk + end + + if ρk < η1 || ρk == Inf + σk = σk * γ + end + + D.d .= summation ? D.d .+ σk : D.d .* σk + DNorm = norm(D.d, Inf) + ν = 1 / DNorm + + tired = maxIter > 0 && k ≥ maxIter + if !tired + @. mν∇fk = -ν * ∇fk + end + end + + if verbose > 0 + if k == 1 + @info @sprintf "%6d %8.1e %8.1e" k fk hk + elseif optimal + #! format: off + @info @sprintf "%6d %8.1e %8.1e %7.1e %8s %7.1e %7.1e %7.1e" k fk hk sqrt_ξ_νInv "" σk norm(xk) norm(s) + #! format: on + @info "R2DH: terminating with √(ξ/ν) = $(sqrt_ξ_νInv))" + end + end + + status = if optimal + :first_order + elseif elapsed_time > maxTime + :max_time + elseif tired + :max_iter + else + :exception + end + outdict = Dict( + :Fhist => Fobj_hist[1:k], + :Hhist => Hobj_hist[1:k], + :Chist => Complex_hist[1:k], + :NonSmooth => h, + :status => status, + :fk => fk, + :hk => hk, + :ξ => ξ, + :elapsed_time => elapsed_time, + ) + + return xk, k, outdict +end diff --git a/src/R2N.jl b/src/R2N.jl new file mode 100644 index 00000000..b90b66cf --- /dev/null +++ b/src/R2N.jl @@ -0,0 +1,292 @@ +export R2N + +""" +R2N(nlp, h, χ, options; kwargs...) + +A regularized quasi-Newton method for the problem + + min f(x) + h(x) + +where f: ℝⁿ → ℝ has a Lipschitz-continuous Jacobian, and h: ℝⁿ → ℝ is +lower semi-continuous and proper. + +About each iterate xₖ, a step sₖ is computed as an approximate solution of + + min φ(s; xₖ) + ½ σₖ ‖s‖² + ψ(s; xₖ) + +where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ Bₖ s is a quadratic approximation of f about xₖ, +ψ(s; xₖ) = h(xₖ + s) and σₖ > 0 is the regularization parameter. +The subproblem is solved inexactly by way of a first-order method such as the proximal-gradient +method or the quadratic regularization method. + +### Arguments + +* `nlp::AbstractNLPModel`: a smooth optimization problem +* `h`: a regularizer such as those defined in ProximalOperators +* `options::ROSolverOptions`: a structure containing algorithmic parameters + +The objective, gradient and Hessian of `nlp` will be accessed. +The Hessian is accessed as an abstract operator and need not be the exact Hessian. + +### Keyword arguments + +* `x0::AbstractVector`: an initial guess (default: `nlp.meta.x0`) +* `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver (default: the null logger) +* `subsolver`: the procedure used to compute a step (`PG` or `R2`) +* `subsolver_options::ROSolverOptions`: default options to pass to the subsolver (default: all defaut options) +* `selected::AbstractVector{<:Integer}`: (default `1:f.meta.nvar`). + +### Return values + +* `xk`: the final iterate +* `Fobj_hist`: an array with the history of values of the smooth objective +* `Hobj_hist`: an array with the history of values of the nonsmooth objective +* `Complex_hist`: an array with the history of number of inner iterations. +""" +function R2N( + f::AbstractNLPModel, + h::H, + options::ROSolverOptions{R}; + x0::AbstractVector = f.meta.x0, + subsolver_logger::Logging.AbstractLogger = Logging.NullLogger(), + subsolver = R2, + subsolver_options = ROSolverOptions(ϵa = options.ϵa), + selected::AbstractVector{<:Integer} = 1:(f.meta.nvar), +) where {H, R} + start_time = time() + elapsed_time = 0.0 + # initialize passed options + ϵ = options.ϵa + ϵ_subsolver_init = subsolver_options.ϵa + ϵ_subsolver = copy(ϵ_subsolver_init) + ϵr = options.ϵr + Δk = options.Δk + verbose = options.verbose + maxIter = options.maxIter + maxTime = options.maxTime + η1 = options.η1 + η2 = options.η2 + γ = options.γ + θ = options.θ + σmin = options.σmin + α = options.α + β = options.β + + # store initial values of the subsolver_options fields that will be modified + ν_subsolver = subsolver_options.ν + ϵa_subsolver = subsolver_options.ϵa + + local l_bound, u_bound + if has_bounds(f) + l_bound = f.meta.lvar + u_bound = f.meta.uvar + end + + if verbose == 0 + ptf = Inf + elseif verbose == 1 + ptf = round(maxIter / 10) + elseif verbose == 2 + ptf = round(maxIter / 100) + else + ptf = 1 + end + + # initialize parameters + #σk = max(1 / options.ν, σmin) #SVM + σk = σmin + xk = copy(x0) + hk = h(xk[selected]) + if hk == Inf + verbose > 0 && @info "R2N: finding initial guess where nonsmooth term is finite" + prox!(xk, h, x0, one(eltype(x0))) + hk = h(xk[selected]) + hk < Inf || error("prox computation must be erroneous") + verbose > 0 && @debug "R2N: found point where h has value" hk + end + hk == -Inf && error("nonsmooth term is not proper") + + xkn = similar(xk) + s = zero(xk) + ψ = has_bounds(f) ? shifted(h, xk, l_bound - xk, u_bound - xk, selected) : shifted(h, xk) + + Fobj_hist = zeros(maxIter) + Hobj_hist = zeros(maxIter) + Complex_hist = zeros(Int, maxIter) + if verbose > 0 + #! format: off + @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√(ξ1/ν)" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "‖Bₖ‖" "R2N" + #! format: on + end + + # main algorithm initialization + + local ξ1 + k = 0 + + fk = obj(f, xk) + ∇fk = grad(f, xk) + ∇fk⁻ = copy(∇fk) + + quasiNewtTest = isa(f, QuasiNewtonModel) + Bk = hess_op(f, xk) + + λmax = opnorm(Bk) + νInv = (1 + θ) *( σk + λmax) + sqrt_ξ1_νInv = one(R) + + optimal = false + tired = k ≥ maxIter || elapsed_time > maxTime + + while !(optimal || tired) + k = k + 1 + elapsed_time = time() - start_time + Fobj_hist[k] = fk + Hobj_hist[k] = hk + + # model for first prox-gradient step and ξ1 + φ1(d) = ∇fk' * d + mk1(d) = φ1(d) + ψ(d) + + # model for subsequent prox-gradient steps and ξ + φ(d) = (d' * (Bk * d)) / 2 + ∇fk' * d + σk * dot(d, d) / 2 + + ∇φ!(g, d) = begin + mul!(g, Bk, d) + g .+= ∇fk + g .+= σk * d + g + end + + mk(d) = φ(d) + ψ(d) + + # take first proximal gradient step s1 and see if current xk is nearly stationary + # s1 minimizes φ1(s) + ‖s‖² / 2 / ν + ψ(s) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0)). + + subsolver_options.ν = 1 / νInv + prox!(s, ψ, -subsolver_options.ν * ∇fk, subsolver_options.ν) + ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() + ξ1 > 0 || error("R2N: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + sqrt_ξ1_νInv = sqrt(ξ1 * νInv) + # println("sqrt_ξ1_νInv: ", sqrt_ξ1_νInv) + # println("ξ1: ", ξ1) + # println("νInv: ", νInv) + + if ξ1 ≥ 0 && k == 1 + ϵ_increment = ϵr * sqrt_ξ1_νInv + ϵ += ϵ_increment # make stopping test absolute and relative + ϵ_subsolver += ϵ_increment + end + + if sqrt_ξ1_νInv < ϵ + # the current xk is approximately first-order stationary + optimal = true + continue + end + s1 = copy(s) + + # subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) + subsolver_options.ϵa = k == 1 ? 1.0e-3 : max(ϵ_subsolver, min(1e-3, sqrt_ξ1_νInv)) # 1.0e-5 default + @debug "setting inner stopping tolerance to" subsolver_options.optTol + subsolver_args = subsolver == R2DH ? (SpectralGradient(1., f.meta.nvar),) : () + s, iter, _ = with_logger(subsolver_logger) do + subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) + end + + if norm(s) > β * norm(s1) + s .= s1 + end + # restore initial subsolver_options.ϵa here so that subsolver_options.ϵa + # is not modified if there is an error + + subsolver_options.ν = ν_subsolver + subsolver_options.ϵa = ϵ_subsolver_init + Complex_hist[k] = iter + + xkn .= xk .+ s + fkn = obj(f, xkn) + hkn = h(xkn[selected]) + hkn == -Inf && error("nonsmooth term is not proper") + mks = mk(s) #- σk * dot(s, s) / 2 + Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() + ξ = hk - mks + max(1, abs(hk)) * 10 * eps() + + if (ξ ≤ 0 || isnan(ξ)) + error("R2N: failed to compute a step: ξ = $ξ") + end + + ρk = Δobj / ξ + + R2N_stat = (η2 ≤ ρk < Inf) ? "↗" : (ρk < η1 ? "↘" : "=") + + if (verbose > 0) && (k % ptf == 0) + #! format: off + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ1) ρk σk norm(xk) norm(s) λmax R2N_stat + #! format: off + end + + if η2 ≤ ρk < Inf + σk = max(σk/γ, σmin) + end + + if η1 ≤ ρk < Inf + xk .= xkn + has_bounds(f) && set_bounds!(ψ, l_bound - xk, u_bound - xk) + + #update functions + fk = fkn + hk = hkn + + # update gradient & Hessian + shift!(ψ, xk) + ∇fk = grad(f, xk) + if quasiNewtTest + push!(f, s, ∇fk - ∇fk⁻) + end + Bk = hess_op(f, xk) + λmax = opnorm(Bk) + ∇fk⁻ .= ∇fk + end + + if ρk < η1 || ρk == Inf + σk = σk * γ + end + νInv = (1 + θ) *( σk + λmax) + + tired = k ≥ maxIter || elapsed_time > maxTime + end + + if verbose > 0 + if k == 1 + @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk + elseif optimal + #! format: off + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) λmax + #! format: on + @info "R2N: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" + end + end + + status = if optimal + :first_order + elseif elapsed_time > maxTime + :max_time + elseif tired + :max_iter + else + :exception + end + + stats = GenericExecutionStats(f) + set_status!(stats, status) + set_solution!(stats, xk) + set_objective!(stats, fk + hk) + set_residuals!(stats, zero(eltype(xk)), sqrt_ξ1_νInv) + set_iter!(stats, k) + set_time!(stats, elapsed_time) + set_solver_specific!(stats, :Fhist, Fobj_hist[1:k]) + set_solver_specific!(stats, :Hhist, Hobj_hist[1:k]) + set_solver_specific!(stats, :NonSmooth, h) + set_solver_specific!(stats, :SubsolverCounter, Complex_hist[1:k]) + return stats +end \ No newline at end of file diff --git a/src/RegularizedOptimization.jl b/src/RegularizedOptimization.jl index 22c1c814..5983cab9 100644 --- a/src/RegularizedOptimization.jl +++ b/src/RegularizedOptimization.jl @@ -20,5 +20,7 @@ include("TRDH_alg.jl") include("R2_alg.jl") include("LM_alg.jl") include("LMTR_alg.jl") +include("R2DH.jl") +include("R2N.jl") end # module RegularizedOptimization From 804ffdf363b11679b111ddcb65e4605fbfb07fb8 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sun, 15 Sep 2024 12:08:54 -0400 Subject: [PATCH 03/19] add Quasi-Newton approach, all tests don't pass --- Project.toml | 4 +- src/L2Penalty_alg.jl | 94 +++++- src/R2N.jl | 557 ++++++++++++++++++++------------- src/RegularizedOptimization.jl | 2 +- test/runtests.jl | 34 +- 5 files changed, 455 insertions(+), 236 deletions(-) diff --git a/Project.toml b/Project.toml index 13d9c3ed..acdf5ef3 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,8 @@ version = "0.1.0" [deps] ADNLPModels = "54578032-b7ea-4c30-94aa-7cbd1cce6c9a" +DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" +Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearOperators = "5c8ed15e-5a4c-59e4-a42b-c7e8811fb125" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" @@ -38,4 +40,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TestSetExtensions = "98d24dd4-01ad-11ea-1b02-c9a08f80db04" [targets] -test = ["Random", "RegularizedProblems", "Test", "TestSetExtensions"] \ No newline at end of file +test = ["Random", "RegularizedProblems", "Test", "TestSetExtensions"] diff --git a/src/L2Penalty_alg.jl b/src/L2Penalty_alg.jl index 5b6bfcc6..d4b3b1af 100644 --- a/src/L2Penalty_alg.jl +++ b/src/L2Penalty_alg.jl @@ -45,6 +45,11 @@ function L2PenaltySolver( ) sub_nlp = RegularizedNLPModel(nlp, sub_ψ) sub_stats = GenericExecutionStats(nlp) + if sub_solver == R2NSolver + Solver = sub_solver(sub_nlp,sub_solver = L2_R2N_subsolver) + else + Solver = sub_solver(sub_nlp) + end return L2PenaltySolver( x, @@ -52,7 +57,7 @@ function L2PenaltySolver( s0, ψ, sub_ψ, - sub_solver(sub_nlp), + Solver, sub_stats ) end @@ -129,11 +134,12 @@ You can also use the `sub_callback` keyword argument which has exactly the same """ function L2Penalty( nlp::AbstractNLPModel{T, V}; + sub_solver = R2Solver, kwargs...) where{ T <: Real, V } if !equality_constrained(nlp) error("L2Penalty: This algorithm only works for equality contrained problems.") end - solver = L2PenaltySolver(nlp) + solver = L2PenaltySolver(nlp,sub_solver = sub_solver) stats = GenericExecutionStats(nlp) solve!( solver, @@ -319,4 +325,86 @@ function get_status( else :unknown end -end \ No newline at end of file +end + +mutable struct L2_R2N_subsolver{T <: Real, V <: AbstractVector{T}} <: AbstractOptimizationSolver + u1::V + u2::V +end + +function L2_R2N_subsolver( + reg_nlp::AbstractRegularizedNLPModel{T, V}; + ) where{T, V} + x0 = reg_nlp.model.meta.x0 + n = reg_nlp.model.meta.nvar + m = length(reg_nlp.h.b) + #x = zero(x0) + u1 = similar(x0, n+m) + u2 = zeros(eltype(x0), n+m) + + + return L2_R2N_subsolver( + u1, + u2, + ) +end + +function solve!( + solver::L2_R2N_subsolver{T, V}, + reg_nlp::AbstractRegularizedNLPModel{T, V}, + stats::GenericExecutionStats{T, V, V, Any}, + ∇fk::V, + Q::L, + σk::T; + x = reg_nlp.model.meta.x0, + atol = eps(T)^(0.5), + max_time = T(30), + max_iter = 10000 + ) where{T <: Real, V <: AbstractVector{T} , L <: AbstractLinearOperator} + + start_time = time() + set_time!(stats, 0.0) + set_iter!(stats, 0) + + n = reg_nlp.model.meta.nvar + m = length(reg_nlp.h.b) + Δ = reg_nlp.h.h.lambda + + u1 = solver.u1 + u2 = solver.u2 + + # Create problem + @. u1[1:n] = ∇fk + @. u1[n+1:n+m] = -reg_nlp.h.b + + αₖ = 0.0 + + H1 = [-Q reg_nlp.h.A'] + H2 = [reg_nlp.h.A αₖ*opEye(m,m)] + H = [H1;H2] + x1,_ = minres_qlp(H,u1,atol = eps()) + + if norm(x1[n+1:n+m]) <= Δ + set_solution!(stats,x1[1:n]) + return + else + u2[n+1:n+m] .= x1[n+1:n+m] + x2,_ = minres_qlp(H,u2,atol = eps()) + αₖ += norm(x1[n+1:n+m])^2/(x1[n+1:n+m]'x2[n+1:n+m])*(norm(x1[n+1:n+m])- Δ)/Δ + end + k = 0 + + while abs(norm(x1[n+1:n+m]) - Δ) > eps(T)^(0.75) && stats.iter < max_iter && stats.elapsed_time < max_time + H2 = [reg_nlp.h.A αₖ*opEye(m,m)] + H = [H1;H2] + + x1,_ = minres_qlp(H,u1, atol = eps()) + u2[n+1:n+m] .= x1[n+1:n+m] + x2,_ = minres_qlp(H,u2, atol = eps()) + αₖ += norm(x1[n+1:n+m])^2/(x1[n+1:n+m]'x2[n+1:n+m])*(norm(x1[n+1:n+m])- Δ)/Δ + set_iter!(stats,stats.iter + 1) + set_time!(stats,time()-start_time) + end + set_solution!(stats,x1[1:n]) + +end diff --git a/src/R2N.jl b/src/R2N.jl index b90b66cf..c8b6b39b 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -1,213 +1,280 @@ -export R2N - -""" -R2N(nlp, h, χ, options; kwargs...) - -A regularized quasi-Newton method for the problem - - min f(x) + h(x) - -where f: ℝⁿ → ℝ has a Lipschitz-continuous Jacobian, and h: ℝⁿ → ℝ is -lower semi-continuous and proper. - -About each iterate xₖ, a step sₖ is computed as an approximate solution of - - min φ(s; xₖ) + ½ σₖ ‖s‖² + ψ(s; xₖ) - -where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ Bₖ s is a quadratic approximation of f about xₖ, -ψ(s; xₖ) = h(xₖ + s) and σₖ > 0 is the regularization parameter. -The subproblem is solved inexactly by way of a first-order method such as the proximal-gradient -method or the quadratic regularization method. - -### Arguments - -* `nlp::AbstractNLPModel`: a smooth optimization problem -* `h`: a regularizer such as those defined in ProximalOperators -* `options::ROSolverOptions`: a structure containing algorithmic parameters - -The objective, gradient and Hessian of `nlp` will be accessed. -The Hessian is accessed as an abstract operator and need not be the exact Hessian. - -### Keyword arguments - -* `x0::AbstractVector`: an initial guess (default: `nlp.meta.x0`) -* `subsolver_logger::AbstractLogger`: a logger to pass to the subproblem solver (default: the null logger) -* `subsolver`: the procedure used to compute a step (`PG` or `R2`) -* `subsolver_options::ROSolverOptions`: default options to pass to the subsolver (default: all defaut options) -* `selected::AbstractVector{<:Integer}`: (default `1:f.meta.nvar`). - -### Return values - -* `xk`: the final iterate -* `Fobj_hist`: an array with the history of values of the smooth objective -* `Hobj_hist`: an array with the history of values of the nonsmooth objective -* `Complex_hist`: an array with the history of number of inner iterations. -""" -function R2N( - f::AbstractNLPModel, - h::H, - options::ROSolverOptions{R}; - x0::AbstractVector = f.meta.x0, - subsolver_logger::Logging.AbstractLogger = Logging.NullLogger(), - subsolver = R2, - subsolver_options = ROSolverOptions(ϵa = options.ϵa), - selected::AbstractVector{<:Integer} = 1:(f.meta.nvar), -) where {H, R} - start_time = time() - elapsed_time = 0.0 - # initialize passed options - ϵ = options.ϵa - ϵ_subsolver_init = subsolver_options.ϵa - ϵ_subsolver = copy(ϵ_subsolver_init) - ϵr = options.ϵr - Δk = options.Δk - verbose = options.verbose - maxIter = options.maxIter - maxTime = options.maxTime - η1 = options.η1 - η2 = options.η2 - γ = options.γ - θ = options.θ - σmin = options.σmin - α = options.α - β = options.β - - # store initial values of the subsolver_options fields that will be modified - ν_subsolver = subsolver_options.ν - ϵa_subsolver = subsolver_options.ϵa - - local l_bound, u_bound - if has_bounds(f) - l_bound = f.meta.lvar - u_bound = f.meta.uvar - end - - if verbose == 0 - ptf = Inf - elseif verbose == 1 - ptf = round(maxIter / 10) - elseif verbose == 2 - ptf = round(maxIter / 100) - else - ptf = 1 +export R2N, R2NSolver, solve! + +import SolverCore.solve! + +mutable struct R2NSolver{ + T <: Real, + G <: ShiftedProximableFunction, + V <: AbstractVector{T}, + S <: AbstractOptimizationSolver, +} <: AbstractOptimizationSolver + xk::V + ∇fk::V + ∇fk⁻::V + mν∇fk::V + ψ::G + sub_ψ::G + xkn::V + s::V + s1::V + has_bnds::Bool + l_bound::V + u_bound::V + sub_solver::S + sub_stats::GenericExecutionStats{T, V, V, Any} +end + +function R2NSolver(reg_nlp::AbstractRegularizedNLPModel{T, V}; sub_solver = R2Solver) where {T, V} + x0 = reg_nlp.model.meta.x0 + l_bound = reg_nlp.model.meta.lvar + u_bound = reg_nlp.model.meta.uvar + + xk = similar(x0) + ∇fk = similar(x0) + ∇fk⁻ = similar(x0) + mν∇fk = similar(x0) + xkn = similar(x0) + s = zero(x0) + s1 = similar(x0) + has_bnds = any(l_bound .!= T(-Inf)) || any(u_bound .!= T(Inf)) + + ψ = has_bnds ? shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) : shifted(reg_nlp.h, xk) + sub_ψ = has_bnds ? shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) : shifted(reg_nlp.h, xk) + + sub_nlp = RegularizedNLPModel(reg_nlp.model, sub_ψ) + sub_stats = GenericExecutionStats(reg_nlp.model) + + return R2NSolver( + xk, + ∇fk, + ∇fk⁻, + mν∇fk, + ψ, + sub_ψ, + xkn, + s, + s1, + has_bnds, + l_bound, + u_bound, + sub_solver(sub_nlp), + sub_stats + ) +end + +function SolverCore.solve!( + solver::R2NSolver{T}, + reg_nlp::AbstractRegularizedNLPModel{T, V}, + stats::GenericExecutionStats{T, V}; + callback = (args...) -> nothing, + x::V = reg_nlp.model.meta.x0, + atol::T = √eps(T), + sub_atol::T = atol, + rtol::T = √eps(T), + neg_tol::T = eps(T)^(1 / 4), + verbose::Int = 0, + max_iter::Int = 10000, + max_time::Float64 = 30.0, + max_eval::Int = -1, + σmin::T = eps(T), + η1::T = √√eps(T), + η2::T = T(0.9), + ν = eps(T)^(1/5), + ν_subsolver = ν, + γ::T = T(3), + β = 1/eps(T), + θ = eps(T)^(1/5), + kwargs... +) where {T, V} + + reset!(stats) + + # Retrieve workspace + selected = reg_nlp.selected + h = reg_nlp.h + nlp = reg_nlp.model + + xk = solver.xk .= x + + # Make sure ψ has the correct shift + shift!(solver.ψ, xk) + + ∇fk = solver.∇fk + ∇fk⁻ = solver.∇fk⁻ + mν∇fk = solver.mν∇fk + ψ = solver.ψ + xkn = solver.xkn + s = solver.s + s1 = solver.s1 + has_bnds = solver.has_bnds + if has_bnds + l_bound = solver.l_bound + u_bound = solver.u_bound end + sub_atol_init = copy(sub_atol) + sub_solver = solver.sub_solver + sub_stats = solver.sub_stats + sub_ψ = solver.sub_ψ # initialize parameters - #σk = max(1 / options.ν, σmin) #SVM - σk = σmin - xk = copy(x0) - hk = h(xk[selected]) + improper = false + hk = @views h(xk[selected]) if hk == Inf verbose > 0 && @info "R2N: finding initial guess where nonsmooth term is finite" - prox!(xk, h, x0, one(eltype(x0))) - hk = h(xk[selected]) + prox!(xk, h, xk, one(eltype(x0))) + hk = @views h(xk[selected]) hk < Inf || error("prox computation must be erroneous") verbose > 0 && @debug "R2N: found point where h has value" hk end - hk == -Inf && error("nonsmooth term is not proper") - - xkn = similar(xk) - s = zero(xk) - ψ = has_bounds(f) ? shifted(h, xk, l_bound - xk, u_bound - xk, selected) : shifted(h, xk) + improper = (hk == -Inf) - Fobj_hist = zeros(maxIter) - Hobj_hist = zeros(maxIter) - Complex_hist = zeros(Int, maxIter) if verbose > 0 - #! format: off - @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√(ξ1/ν)" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "‖Bₖ‖" "R2N" - #! format: on + @info log_header( + [:outer, :inner, :fx, :hx, :xi, :ρ, :σ, :normx, :norms, :normB, :arrow], + [Int, Int, Float64, Float64, Float64, Float64, Float64, Float64, Float64, Float64, Char], + hdr_override = Dict{Symbol, String}( # TODO: Add this as constant dict elsewhere + :outer => "outer", + :inner => "inner", + :fx => "f(x)", + :hx => "h(x)", + :xi => "√(ξ1/ν)", + :ρ => "ρ", + :σ => "σ", + :normx => "‖x‖", + :norms => "‖s‖", + :normB => "‖B‖", + :arrow => "R2N", + ), + colsep = 1, + ) end - # main algorithm initialization - - local ξ1 - k = 0 - - fk = obj(f, xk) - ∇fk = grad(f, xk) - ∇fk⁻ = copy(∇fk) + local ξ1::T + local ρk::T + σk = σmin - quasiNewtTest = isa(f, QuasiNewtonModel) - Bk = hess_op(f, xk) + fk = obj(nlp, xk) + grad!(nlp, xk, ∇fk) + ∇fk⁻ .= ∇fk + + quasiNewtTest = isa(nlp, QuasiNewtonModel) + Bk = hess_op(nlp, xk) + local λmax::T + try + λmax = opnorm(Bk) + catch LAPACKException + λmax = opnorm(Matrix(Bk)) + end - λmax = opnorm(Bk) νInv = (1 + θ) *( σk + λmax) - sqrt_ξ1_νInv = one(R) - - optimal = false - tired = k ≥ maxIter || elapsed_time > maxTime - - while !(optimal || tired) - k = k + 1 - elapsed_time = time() - start_time - Fobj_hist[k] = fk - Hobj_hist[k] = hk + sqrt_ξ1_νInv = one(T) + ν_subsolver = 1/νInv - # model for first prox-gradient step and ξ1 - φ1(d) = ∇fk' * d - mk1(d) = φ1(d) + ψ(d) - - # model for subsequent prox-gradient steps and ξ - φ(d) = (d' * (Bk * d)) / 2 + ∇fk' * d + σk * dot(d, d) / 2 - - ∇φ!(g, d) = begin - mul!(g, Bk, d) - g .+= ∇fk - g .+= σk * d - g - end + @. mν∇fk = -ν_subsolver * ∇fk - mk(d) = φ(d) + ψ(d) + set_iter!(stats, 0) + start_time = time() + set_time!(stats, 0.0) + set_objective!(stats, fk + hk) + set_solver_specific!(stats, :smooth_obj, fk) + set_solver_specific!(stats, :nonsmooth_obj, hk) - # take first proximal gradient step s1 and see if current xk is nearly stationary - # s1 minimizes φ1(s) + ‖s‖² / 2 / ν + ψ(s) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0)). + # model for first prox-gradient step and ξ1 + φ1(d) = ∇fk' * d + mk1(d) = φ1(d) + ψ(d) - subsolver_options.ν = 1 / νInv - prox!(s, ψ, -subsolver_options.ν * ∇fk, subsolver_options.ν) - ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() - ξ1 > 0 || error("R2N: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - sqrt_ξ1_νInv = sqrt(ξ1 * νInv) - # println("sqrt_ξ1_νInv: ", sqrt_ξ1_νInv) - # println("ξ1: ", ξ1) - # println("νInv: ", νInv) + # model for subsequent prox-gradient steps and ξ + φ(d) = (d' * (Bk * d)) / 2 + ∇fk' * d + σk * dot(d, d) / 2 - if ξ1 ≥ 0 && k == 1 - ϵ_increment = ϵr * sqrt_ξ1_νInv - ϵ += ϵ_increment # make stopping test absolute and relative - ϵ_subsolver += ϵ_increment - end + ∇φ!(g, d) = begin + mul!(g, Bk, d) + g .+= ∇fk + g .+= σk * d + g + end - if sqrt_ξ1_νInv < ϵ - # the current xk is approximately first-order stationary - optimal = true - continue - end - s1 = copy(s) - - # subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) - subsolver_options.ϵa = k == 1 ? 1.0e-3 : max(ϵ_subsolver, min(1e-3, sqrt_ξ1_νInv)) # 1.0e-5 default - @debug "setting inner stopping tolerance to" subsolver_options.optTol - subsolver_args = subsolver == R2DH ? (SpectralGradient(1., f.meta.nvar),) : () - s, iter, _ = with_logger(subsolver_logger) do - subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) - end + mk(d) = φ(d) + ψ(d) + + prox!(s, ψ, mν∇fk, ν_subsolver) + mks = mk1(s) + + ξ1 = hk - mks + max(1, abs(hk)) * 10 * eps() + + sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν_subsolver) : sqrt(-ξ1 / ν_subsolver) + solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol) + (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && + error("R2N: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + atol += rtol * sqrt_ξ1_νInv # make stopping test absolute and relative + sub_atol += rtol * sqrt_ξ1_νInv + + set_solver_specific!(stats, :xi, sqrt_ξ1_νInv) + set_status!( + stats, + get_status( + reg_nlp, + elapsed_time = stats.elapsed_time, + iter = stats.iter, + optimal = solved, + improper = improper, + max_eval = max_eval, + max_time = max_time, + max_iter = max_iter, + ), + ) + + callback(nlp, solver, stats) + + done = stats.status != :unknown + + while !done + + s1 .= s + + sub_atol = stats.iter == 0 ? 1.0e-3 : max(sub_atol, min(1e-3, sqrt_ξ1_νInv)) # 1.0e-5 default + #@debug "setting inner stopping tolerance to" subsolver_options.optTol + #subsolver_args = subsolver == R2DH ? (SpectralGradient(1., f.meta.nvar),) : () + nlp_model = FirstOrderModel(φ,∇φ!,s) + model = RegularizedNLPModel(nlp_model, ψ) + #model.selected .= reg_nlp.selected + if sub_solver == R2Solver + solve!( + sub_solver, + model, + sub_stats, + x = s, + atol = sub_atol, + ν = ν_subsolver, + kwargs...) + else + solve!( + sub_solver, + model, + sub_stats, + ∇fk, + Bk + σk*opEye(nlp.meta.nvar,nlp.meta.nvar), + σk, + atol = sub_atol, + max_time = max_time - stats.elapsed_time, + kwargs...) + end + s .= sub_stats.solution if norm(s) > β * norm(s1) s .= s1 end - # restore initial subsolver_options.ϵa here so that subsolver_options.ϵa - # is not modified if there is an error + if mk(s) > mk(s1) + s .= s1 + end - subsolver_options.ν = ν_subsolver - subsolver_options.ϵa = ϵ_subsolver_init - Complex_hist[k] = iter + + sub_atol = sub_atol_init xkn .= xk .+ s - fkn = obj(f, xkn) + fkn = obj(nlp, xkn) hkn = h(xkn[selected]) hkn == -Inf && error("nonsmooth term is not proper") - mks = mk(s) #- σk * dot(s, s) / 2 + + mks = mk(s) Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() ξ = hk - mks + max(1, abs(hk)) * 10 * eps() @@ -217,13 +284,24 @@ function R2N( ρk = Δobj / ξ - R2N_stat = (η2 ≤ ρk < Inf) ? "↗" : (ρk < η1 ? "↘" : "=") - - if (verbose > 0) && (k % ptf == 0) - #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ1) ρk σk norm(xk) norm(s) λmax R2N_stat - #! format: off - end + verbose > 0 && + stats.iter % verbose == 0 && + @info log_row( + Any[ + stats.iter, + sub_stats.iter, + fk, + hk, + sqrt_ξ1_νInv, + ρk, + σk, + norm(xk), + norm(s), + λmax, + (η2 ≤ ρk < Inf) ? "↗" : (ρk < η1 ? "↘" : "="), + ], + colsep = 1, + ) if η2 ≤ ρk < Inf σk = max(σk/γ, σmin) @@ -231,62 +309,95 @@ function R2N( if η1 ≤ ρk < Inf xk .= xkn - has_bounds(f) && set_bounds!(ψ, l_bound - xk, u_bound - xk) + has_bounds(nlp) && set_bounds!(ψ, l_bound - xk, u_bound - xk) #update functions fk = fkn hk = hkn - # update gradient & Hessian shift!(ψ, xk) - ∇fk = grad(f, xk) + ∇fk = grad!(nlp, xk, ∇fk) + @. mν∇fk = -ν_subsolver * ∇fk if quasiNewtTest - push!(f, s, ∇fk - ∇fk⁻) + push!(nlp, s, ∇fk - ∇fk⁻) + end + Bk = hess_op(nlp, xk) + try + λmax = opnorm(Bk) + catch LAPACKException + λmax = opnorm(Matrix(Bk)) end - Bk = hess_op(f, xk) - λmax = opnorm(Bk) ∇fk⁻ .= ∇fk end if ρk < η1 || ρk == Inf - σk = σk * γ + σk = σk * γ end νInv = (1 + θ) *( σk + λmax) - tired = k ≥ maxIter || elapsed_time > maxTime - end + set_objective!(stats, fk + hk) + set_solver_specific!(stats, :smooth_obj, fk) + set_solver_specific!(stats, :nonsmooth_obj, hk) + set_iter!(stats, stats.iter + 1) + set_time!(stats, time() - start_time) + + prox!(s, ψ, mν∇fk, ν_subsolver) + mks = mk1(s) + + ξ1 = hk - mks + max(1, abs(hk)) * 10 * eps() + sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν_subsolver) : sqrt(-ξ1 / ν_subsolver) + solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol) + if ξ1 < 0 && sqrt_ξ1_νInv > neg_tol + println(Matrix(ψ.A)) + println(ψ.b) + println(ψ.h.lambda) + println(mν∇fk) + println(ν_subsolver) + println(s) - if verbose > 0 - if k == 1 - @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk - elseif optimal - #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) λmax - #! format: on - @info "R2N: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" end + (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && + error("R2N: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + + set_status!( + stats, + get_status( + reg_nlp, + elapsed_time = stats.elapsed_time, + iter = stats.iter, + optimal = solved, + improper = improper, + max_eval = max_eval, + max_time = max_time, + max_iter = max_iter, + ), + ) + + callback(nlp, solver, stats) + + done = stats.status != :unknown end - status = if optimal - :first_order - elseif elapsed_time > maxTime - :max_time - elseif tired - :max_iter - else - :exception + if verbose > 0 && stats.status == :first_order + @info log_row( + Any[ + stats.iter, + sub_stats.iter, + fk, + hk, + sqrt_ξ1_νInv, + ρk, + σk, + norm(xk), + norm(s), + λmax, + (η2 ≤ ρk < Inf) ? "↘" : (ρk < η1 ? "↗" : "="), + ], + colsep = 1, + ) + @info "R2N: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" end - stats = GenericExecutionStats(f) - set_status!(stats, status) - set_solution!(stats, xk) - set_objective!(stats, fk + hk) - set_residuals!(stats, zero(eltype(xk)), sqrt_ξ1_νInv) - set_iter!(stats, k) - set_time!(stats, elapsed_time) - set_solver_specific!(stats, :Fhist, Fobj_hist[1:k]) - set_solver_specific!(stats, :Hhist, Hobj_hist[1:k]) - set_solver_specific!(stats, :NonSmooth, h) - set_solver_specific!(stats, :SubsolverCounter, Complex_hist[1:k]) + set_solution!(stats,xk) return stats end \ No newline at end of file diff --git a/src/RegularizedOptimization.jl b/src/RegularizedOptimization.jl index 579b6348..0b60faef 100644 --- a/src/RegularizedOptimization.jl +++ b/src/RegularizedOptimization.jl @@ -8,7 +8,7 @@ using ProximalOperators, TSVD # dependencies from us using LinearOperators, - NLPModels, NLPModelsModifiers, RegularizedProblems, ShiftedProximalOperators, SolverCore, SparseMatricesCOO + NLPModels, NLPModelsModifiers, RegularizedProblems, ShiftedProximalOperators, SolverCore, SparseMatricesCOO, Krylov include("utils.jl") include("input_struct.jl") diff --git a/test/runtests.jl b/test/runtests.jl index 09003098..d14e8b50 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,25 +1,43 @@ using LinearAlgebra: length using LinearAlgebra, Random, Test using ProximalOperators +using ShiftedProximalOperators using NLPModels, NLPModelsModifiers, RegularizedProblems, RegularizedOptimization, SolverCore, RegularizedOptimization, OptimizationProblems, ADNLPModels, OptimizationProblems.ADNLPProblems - +using Random +Random.seed!(1234) const global compound = 1 const global nz = 10 * compound -const global options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 10) +const global options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 0) const global bpdn, bpdn_nls, sol = bpdn_model(compound) const global bpdn2, bpdn_nls2, sol2 = bpdn_model(compound, bounds = true) const global λ = norm(grad(bpdn, zeros(bpdn.meta.nvar)), Inf) / 10 meta = OptimizationProblems.meta -problem_list = meta[(meta.has_equalities_only .== 1) .& (meta.has_bounds.==0) .& (meta.has_fixed_variables.==0) .& (meta.variable_nvar .== 0), :] +problem_list = meta[(meta.has_equalities_only .== 1) .& (meta.has_bounds.==0) .& (meta.has_fixed_variables.==0) .& (meta.variable_nvar .== 0) .& (meta.name .!= "hs322"), :] +println(problem_list) for problem ∈ eachrow(problem_list) - for (nlp,subsolver_name) ∈ ((eval(Meta.parse(problem.name))(),"R2"),) + for (nlp,subsolver_name) ∈ ((eval(Meta.parse(problem.name))(),"R2"),(LSR1Model(eval(Meta.parse(problem.name))()),"R2N-LSR1"),(LBFGSModel(eval(Meta.parse(problem.name))()),"R2N-LBFGS")) @testset "Optimization Problems - $(problem.name) - L2Penalty - $(subsolver_name)" begin - out = L2Penalty( - nlp, - verbose = 1, - ) + if subsolver_name == "R2" + out = L2Penalty( + nlp, + verbose = 1, + atol = 1e-4, + rtol = 1e-4, + ktol = eps()^(0.2) + ) + else + out = L2Penalty( + nlp, + verbose = 1, + sub_solver = R2NSolver, + atol = 1e-4, + rtol = 1e-4, + ktol = eps()^(0.2), + max_time = 120.0 + ) + end @test typeof(out.solution) == typeof(nlp.meta.x0) @test length(out.solution) == nlp.meta.nvar @test typeof(out.dual_feas) == eltype(out.solution) From bcc8ab723a1f86f2871416dffaf666c8956889e1 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sun, 15 Sep 2024 12:27:01 -0400 Subject: [PATCH 04/19] at this point, all test pass --- src/R2N.jl | 4 ---- test/runtests.jl | 5 +---- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/R2N.jl b/src/R2N.jl index c8b6b39b..bb13496e 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -278,10 +278,6 @@ function SolverCore.solve!( Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() ξ = hk - mks + max(1, abs(hk)) * 10 * eps() - if (ξ ≤ 0 || isnan(ξ)) - error("R2N: failed to compute a step: ξ = $ξ") - end - ρk = Δobj / ξ verbose > 0 && diff --git a/test/runtests.jl b/test/runtests.jl index d14e8b50..34a547d7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -13,16 +13,14 @@ const global bpdn2, bpdn_nls2, sol2 = bpdn_model(compound, bounds = true) const global λ = norm(grad(bpdn, zeros(bpdn.meta.nvar)), Inf) / 10 meta = OptimizationProblems.meta -problem_list = meta[(meta.has_equalities_only .== 1) .& (meta.has_bounds.==0) .& (meta.has_fixed_variables.==0) .& (meta.variable_nvar .== 0) .& (meta.name .!= "hs322"), :] +problem_list = meta[(meta.has_equalities_only .== 1) .& (meta.has_bounds.==0) .& (meta.has_fixed_variables.==0) .& (meta.variable_nvar .== 0), :] -println(problem_list) for problem ∈ eachrow(problem_list) for (nlp,subsolver_name) ∈ ((eval(Meta.parse(problem.name))(),"R2"),(LSR1Model(eval(Meta.parse(problem.name))()),"R2N-LSR1"),(LBFGSModel(eval(Meta.parse(problem.name))()),"R2N-LBFGS")) @testset "Optimization Problems - $(problem.name) - L2Penalty - $(subsolver_name)" begin if subsolver_name == "R2" out = L2Penalty( nlp, - verbose = 1, atol = 1e-4, rtol = 1e-4, ktol = eps()^(0.2) @@ -30,7 +28,6 @@ for problem ∈ eachrow(problem_list) else out = L2Penalty( nlp, - verbose = 1, sub_solver = R2NSolver, atol = 1e-4, rtol = 1e-4, From 81d002139d0d33ed54633074df0678961075d183 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sun, 15 Sep 2024 12:32:23 -0400 Subject: [PATCH 05/19] reduce precision when computing sub problem --- src/L2Penalty_alg.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/L2Penalty_alg.jl b/src/L2Penalty_alg.jl index d4b3b1af..b904dc19 100644 --- a/src/L2Penalty_alg.jl +++ b/src/L2Penalty_alg.jl @@ -382,14 +382,14 @@ function solve!( H1 = [-Q reg_nlp.h.A'] H2 = [reg_nlp.h.A αₖ*opEye(m,m)] H = [H1;H2] - x1,_ = minres_qlp(H,u1,atol = eps()) + x1,_ = minres_qlp(H,u1) if norm(x1[n+1:n+m]) <= Δ set_solution!(stats,x1[1:n]) return else u2[n+1:n+m] .= x1[n+1:n+m] - x2,_ = minres_qlp(H,u2,atol = eps()) + x2,_ = minres_qlp(H,u2) αₖ += norm(x1[n+1:n+m])^2/(x1[n+1:n+m]'x2[n+1:n+m])*(norm(x1[n+1:n+m])- Δ)/Δ end k = 0 @@ -398,9 +398,9 @@ function solve!( H2 = [reg_nlp.h.A αₖ*opEye(m,m)] H = [H1;H2] - x1,_ = minres_qlp(H,u1, atol = eps()) + x1,_ = minres_qlp(H,u1) u2[n+1:n+m] .= x1[n+1:n+m] - x2,_ = minres_qlp(H,u2, atol = eps()) + x2,_ = minres_qlp(H,u2) αₖ += norm(x1[n+1:n+m])^2/(x1[n+1:n+m]'x2[n+1:n+m])*(norm(x1[n+1:n+m])- Δ)/Δ set_iter!(stats,stats.iter + 1) set_time!(stats,time()-start_time) From 71c11328131c31eff064ac795f68bc7eb8bd3988 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sun, 15 Sep 2024 23:49:44 -0400 Subject: [PATCH 06/19] add neg tol to l2penalty --- src/L2Penalty_alg.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/L2Penalty_alg.jl b/src/L2Penalty_alg.jl index b904dc19..e5eb9b5e 100644 --- a/src/L2Penalty_alg.jl +++ b/src/L2Penalty_alg.jl @@ -222,8 +222,9 @@ function SolverCore.solve!( local θ::T prox!(s, ψ, s0, 1.0) θ = hx - ψ(s) - θ ≥ 0 || error("L2Penalty: prox-gradient step should produce a decrease but θ = $(θ)") - sqrt_θ = sqrt(θ) + + sqrt_θ = θ ≥ 0 ? sqrt(θ) : sqrt(-θ) + θ < 0 && sqrt_θ ≥ neg_tol && error("L2Penalty: prox-gradient step should produce a decrease but θ = $(θ)") atol += rtol * sqrt_θ # make stopping test absolute and relative ktol = max(ktol,atol) # Keep ϵ₀ ≥ ϵ @@ -260,7 +261,8 @@ function SolverCore.solve!( prox!(s, ψ, s0, 1.0) θ = hx - ψ(s) - sqrt_θ = sqrt(θ) + sqrt_θ = θ ≥ 0 ? sqrt(θ) : sqrt(-θ) + θ < 0 && sqrt_θ ≥ neg_tol && error("L2Penalty: prox-gradient step should produce a decrease but θ = $(θ)") if sqrt_θ > ktol τ = τ + β1 From e6867d4d300fbcc10d192aeca767e4899a32cb15 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 19 Sep 2024 14:19:24 -0400 Subject: [PATCH 07/19] fix minor issues in L2P --- src/L2Penalty_alg.jl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/L2Penalty_alg.jl b/src/L2Penalty_alg.jl index e5eb9b5e..d34019f2 100644 --- a/src/L2Penalty_alg.jl +++ b/src/L2Penalty_alg.jl @@ -305,6 +305,9 @@ function SolverCore.solve!( done = stats.status != :unknown end + set_solution!(stats, xk) + return stats + end function get_status( @@ -389,11 +392,10 @@ function solve!( if norm(x1[n+1:n+m]) <= Δ set_solution!(stats,x1[1:n]) return - else - u2[n+1:n+m] .= x1[n+1:n+m] - x2,_ = minres_qlp(H,u2) - αₖ += norm(x1[n+1:n+m])^2/(x1[n+1:n+m]'x2[n+1:n+m])*(norm(x1[n+1:n+m])- Δ)/Δ end + u2[n+1:n+m] .= x1[n+1:n+m] + x2,_ = minres_qlp(H,u2) + αₖ += norm(x1[n+1:n+m])^2/(x1[n+1:n+m]'x2[n+1:n+m])*(norm(x1[n+1:n+m])- Δ)/Δ k = 0 while abs(norm(x1[n+1:n+m]) - Δ) > eps(T)^(0.75) && stats.iter < max_iter && stats.elapsed_time < max_time From d3a4601943b6ad60782e5de64f7604e2df27f7cb Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 19 Sep 2024 14:23:15 -0400 Subject: [PATCH 08/19] minor fix --- src/L2Penalty_alg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/L2Penalty_alg.jl b/src/L2Penalty_alg.jl index d34019f2..521d1092 100644 --- a/src/L2Penalty_alg.jl +++ b/src/L2Penalty_alg.jl @@ -305,7 +305,7 @@ function SolverCore.solve!( done = stats.status != :unknown end - set_solution!(stats, xk) + set_solution!(stats, x) return stats end From fd42bb812cf7c31d16b7ed202ebdec28182e2c7c Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sun, 22 Sep 2024 12:43:09 -0400 Subject: [PATCH 09/19] add infeasibility test --- src/L2Penalty_alg.jl | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/L2Penalty_alg.jl b/src/L2Penalty_alg.jl index 521d1092..d6ba6ad4 100644 --- a/src/L2Penalty_alg.jl +++ b/src/L2Penalty_alg.jl @@ -102,6 +102,7 @@ For advanced usage, first define a solver "L2PenaltySolver" to preallocate the m - `max_time::Float64 = 30.0`: maximum time limit in seconds; - `max_iter::Int = 10000`: maximum number of iterations; - `sub_max_iter::Int = 10000`: maximum number of iterations for the subsolver; +- `max_decreas_iter::Int = 10`: maximum number of iteration where ‖c(xₖ)‖₂ does not decrease before calling the problem locally infeasible; - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration; - `sub_verbose::Int = 0`: if > 0, display subsolver iteration details every `verbose` iteration; - `τ::T = T(100)`: initial penalty parameter; @@ -166,6 +167,7 @@ function SolverCore.solve!( max_time::T = T(30.0), max_eval::Int = -1, sub_max_eval::Int = -1, + max_decreas_iter = 10, verbose = 0, sub_verbose = 0, τ::T = T(100), @@ -200,7 +202,7 @@ function SolverCore.solve!( :iter => "outer", :sub_iter => "inner", :fx => "f(x)", - :hx => "h(x)", + :hx => "‖c(x)‖₂", :theta => "√θ", :xi => "√(ξ/ν)", :epsk => "ϵₖ", @@ -232,6 +234,8 @@ function SolverCore.solve!( done = false + n_iter_since_decrease = 0 + while !done model = RegularizedNLPModel(nlp, sub_ψ) solve!( @@ -254,6 +258,7 @@ function SolverCore.solve!( x .= solver.sub_stats.solution fx = solver.sub_stats.solver_specific[:smooth_obj] + hx_prev = copy(hx) hx = solver.sub_stats.solver_specific[:nonsmooth_obj]/τ sqrt_ξ_νInv = solver.sub_stats.solver_specific[:xi] @@ -269,8 +274,14 @@ function SolverCore.solve!( sub_ψ.h = NormL2(τ) solver.sub_solver.ψ.h = NormL2(τ) else + n_iter_since_decrease = 0 ktol = max(β2^(ceil(log(β2,sqrt_ξ_νInv/tol_init)))*ktol,atol) #the β^... allows to directly jump to a sufficiently small ϵₖ end + if sqrt_θ > ktol && hx_prev ≥ hx + n_iter_since_decrease += 1 + else + n_iter_since_decrease = 0 + end solved = (sqrt_θ ≤ atol && solver.sub_stats.status == :first_order) || (θ < 0 && sqrt_θ ≤ neg_tol && solver.sub_stats.status == :first_order) (θ < 0 && sqrt_θ > neg_tol) && error("L2Penalty: prox-gradient step should produce a decrease but θ = $(θ)") @@ -292,11 +303,13 @@ function SolverCore.solve!( get_status( nlp, elapsed_time = stats.elapsed_time, + n_iter_since_decrease = n_iter_since_decrease, iter = stats.iter, optimal = solved, max_eval = max_eval, max_time = max_time, - max_iter = max_iter + max_iter = max_iter, + max_decreas_iter = max_decreas_iter ), ) @@ -315,9 +328,11 @@ function get_status( elapsed_time = 0.0, iter = 0, optimal = false, + n_iter_since_decrease = 0, max_eval = Inf, max_time = Inf, max_iter = Inf, + max_decreas_iter = Inf ) where{ M <: AbstractNLPModel } if optimal :first_order @@ -327,7 +342,9 @@ function get_status( :max_time elseif neval_obj(nlp) > max_eval && max_eval > -1 :max_eval - else + elseif n_iter_since_decrease ≥ max_decreas_iter + :infeasible + else :unknown end end From e668e275f35bcc280575b1bbcd38a1de9634193e Mon Sep 17 00:00:00 2001 From: MaxenceGollier <134112149+MaxenceGollier@users.noreply.github.com> Date: Tue, 24 Sep 2024 13:57:05 -0400 Subject: [PATCH 10/19] remove println in R2N --- src/R2N.jl | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/R2N.jl b/src/R2N.jl index bb13496e..d75348fd 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -343,15 +343,7 @@ function SolverCore.solve!( ξ1 = hk - mks + max(1, abs(hk)) * 10 * eps() sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν_subsolver) : sqrt(-ξ1 / ν_subsolver) solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol) - if ξ1 < 0 && sqrt_ξ1_νInv > neg_tol - println(Matrix(ψ.A)) - println(ψ.b) - println(ψ.h.lambda) - println(mν∇fk) - println(ν_subsolver) - println(s) - - end + (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && error("R2N: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") @@ -396,4 +388,4 @@ function SolverCore.solve!( set_solution!(stats,xk) return stats -end \ No newline at end of file +end From 007d9bed3b83bb86b92ee5124940bedadd5d0f1f Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sat, 28 Sep 2024 17:27:58 -0400 Subject: [PATCH 11/19] Solve R2N bug - HS79 works from here --- src/L2Penalty_alg.jl | 4 ++-- src/R2N.jl | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/L2Penalty_alg.jl b/src/L2Penalty_alg.jl index d6ba6ad4..8c4e27de 100644 --- a/src/L2Penalty_alg.jl +++ b/src/L2Penalty_alg.jl @@ -390,7 +390,7 @@ function solve!( n = reg_nlp.model.meta.nvar m = length(reg_nlp.h.b) - Δ = reg_nlp.h.h.lambda + Δ = reg_nlp.h.h.lambda/σk u1 = solver.u1 u2 = solver.u2 @@ -405,7 +405,7 @@ function solve!( H2 = [reg_nlp.h.A αₖ*opEye(m,m)] H = [H1;H2] x1,_ = minres_qlp(H,u1) - + #println(stats_minres) if norm(x1[n+1:n+m]) <= Δ set_solution!(stats,x1[1:n]) return diff --git a/src/R2N.jl b/src/R2N.jl index d75348fd..7a3b4c1b 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -76,6 +76,7 @@ function SolverCore.solve!( max_iter::Int = 10000, max_time::Float64 = 30.0, max_eval::Int = -1, + σk = eps(T)^(-1/5), σmin::T = eps(T), η1::T = √√eps(T), η2::T = T(0.9), @@ -151,7 +152,6 @@ function SolverCore.solve!( local ξ1::T local ρk::T - σk = σmin fk = obj(nlp, xk) grad!(nlp, xk, ∇fk) @@ -266,7 +266,6 @@ function SolverCore.solve!( s .= s1 end - sub_atol = sub_atol_init xkn .= xk .+ s From 2991660136a159c0bb73bbba9798327cd9655c90 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sat, 28 Sep 2024 22:57:13 -0400 Subject: [PATCH 12/19] solve bug in R2N subsolver --- src/L2Penalty_alg.jl | 2 +- src/R2N.jl | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/L2Penalty_alg.jl b/src/L2Penalty_alg.jl index 8c4e27de..36d5dc54 100644 --- a/src/L2Penalty_alg.jl +++ b/src/L2Penalty_alg.jl @@ -396,7 +396,7 @@ function solve!( u2 = solver.u2 # Create problem - @. u1[1:n] = ∇fk + @. u1[1:n] = ∇fk/σk @. u1[n+1:n+m] = -reg_nlp.h.b αₖ = 0.0 diff --git a/src/R2N.jl b/src/R2N.jl index 7a3b4c1b..5a0b2691 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -76,12 +76,10 @@ function SolverCore.solve!( max_iter::Int = 10000, max_time::Float64 = 30.0, max_eval::Int = -1, - σk = eps(T)^(-1/5), σmin::T = eps(T), η1::T = √√eps(T), η2::T = T(0.9), ν = eps(T)^(1/5), - ν_subsolver = ν, γ::T = T(3), β = 1/eps(T), θ = eps(T)^(1/5), @@ -100,6 +98,7 @@ function SolverCore.solve!( # Make sure ψ has the correct shift shift!(solver.ψ, xk) + σk = 1/ν ∇fk = solver.∇fk ∇fk⁻ = solver.∇fk⁻ mν∇fk = solver.mν∇fk @@ -251,7 +250,7 @@ function SolverCore.solve!( model, sub_stats, ∇fk, - Bk + σk*opEye(nlp.meta.nvar,nlp.meta.nvar), + Bk/σk + opEye(nlp.meta.nvar,nlp.meta.nvar), σk, atol = sub_atol, max_time = max_time - stats.elapsed_time, From 3156e828d5da02c5c02dabe426ac05c46e77cd5f Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sat, 28 Sep 2024 23:49:25 -0400 Subject: [PATCH 13/19] solve final bug, everything should work from here --- src/R2N.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R2N.jl b/src/R2N.jl index 5a0b2691..856e5750 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -344,7 +344,7 @@ function SolverCore.solve!( (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && error("R2N: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - + set_solver_specific!(stats, :xi, sqrt_ξ1_νInv) set_status!( stats, get_status( From b611cad017b4216f85dec0b9138f5e5fb7984666 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Mon, 30 Sep 2024 16:03:33 -0400 Subject: [PATCH 14/19] add consistency check + safeguarding in R2N subsolver --- src/L2Penalty_alg.jl | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/L2Penalty_alg.jl b/src/L2Penalty_alg.jl index 36d5dc54..438f35ac 100644 --- a/src/L2Penalty_alg.jl +++ b/src/L2Penalty_alg.jl @@ -400,20 +400,27 @@ function solve!( @. u1[n+1:n+m] = -reg_nlp.h.b αₖ = 0.0 + θ = 0.8 H1 = [-Q reg_nlp.h.A'] H2 = [reg_nlp.h.A αₖ*opEye(m,m)] H = [H1;H2] - x1,_ = minres_qlp(H,u1) - #println(stats_minres) - if norm(x1[n+1:n+m]) <= Δ + x1,stats_minres = minres_qlp(H,u1) + + if norm(x1[n+1:n+m]) <= Δ && stats_minres.inconsistent == false #Check interior convergence for consistent solutions set_solution!(stats,x1[1:n]) return end + if stats_minres.inconsistent == true + αₖ = sqrt(eps(T)) + H2 = [reg_nlp.h.A αₖ*opEye(m,m)] + H = [H1;H2] + x1,_ = minres_qlp(H,u1) + end u2[n+1:n+m] .= x1[n+1:n+m] x2,_ = minres_qlp(H,u2) - αₖ += norm(x1[n+1:n+m])^2/(x1[n+1:n+m]'x2[n+1:n+m])*(norm(x1[n+1:n+m])- Δ)/Δ - k = 0 + α₊ = αₖ + norm(x1[n+1:n+m])^2/(x1[n+1:n+m]'x2[n+1:n+m])*(norm(x1[n+1:n+m])- Δ)/Δ + αₖ = α₊ ≤ 0 ? θ*α₊ : αₖ while abs(norm(x1[n+1:n+m]) - Δ) > eps(T)^(0.75) && stats.iter < max_iter && stats.elapsed_time < max_time H2 = [reg_nlp.h.A αₖ*opEye(m,m)] @@ -422,7 +429,8 @@ function solve!( x1,_ = minres_qlp(H,u1) u2[n+1:n+m] .= x1[n+1:n+m] x2,_ = minres_qlp(H,u2) - αₖ += norm(x1[n+1:n+m])^2/(x1[n+1:n+m]'x2[n+1:n+m])*(norm(x1[n+1:n+m])- Δ)/Δ + α₊ = αₖ + norm(x1[n+1:n+m])^2/(x1[n+1:n+m]'x2[n+1:n+m])*(norm(x1[n+1:n+m])- Δ)/Δ + αₖ = α₊ ≤ 0 ? θ*α₊ : αₖ set_iter!(stats,stats.iter + 1) set_time!(stats,time()-start_time) end From 2f42f84d2c15d45d69066e5daf249f87c96430bd Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Mon, 30 Sep 2024 17:33:56 -0400 Subject: [PATCH 15/19] add safeguard for smallest value of alpha --- src/L2Penalty_alg.jl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/L2Penalty_alg.jl b/src/L2Penalty_alg.jl index 438f35ac..1ef16d77 100644 --- a/src/L2Penalty_alg.jl +++ b/src/L2Penalty_alg.jl @@ -419,18 +419,26 @@ function solve!( end u2[n+1:n+m] .= x1[n+1:n+m] x2,_ = minres_qlp(H,u2) + α₊ = αₖ + norm(x1[n+1:n+m])^2/(x1[n+1:n+m]'x2[n+1:n+m])*(norm(x1[n+1:n+m])- Δ)/Δ αₖ = α₊ ≤ 0 ? θ*α₊ : αₖ + αₖ = αₖ ≤ sqrt(eps(T)) ? sqrt(eps(T)) : αₖ while abs(norm(x1[n+1:n+m]) - Δ) > eps(T)^(0.75) && stats.iter < max_iter && stats.elapsed_time < max_time + H2 = [reg_nlp.h.A αₖ*opEye(m,m)] H = [H1;H2] - x1,_ = minres_qlp(H,u1) + + αₖ == sqrt(eps(T)) && break + u2[n+1:n+m] .= x1[n+1:n+m] x2,_ = minres_qlp(H,u2) + α₊ = αₖ + norm(x1[n+1:n+m])^2/(x1[n+1:n+m]'x2[n+1:n+m])*(norm(x1[n+1:n+m])- Δ)/Δ αₖ = α₊ ≤ 0 ? θ*α₊ : αₖ + αₖ = αₖ ≤ sqrt(eps(T)) ? sqrt(eps(T)) : αₖ + set_iter!(stats,stats.iter + 1) set_time!(stats,time()-start_time) end From 0b16d5e5e445faa628f01c5b884d692793a72615 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Mon, 30 Sep 2024 19:01:50 -0400 Subject: [PATCH 16/19] test wether J has full row rank or not --- src/L2Penalty_alg.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/L2Penalty_alg.jl b/src/L2Penalty_alg.jl index 1ef16d77..6a91cf11 100644 --- a/src/L2Penalty_alg.jl +++ b/src/L2Penalty_alg.jl @@ -400,6 +400,7 @@ function solve!( @. u1[n+1:n+m] = -reg_nlp.h.b αₖ = 0.0 + αmin = sqrt(eps(T)) θ = 0.8 H1 = [-Q reg_nlp.h.A'] @@ -407,12 +408,12 @@ function solve!( H = [H1;H2] x1,stats_minres = minres_qlp(H,u1) - if norm(x1[n+1:n+m]) <= Δ && stats_minres.inconsistent == false #Check interior convergence for consistent solutions + if norm(x1[n+1:n+m]) <= Δ && reg_nlp.h.full_row_rank set_solution!(stats,x1[1:n]) return end if stats_minres.inconsistent == true - αₖ = sqrt(eps(T)) + αₖ = αmin H2 = [reg_nlp.h.A αₖ*opEye(m,m)] H = [H1;H2] x1,_ = minres_qlp(H,u1) @@ -422,7 +423,7 @@ function solve!( α₊ = αₖ + norm(x1[n+1:n+m])^2/(x1[n+1:n+m]'x2[n+1:n+m])*(norm(x1[n+1:n+m])- Δ)/Δ αₖ = α₊ ≤ 0 ? θ*α₊ : αₖ - αₖ = αₖ ≤ sqrt(eps(T)) ? sqrt(eps(T)) : αₖ + αₖ = αₖ ≤ αmin ? αmin : αₖ while abs(norm(x1[n+1:n+m]) - Δ) > eps(T)^(0.75) && stats.iter < max_iter && stats.elapsed_time < max_time @@ -430,7 +431,7 @@ function solve!( H = [H1;H2] x1,_ = minres_qlp(H,u1) - αₖ == sqrt(eps(T)) && break + αₖ == αmin && break u2[n+1:n+m] .= x1[n+1:n+m] x2,_ = minres_qlp(H,u2) From b1c827ee386d8f05d5bba83f63e3f081361ecf15 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 31 Oct 2024 15:05:30 +0000 Subject: [PATCH 17/19] update Project + correct tests --- Project.toml | 6 +++--- test/runtests.jl | 31 +++++++++++++++++++------------ 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/Project.toml b/Project.toml index acdf5ef3..b1104def 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,6 @@ author = ["Robert Baraldi and Dominique Orban Date: Thu, 31 Oct 2024 15:09:41 +0000 Subject: [PATCH 18/19] identation and type --- src/L2Penalty_alg.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/L2Penalty_alg.jl b/src/L2Penalty_alg.jl index 6a91cf11..a00c86d1 100644 --- a/src/L2Penalty_alg.jl +++ b/src/L2Penalty_alg.jl @@ -4,12 +4,12 @@ import SolverCore.solve! mutable struct L2PenaltySolver{T <: Real, V <: AbstractVector{T}, S <: AbstractOptimizationSolver} <: AbstractOptimizationSolver x::V - s::V - s0::V + s::V + s0::V ψ::ShiftedCompositeNormL2 - sub_ψ::CompositeNormL2 - sub_solver::S - sub_stats::GenericExecutionStats{T, V, V, Any} + sub_ψ::CompositeNormL2 + sub_solver::S + sub_stats::GenericExecutionStats{T, V, V, Any} end function L2PenaltySolver( @@ -222,7 +222,7 @@ function SolverCore.solve!( set_solver_specific!(stats,:nonsmooth_obj, hx) local θ::T - prox!(s, ψ, s0, 1.0) + prox!(s, ψ, s0, T(1)) θ = hx - ψ(s) sqrt_θ = θ ≥ 0 ? sqrt(θ) : sqrt(-θ) From 3999c8ef5020cc0121df87ceea2665cb763c9de2 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 31 Oct 2024 15:18:05 +0000 Subject: [PATCH 19/19] have only H instead of H1 H2 --- src/L2Penalty_alg.jl | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/L2Penalty_alg.jl b/src/L2Penalty_alg.jl index a00c86d1..e8a841f3 100644 --- a/src/L2Penalty_alg.jl +++ b/src/L2Penalty_alg.jl @@ -403,9 +403,7 @@ function solve!( αmin = sqrt(eps(T)) θ = 0.8 - H1 = [-Q reg_nlp.h.A'] - H2 = [reg_nlp.h.A αₖ*opEye(m,m)] - H = [H1;H2] + H = [[-Q reg_nlp.h.A'];[reg_nlp.h.A αₖ*opEye(m,m)]] x1,stats_minres = minres_qlp(H,u1) if norm(x1[n+1:n+m]) <= Δ && reg_nlp.h.full_row_rank @@ -414,8 +412,7 @@ function solve!( end if stats_minres.inconsistent == true αₖ = αmin - H2 = [reg_nlp.h.A αₖ*opEye(m,m)] - H = [H1;H2] + H = [[-Q reg_nlp.h.A'];[reg_nlp.h.A αₖ*opEye(m,m)]] x1,_ = minres_qlp(H,u1) end u2[n+1:n+m] .= x1[n+1:n+m] @@ -427,8 +424,7 @@ function solve!( while abs(norm(x1[n+1:n+m]) - Δ) > eps(T)^(0.75) && stats.iter < max_iter && stats.elapsed_time < max_time - H2 = [reg_nlp.h.A αₖ*opEye(m,m)] - H = [H1;H2] + H = [[-Q reg_nlp.h.A'];[reg_nlp.h.A αₖ*opEye(m,m)]] x1,_ = minres_qlp(H,u1) αₖ == αmin && break