diff --git a/Project.toml b/Project.toml index 1537bd1..713e3ea 100644 --- a/Project.toml +++ b/Project.toml @@ -6,24 +6,30 @@ version = "0.1.0" [deps] GenericArpack = "408c25d7-97fa-4992-8bd9-ce15f1a09fe9" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +LowRankOpt = "607ca3ad-272e-43c8-bcbe-fc71b56c935c" LuxurySparse = "d05aeea4-b7d4-55ac-b691-9e7fabb07ba2" MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2" MKLSparse = "0c723cd3-b8cd-5d40-b370-ba682dde9aae" +NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" PolynomialRoots = "3a141323-8675-5d76-9d11-e1df1406c778" Polynomials = "f27b6e38-b328-58d1-80ce-0feddd5e7a45" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SolverCore = "ff4d7338-4cf1-434d-91df-b86cb86fb843" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [compat] GenericArpack = "0.2" +LowRankOpt = "0.2.1" LuxurySparse = "0.7" MKL = "0.6, 0.7" MKLSparse = "1, 2" +NLPModels = "0.21.5" Parameters = "0.12" PolynomialRoots = "1" Polynomials = "4" +SolverCore = "0.3.8" julia = "1" [extras] diff --git a/exps/Project.toml b/exps/Project.toml index a18b9ba..d43bcb5 100644 --- a/exps/Project.toml +++ b/exps/Project.toml @@ -1,9 +1,12 @@ [deps] CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" +Dualization = "191a621a-6537-11e9-281d-650236a99e60" GenericArpack = "408c25d7-97fa-4992-8bd9-ce15f1a09fe9" +JuMP = "4076af6c-e467-56ae-b986-b466b2749572" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +LowRankOpt = "607ca3ad-272e-43c8-bcbe-fc71b56c935c" LuxurySparse = "d05aeea4-b7d4-55ac-b691-9e7fabb07ba2" MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2" MKLSparse = "0c723cd3-b8cd-5d40-b370-ba682dde9aae" diff --git a/exps/bench.jl b/exps/bench.jl new file mode 100644 index 0000000..6abc903 --- /dev/null +++ b/exps/bench.jl @@ -0,0 +1,118 @@ +using Revise +using SparseArrays +using SDPLRPlus + +using LowRankOpt +using Dualization +include(joinpath(dirname(dirname(pathof(LowRankOpt))), "examples", "maxcut.jl")) + +n = 500 +import Random +Random.seed!(0) +W = sprand(n, n, 0.01) +W = W + W' +include(joinpath(@__DIR__, "problems.jl")) +C, As, b = maxcut(W) +@time sdplr(C, As, b, 1, maxmajoriter = 20); + +# SDPLRPlus does not support sparse factor +As = [SymLowRankMatrix(Diagonal(ones(1)), hcat(e_i(Float64, i, n, sparse = false))) for i in 1:n] +d = SDPLRPlus.SDPData(C, As, b) +var = SDPLRPlus.SolverVars(d, 1) +aux = SDPLRPlus.SolverAuxiliary(d) +@time sdplr(C, As, b, 1, maxmajoriter = 50); + +model = maxcut(W, dual_optimizer(LRO.Optimizer)) +set_attribute(model, "solver", LRO.BurerMonteiro.Solver) +set_attribute(model, "sub_solver", SDPLRPlus.Solver) +set_attribute(model, "ranks", [1]) +set_attribute(model, "maxmajoriter", 0) +set_attribute(model, "printlevel", 3) +@profview optimize!(model) +solver = unsafe_backend(model).dual_problem.dual_model.model.optimizer.solver + +using BenchmarkTools +function A_sym_bench(aux, var, nlp, var_lro) + println("π’œ sym") + @btime SDPLRPlus.π’œ!(var.primal_vio, aux, var.Rt) + @btime SDPLRPlus.π’œ!(var_lro.primal_vio, nlp, var_lro.Rt) +end +function A_not_sym_bench(aux, var, nlp, var_lro) + println("π’œ not sym") + @btime SDPLRPlus.π’œ!(var.A_RD, aux, var.Rt, var.Gt) + @btime SDPLRPlus.π’œ!(var_lro.A_RD, nlp, var_lro.Rt, var_lro.Gt) +end +function At_bench(aux, var, nlp, var_lro) + println("π’œt") + @btime SDPLRPlus.π’œt!($var.Gt, $var.Rt, $aux, $var) + @btime SDPLRPlus.π’œt!($var_lro.Gt, $var_lro.Rt, $nlp, $var_lro) +end +function At_bench2(aux, var, nlp, var_lro) + println("π’œt rank-1") + x = rand(n) + y = similar(x) + @time SDPLRPlus.π’œt!(y, aux, x, var) + @time SDPLRPlus.π’œt!(y, nlp, x, var_lro) +end +nlp = solver.model +var_lro = solver.solver.var +A_sym_bench(aux, var, solver.model, solver.solver.var) +A_not_sym_bench(aux, var, solver.model, solver.solver.var) +At_bench(aux, var, solver.model, solver.solver.var) +At_bench2(aux, var, solver.model, solver.solver.var) +@profview SDPLRPlus.π’œ!(var.primal_vio, aux, var.Rt) +@profview for i in 1:100 + SDPLRPlus.π’œ!(var_lro.primal_vio, nlp, var_lro.Rt) +end +@time SDPLRPlus.π’œ!(var_lro.primal_vio, nlp, var_lro.Rt) +@time SDPLRPlus.π’œt!(var_lro.Gt, var_lro.Rt, nlp, var_lro) +@profview for _ in 1:1000 + SDPLRPlus.π’œt!(var_lro.Gt, var_lro.Rt, nlp, var_lro) +end +function bench_lmul(A) + n = LinearAlgebra.checksquare(A) + x = rand(1, n) + y = similar(x) + @profview for i in 1:100000 + LinearAlgebra.mul!(y, x, A, 2.0, 1.0) + end + #@btime LinearAlgebra.mul!($y, $x, $A, 2.0, 1.0) +end +function bench_rmul(A) + n = LinearAlgebra.checksquare(A) + x = rand(n, 1) + y = similar(x) + @btime LinearAlgebra.mul!($y, $A, $x, 2.0, 1.0) +end +bench_lmul(aux.symlowrank_As[1]); +A = aux.symlowrank_As[1] +n = LinearAlgebra.checksquare(A) +x = rand(1, n) +y = similar(x) +LinearAlgebra.mul!(y, x, A, 2.0, 1.0) + +bench_rmul(nlp.model.A[1]); +bench_lmul(nlp.model.A[1]) +A = nlp.model.A[1] +n = LinearAlgebra.checksquare(A) +x = rand(n, 1) +y = similar(x) +@time LinearAlgebra.mul!(y, A, x, 2.0, 1.0); +@btime LinearAlgebra.mul!($y, $A, $x, 2.0, 1.0); +methods(LinearAlgebra.mul!, typeof.((y, A, x, 2.0, 1.0))) + +A = nlp.model.A[1] +n = LinearAlgebra.checksquare(A) +x = rand(n) +y = similar(x) +x = rand(n) +y = similar(x) +@btime LinearAlgebra.mul!($y, $A, $x, 2.0, 1.0); + +C = LRO._lmul_diag!!(A.scaling, LRO.right_factor(A)' * x) +lA = LRO.left_factor(A) +@btime LinearAlgebra.mul!($y, $C, $lA) +@edit LinearAlgebra.mul!(y, lA, C); +@edit LinearAlgebra.mul!(y, lA, C, true, true); +@edit LinearAlgebra._rscale_add!(y, lA, C, true, true); +@edit LinearAlgebra.mul!($y, $lA, $C, 2.0, 1.0); diff --git a/src/SDPLRPlus.jl b/src/SDPLRPlus.jl index 80e35a4..1e6b47c 100644 --- a/src/SDPLRPlus.jl +++ b/src/SDPLRPlus.jl @@ -41,5 +41,6 @@ include("utils.jl") # main function include("sdplr.jl") +include("lowrankopt.jl") -end \ No newline at end of file +end diff --git a/src/coreop.jl b/src/coreop.jl index b3069dd..63cb9aa 100644 --- a/src/coreop.jl +++ b/src/coreop.jl @@ -6,18 +6,19 @@ the augmented Lagrangian value, β„’(R, Ξ», Οƒ) = Tr(C RRα΅€) - Ξ»α΅€(π’œ(RRα΅€) - b) + Οƒ/2 ||π’œ(RRα΅€) - b||^2 """ function f!( - data::SDPData{Ti, Tv, TC}, - var::SolverVars{Ti, Tv}, - aux::SolverAuxiliary{Ti, Tv}, -) where {Ti <: Integer, Tv, TC <: AbstractMatrix{Tv}} - m = data.m # number of constraints + data, + var::SolverVars, + aux, +) + m = length(var.Ξ») # number of constraints # apply the operator π’œ to RRα΅€ and compute the objective value - π’œ!(aux.primal_vio, aux.UVt, aux, var.Rt) - var.obj[] = aux.primal_vio[m+1] + π’œ!(var.primal_vio, aux, var.Rt) + var.obj[] = var.primal_vio[m+1] - v = @view aux.primal_vio[1:m] - @. v -= data.b + v = @view var.primal_vio[1:m] + b = b_vector(data) + @. v -= b return (var.obj[] - dot(var.Ξ», v) + var.Οƒ[] * dot(v, v) / 2) end @@ -27,7 +28,6 @@ Compute π’œ(UUα΅€) """ function π’œ!( π’œ_UUt::Vector{Tv}, - UUt::Vector{Tv}, aux::SolverAuxiliary{Ti, Tv}, Ut::Matrix{Tv}, ) where {Ti <: Integer, Tv} @@ -35,7 +35,7 @@ function π’œ!( # deal with sparse and diagonal constraints first # store results of π’œ(UUα΅€) if aux.n_sparse_matrices > 0 - π’œ_sparse!(π’œ_UUt, UUt, aux, Ut) + π’œ_sparse!(π’œ_UUt, aux.UVt, aux, Ut) end # then deal with low-rank matrices if aux.n_symlowrank_matrices > 0 @@ -48,7 +48,6 @@ Compute π’œ((UVα΅€ + VUα΅€)/2) """ function π’œ!( π’œ_UVt::Vector{Tv}, - UVt::Vector{Tv}, aux::SolverAuxiliary{Ti, Tv}, Ut::Matrix{Tv}, Vt::Matrix{Tv}; @@ -57,7 +56,7 @@ function π’œ!( # deal with sparse and diagonal constraints first # store results of 𝓐(UVα΅€ + VUα΅€)/2 if aux.n_sparse_matrices > 0 - π’œ_sparse!(π’œ_UVt, UVt, aux, Ut, Vt) + π’œ_sparse!(π’œ_UVt, aux.UVt, aux, Ut, Vt) end # then deal with low-rank matrices if aux.n_symlowrank_matrices > 0 @@ -226,13 +225,12 @@ end function copy2y_Ξ»_sub_pvio!( var::SolverVars{Ti, Tv}, - aux::SolverAuxiliary{Ti, Tv}, )where {Ti <: Integer, Tv} - m = length(aux.primal_vio)-1 + m = length(var.primal_vio)-1 @inbounds @simd for i = 1:m - aux.y[i] = -(var.Ξ»[i] - var.Οƒ[] * aux.primal_vio[i]) + var.y[i] = -(var.Ξ»[i] - var.Οƒ[] * var.primal_vio[i]) end - aux.y[m+1] = one(Tv) + var.y[m+1] = one(Tv) end @@ -240,22 +238,23 @@ function copy2y_Ξ»!( var::SolverVars{Ti, Tv}, aux::SolverAuxiliary{Ti, Tv}, )where {Ti <: Integer, Tv} - m = length(aux.primal_vio)-1 + m = length(var.primal_vio)-1 @inbounds @simd for i = 1:m - aux.y[i] = -var.Ξ»[i] + var.y[i] = -var.Ξ»[i] end - aux.y[m+1] = one(Tv) + var.y[m+1] = one(Tv) end function π’œt_preprocess!( + var::SolverVars{Ti, Tv}, aux::SolverAuxiliary{Ti, Tv}, ) where {Ti <: Integer, Tv} # update the sparse matrix # compute C - βˆ‘_{i=1}^{m} Ξ»_i A_i for sparse matrices if aux.n_sparse_matrices > 0 - v = aux.y[aux.sparse_As_global_inds] + v = var.y[aux.sparse_As_global_inds] π’œt_preprocess_sparse!(aux.sparse_S, aux.triu_sparse_S.nzval, v, aux) end end @@ -265,6 +264,7 @@ function π’œt!( y::Tx, x::Tx, aux::SolverAuxiliary{Ti, Tv}, + var::SolverVars{Ti, Tv}, )where{Ti <: Integer, Tv, Tx <: AbstractArray{Tv}} # zero out output vector fill!(y, zero(Tv)) @@ -278,7 +278,7 @@ function π’œt!( if aux.n_symlowrank_matrices > 0 for i = 1:aux.n_symlowrank_matrices global_id = aux.symlowrank_As_global_inds[i] - coeff = aux.y[global_id] + coeff = var.y[global_id] mul!(y, x, aux.symlowrank_As[i], coeff, one(Tv)) end end @@ -289,6 +289,7 @@ function π’œt!( y::Tx, aux::SolverAuxiliary{Ti, Tv}, x::Tx, + var::SolverVars{Ti, Tv}, ) where{Ti <: Integer, Tv, Tx <: AbstractArray{Tv}} # zero out output vector fill!(y, zero(Tv)) @@ -302,7 +303,7 @@ function π’œt!( if aux.n_symlowrank_matrices > 0 for i = 1:aux.n_symlowrank_matrices global_id = aux.symlowrank_As_global_inds[i] - coeff = aux.y[global_id] + coeff = var.y[global_id] mul!(y, aux.symlowrank_As[i], x, coeff, one(Tv)) end end @@ -314,14 +315,14 @@ This function computes the gradient of the augmented Lagrangian """ function g!( var::SolverVars{Ti, Tv}, - aux::SolverAuxiliary{Ti, Tv}, + aux, ) where{Ti <: Integer, Tv} π’œt_preprocess_dt = @elapsed begin - copy2y_Ξ»_sub_pvio!(var, aux) - π’œt_preprocess!(aux) + copy2y_Ξ»_sub_pvio!(var) + π’œt_preprocess!(var, aux) end densesparse_dt = @elapsed begin - π’œt!(var.Gt, var.Rt, aux) + π’œt!(var.Gt, var.Rt, aux, var) end @debug ("π’œt_preprocess_dt: $π’œt_preprocess_dt, densesparse_dt: $densesparse_dt") @@ -335,13 +336,13 @@ Function for computing Lagrangian value, stationary condition and primal feasibility. """ function fg!( - data::SDPData{Ti, Tv, TC}, + data, var::SolverVars{Ti, Tv}, - aux::SolverAuxiliary{Ti, Tv}, + aux, normC::Tv, normb::Tv, -) where {Ti <: Integer, Tv, TC <: AbstractMatrix{Tv}} - m = data.m +) where {Ti <: Integer, Tv} + m = length(var.Ξ») f_dt = @elapsed begin 𝓛_val = f!(data, var, aux) end @@ -351,7 +352,7 @@ function fg!( @debug "f dt, g dt" f_dt, g_dt grad_norm = norm(var.Gt, 2) / (1.0 + normC) - v = @view aux.primal_vio[1:m] + v = @view var.primal_vio[1:m] primal_vio_norm = norm(v, 2) / (1.0 + normb) return (𝓛_val, grad_norm, primal_vio_norm) end @@ -365,16 +366,16 @@ function SDP_S_eigval( kwargs... ) where {Ti <: Integer, Tv} if !preprocessed - π’œt_preprocess!(aux) + π’œt_preprocess!(aux, var) end n = size(aux.sparse_S, 1) GenericArpack_dt = @elapsed begin op = ArpackSimpleFunctionOp( (y, x) -> begin - π’œt!(y, aux, x) + π’œt!(y, aux, x, var) # shift the matrix by I y .+= x - return y + return y end, n) GenericArpack_eigvals, _ = symeigs(op, nevs; kwargs...) end @@ -385,18 +386,18 @@ end function surrogate_duality_gap( - data::SDPData{Ti, Tv, TC}, + data, var::SolverVars{Ti, Tv}, - aux::SolverAuxiliary{Ti, Tv}, + aux, trace_bound::Tv, iter::Ti; highprecision::Bool=false, -) where {Ti <: Integer, Tv, TC <: AbstractMatrix{Tv}} - copy2y_Ξ»_sub_pvio!(var, aux) +) where {Ti <: Integer, Tv} + copy2y_Ξ»_sub_pvio!(var) - π’œt_preprocess!(aux) + π’œt_preprocess!(var, aux) lanczos_dt = @elapsed begin - lanczos_eigenval = approx_mineigval_lanczos(aux, iter) + lanczos_eigenval = approx_mineigval_lanczos(var, aux, iter) end res = lanczos_eigenval if highprecision @@ -410,8 +411,9 @@ function surrogate_duality_gap( GenericArpack_evs = [0.0] end - m = length(data.b) - duality_gap = (var.obj[] + dot(aux.y[1:m], data.b) - + b = b_vector(data) + m = length(b_vector(data)) + duality_gap = (var.obj[] + dot(var.y[1:m], b) - trace_bound * min(res[1], 0.0)) rel_duality_gap = duality_gap / max(one(Tv), abs(var.obj[])) @@ -435,13 +437,13 @@ function DIMACS_errors( aux::SolverAuxiliary{Ti, Tv}, ) where {Ti <: Integer, Tv, TC <: AbstractMatrix{Tv}} n = size(aux.sparse_S, 1) - v = @view aux.primal_vio[1:data.m] + v = @view var.primal_vio[1:data.m] err1 = norm(v, 2) / (1.0 + norm(data.b, 2)) err2 = 0.0 err3 = 0.0 # err2, err3 are zero as X = YY^T, Z = C - π’œ^*(y) copy2y_Ξ»!(var, aux) - π’œt_preprocess!(aux) + π’œt_preprocess!(var, aux) GenericArpack_evs, _ = SDP_S_eigval(var, aux, 1, true; @@ -463,10 +465,11 @@ Perform `q` Lanczos iterations with *a random start vector* to approximate the minimum eigenvalue of `A`. """ function approx_mineigval_lanczos( - aux::SolverAuxiliary{Ti, Tv}, + var::SolverVars{Ti, Tv}, + aux, q::Ti, ) where {Ti <: Integer, Tv} - n::Ti = size(aux.sparse_S, 1) + n::Ti = side_dimension(aux) q = min(q, n-1) # allocate lanczos vectors @@ -483,15 +486,9 @@ function approx_mineigval_lanczos( iter = 0 - Btvs = Vector{Tv}[] - for _ = 1:aux.n_symlowrank_matrices - s = size(aux.symlowrank_As[1].B, 2) - push!(Btvs, zeros(Tv, s)) - end - for i = 1:q iter += 1 - π’œt!(Av, aux, v) + π’œt!(Av, aux, v, var) alpha[i] = v' * Av if i == 1 @@ -523,25 +520,16 @@ function approx_mineigval_lanczos( return real.(min_eigval)[1] - 1 # cancel the shift end +set_rank!(::SDPData, ::Int) = nothing function rank_update!( + data, var::SolverVars{Ti, Tv}, ) where {Ti <: Integer, Tv} - n = size(var.Rt, 2) - m = size(var.Ξ», 1) r = var.r[] - max_r = barvinok_pataki(n, m) + max_r = barvinok_pataki(data) newr = min(max_r, r * 2) + set_rank!(data, newr) - newRt = 2 * rand(Tv, newr, n) .- 1 - newΞ» = randn(m) - - return SolverVars( - newRt, - zeros(Tv, size(newRt)), - newΞ», - Ref(Ti(newr)), - Ref(Tv(2.0)), - Ref(Tv(0.0)) - ) -end \ No newline at end of file + return SolverVars(data, newr) +end diff --git a/src/lbfgs.jl b/src/lbfgs.jl index bada3ec..2744211 100644 --- a/src/lbfgs.jl +++ b/src/lbfgs.jl @@ -1,14 +1,14 @@ """ Vector of L-BFGS """ -struct LBFGSVector{T} +struct LBFGSVector{T,Ts<:AbstractArray{T}} # notice that we use matrix instead # of vector to store s and y because our # decision variables are matrices # s = xβ‚–β‚Šβ‚ - xβ‚– - s::Matrix{T} + s::Ts # y = βˆ‡ f(xβ‚–β‚Šβ‚) - βˆ‡ f(xβ‚–) - y::Matrix{T} + y::Ts # ρ = 1/(⟨y, s⟩) ρ::Base.RefValue{T} # temporary variable @@ -35,14 +35,14 @@ Base.:length(lbfgshis::LBFGSHistory) = lbfgshis.m Initialization of L-BFGS history """ function lbfgs_init( - R::Matrix{Tv}, + R::AbstractArray{Tv}, numlbfgsvecs::Ti, ) where {Ti <: Integer, Tv} lbfgsvecs = LBFGSVector{Tv}[] for _ = 1:numlbfgsvecs push!(lbfgsvecs, - LBFGSVector(zeros(Tv, size(R)), - zeros(Tv, size(R)), + LBFGSVector(zero(R), + zero(R), Ref(zero(Tv)), Ref(zero(Tv)), )) @@ -86,9 +86,9 @@ for i = k - m, k - m + 1, ... k - 1 end """ function lbfgs_dir!( - dir::Matrix{Tv}, + dir::AbstractArray{Tv}, lbfgshis::LBFGSHistory{Ti, Tv}, - grad::Matrix{Tv}; + grad::AbstractArray{Tv}; negate::Bool=true, ) where{Ti <: Integer, Tv} # we store l-bfgs vectors as a cyclic array @@ -139,9 +139,9 @@ end Postprocessing step of L-BFGS. """ function lbfgs_update!( - dir::Matrix{Tv}, + dir::AbstractArray{Tv}, lbfgshis::LBFGSHistory{Ti, Tv}, - grad::Matrix{Tv}, + grad::AbstractArray{Tv}, stepsize::Tv, )where {Ti<:Integer, Tv} # if m = 0, LBFGS degenerates to gradient descent @@ -159,4 +159,3 @@ function lbfgs_update!( lbfgshis.latest[] = j end - diff --git a/src/linesearch.jl b/src/linesearch.jl index 72b3a56..12a4165 100644 --- a/src/linesearch.jl +++ b/src/linesearch.jl @@ -3,20 +3,20 @@ Exact line search for minimizing the augmented Lagrangian """ function linesearch!( var::SolverVars{Ti, Tv}, - aux::SolverAuxiliary{Ti, Tv}, - Dt::Matrix{Tv}; + aux, + Dt::AbstractArray{Tv}; Ξ±_max = one(Tv), ) where{Ti <: Integer, Tv} - m = length(aux.primal_vio)-1 + m = length(var.primal_vio)-1 # evaluate 𝓐(RDα΅€ + DRα΅€) RD_dt = @elapsed begin - π’œ!(aux.A_RD, aux.UVt, aux, var.Rt, Dt) + π’œ!(var.A_RD, aux, var.Rt, Dt) end # remember we divide it by 2 in Aoper, now scale back - aux.A_RD .*= 2.0 + var.A_RD .*= 2.0 # evaluate 𝓐(DDα΅€) DD_dt = @elapsed begin - π’œ!(aux.A_DD, aux.UVt, aux, Dt, Dt) + π’œ!(var.A_DD, aux, Dt, Dt) end @debug "RD_dt, DD_dt" RD_dt, DD_dt @@ -26,7 +26,7 @@ function linesearch!( # p0 = ⟨ C, RRα΅€βŸ© = var.obj[] # p1 = ⟨ C, (RDα΅€ + DRα΅€)⟩ = C_RD # p2 = ⟨ C, DDα΅€βŸ© = C_DD - # (-q0) = 𝓐(RRα΅€) - b = aux.primal_vio + # (-q0) = 𝓐(RRα΅€) - b = var.primal_vio # q1 = 𝓐(RDα΅€ + DRα΅€) = 𝓐_RD # q2 = 𝓐(DDα΅€) = 𝓐_DD @@ -37,11 +37,11 @@ function linesearch!( # d = p1 - Ξ»α΅€ * q1 + Οƒ (-q0)α΅€ * q1 # e = p0 - Ξ»α΅€ * (-q0) + Οƒ / 2 * ||-q0||Β² p0 = var.obj[] - p1 = aux.A_RD[m+1] - p2 = aux.A_DD[m+1] - neg_q0 = @view aux.primal_vio[1:m] - q1 = @view aux.A_RD[1:m] - q2 = @view aux.A_DD[1:m] + p1 = var.A_RD[m+1] + p2 = var.A_DD[m+1] + neg_q0 = @view var.primal_vio[1:m] + q1 = @view var.A_RD[1:m] + q2 = @view var.A_DD[1:m] Οƒ = var.Οƒ[] biquadratic[1] = (p0 - dot(var.Ξ», neg_q0) + Οƒ * dot(neg_q0, neg_q0) / 2) @@ -119,8 +119,8 @@ function linesearch!( # notice that # 𝓐((R + Ξ±D)(R + Ξ±D)α΅€) = # 𝓐(RRα΅€) + Ξ± 𝓐(RDα΅€ + DRα΅€) + Ξ±Β² 𝓐(DDα΅€) - @. aux.primal_vio += Ξ±_star * (Ξ±_star * aux.A_DD + aux.A_RD) - var.obj[] = aux.primal_vio[m+1] + @. var.primal_vio += Ξ±_star * (Ξ±_star * var.A_DD + var.A_RD) + var.obj[] = var.primal_vio[m+1] return Ξ±_star, f_star -end \ No newline at end of file +end diff --git a/src/lowrankopt.jl b/src/lowrankopt.jl new file mode 100644 index 0000000..20c61d1 --- /dev/null +++ b/src/lowrankopt.jl @@ -0,0 +1,132 @@ +import SolverCore, NLPModels + +struct Solver <: SolverCore.AbstractOptimizationSolver + var::SolverVars{Int,Float64} + stats::SolverStats{Float64} + config::BurerMonteiroConfig{Int,Float64} +end + +function Solver(src::NLPModels.AbstractNLPModel; config=BurerMonteiroConfig{Int,Float64}(), kwargs...) + for (key, value) in kwargs + if hasfield(BurerMonteiroConfig, Symbol(key)) + setfield!(config, Symbol(key), value) + else + @error "Unrecognized keyword argument $key" + end + end + + if length(src.meta.ifree) != src.meta.nvar + error("SDPLRPlus only supports free variables, use `set_attribute(model, \"square_scalars\", true)`") + end + var = SolverVars(src, src.dim.ranks[]) + stats = SolverStats{Float64}() + return Solver(var, stats, config) +end + +function SolverCore.solve!( + solver::Solver, + model::NLPModels.AbstractNLPModel, # Same the one given in the constructor + stats::SolverCore.GenericExecutionStats; + kwargs..., +) + for (key, value) in kwargs + if hasfield(BurerMonteiroConfig, Symbol(key)) + setfield!(solver.config, Symbol(key), value) + else + @warn "Unrecognized keyword argument $key" + end + end + + ans = _sdplr(model, solver.var, model, solver.stats, solver.config) + stats.status = :first_order # TODO find the actual status + copyto!(stats.solution, solver.var.Rt) + copyto!(stats.multipliers, solver.var.Ξ») + stats.elapsed_time = ans["totaltime"] + return +end + +import LowRankOpt as LRO + +b_vector(model::LRO.BurerMonteiro.Model) = LRO.cons_constant(model.model) +C_matrix(model::LRO.BurerMonteiro.Model) = LRO.grad(model.model, LRO.MatrixIndex(1)) + +side_dimension(model::LRO.BurerMonteiro.Model) = model.dim.side_dimensions[] + +barvinok_pataki(data::LRO.BurerMonteiro.Model) = barvinok_pataki(side_dimension(data), data.meta.ncon) + +function set_rank!(model::LRO.BurerMonteiro.Model, r) + LRO.BurerMonteiro.set_rank!(model, LRO.MatrixIndex(1), r) +end + +function SolverVars(data::LRO.BurerMonteiro.Model, r) + # randomly initialize primal and dual variables + set_rank!(data, r) + Rt0 = 2 .* rand(length(data.dim)) .- 1 + Ξ»0 = randn(data.meta.ncon) + return SolverVars(Rt0, Ξ»0, r) +end + +function π’œ!( + π’œ_UUt::Vector{Tv}, + model::LRO.BurerMonteiro.Model, + x::Vector{Tv}, +) where {Tv} + m = model.meta.ncon + NLPModels.cons!(model, x, view(π’œ_UUt, 1:m)) + π’œ_UUt[end] = NLPModels.obj(model, x) +end + +function π’œ!( + π’œ_UVt::Vector{Tv}, + model::LRO.BurerMonteiro.Model, + u::Vector{Tv}, + v::Vector{Tv}, +) where {Tv} + m = model.meta.ncon + NLPModels.jprod!(model, u, v, view(π’œ_UVt, 1:m)) + π’œ_UVt[end] = LRO.BurerMonteiro.gprod(model, u, v) + π’œ_UVt ./= 2 + return +end + +function π’œt_preprocess!(::SolverVars, ::LRO.BurerMonteiro.Model) end + +function π’œt!( + Jtv::Vector, + x::Vector, + model::LRO.BurerMonteiro.Model, + var::SolverVars, +) + y = view(var.y, 1:model.meta.ncon) + NLPModels.jtprod!(model, x, y, Jtv) + Jtv .+= NLPModels.grad(model, x) + Jtv ./= 2 + return +end + +# `Jtv` is the transpose of the above one. +# It is only used when `Jtv` is a vector +# and we are working with the vectorization so +# it doesn't change anything. +function π’œt!( + Jtv::Vector, + model::LRO.BurerMonteiro.Model, + x::Vector, + var::SolverVars, +) + r = model.dim.ranks[1] + if r != 1 + set_rank!(model, 1) + end + i = LRO.MatrixIndex(1) + X = LRO.positive_semidefinite_factorization(x) + JtV = LRO.positive_semidefinite_factorization(Jtv) + LRO.BurerMonteiro.grad!(model, X, JtV, i) + y = view(var.y, 1:model.meta.ncon) + LRO.BurerMonteiro.add_jtprod!(model, X, y, JtV, i) + Jtv ./= 2 + if r != 1 + set_rank!(model, r) + end + return +end diff --git a/src/sdplr.jl b/src/sdplr.jl index 6ec8938..356515a 100644 --- a/src/sdplr.jl +++ b/src/sdplr.jl @@ -71,7 +71,7 @@ The default value is ``10^{-2}``. """ function sdplr( C::AbstractMatrix{Tv}, - As::Vector{Any}, + As::Vector, b::Vector{Tv}, r::Ti; config::BurerMonteiroConfig{Ti, Tv}=BurerMonteiroConfig{Ti, Tv}(), @@ -92,100 +92,10 @@ function sdplr( end preprocess_dt = @elapsed begin - m = length(As) - sparse_cons = Union{SparseMatrixCSC{Tv, Ti}, SparseMatrixCOO{Tv, Ti}}[] - symlowrank_cons = SymLowRankMatrix{Tv}[] - # treat diagonal matrices as sparse matrices - sparse_As_global_inds = Ti[] - symlowrank_As_global_inds = Ti[] - - for (i, A) in enumerate(As) - if isa(A, Union{SparseMatrixCSC, SparseMatrixCOO}) - push!(sparse_cons, A) - push!(sparse_As_global_inds, i) - elseif isa(A, Diagonal) - push!(sparse_cons, sparse(A)) - push!(sparse_As_global_inds, i) - elseif isa(A, SymLowRankMatrix) - push!(symlowrank_cons, A) - push!(symlowrank_As_global_inds, i) - else - @error "Currently only sparse\ - /symmetric low-rank\ - /diagonal constraints are supported." - end - end - - if isa(C, Union{SparseMatrixCSC, SparseMatrixCOO}) - push!(sparse_cons, C) - push!(sparse_As_global_inds, m+1) - elseif isa(C, Diagonal) - push!(sparse_cons, sparse(C)) - push!(sparse_As_global_inds, m+1) - elseif isa(C, SymLowRankMatrix) - push!(symlowrank_cons, C) - push!(symlowrank_As_global_inds, m+1) - else - @error "Currently only sparse\ - /lowrank/diagonal objectives are supported." - end - - @info "Finish classifying constraints." - - # preprocess sparse constraints - res = @timed begin - triu_agg_sparse_A, triu_agg_sparse_A_matptr, - triu_agg_sparse_A_nzind, triu_agg_sparse_A_nzval_one, - triu_agg_sparse_A_nzval_two, agg_sparse_A, - agg_sparse_A_mappedto_triu = preprocess_sparsecons(sparse_cons) - end - @debug "$(res.bytes)B allocated during preprocessing constraints." - - n = size(C, 1) - nnz_triu_agg_sparse_A = length(triu_agg_sparse_A.rowval) - - # randomly initialize primal and dual variables - Rt0 = 2 .* rand(r, n) .- 1 - Ξ»0 = randn(m) - - data = SDPData(n, m, C, As, b) - var = SolverVars( - Rt0, - zeros(Tv, size(Rt0)), - Ξ»0, - Ref(r), - Ref(2.0), # initial Οƒ - Ref(zero(Tv)), - ) - aux = SolverAuxiliary( - length(sparse_cons), - triu_agg_sparse_A_matptr, - triu_agg_sparse_A_nzind, - triu_agg_sparse_A_nzval_one, - triu_agg_sparse_A_nzval_two, - agg_sparse_A_mappedto_triu, - sparse_As_global_inds, - - triu_agg_sparse_A, - agg_sparse_A, - zeros(Tv, nnz_triu_agg_sparse_A), # UVt - zeros(Tv, m+1), zeros(Tv, m+1), # A_RD, A_DD - - length(symlowrank_cons), #n_symlowrank_matrices - symlowrank_cons, - symlowrank_As_global_inds, - - zeros(Tv, m+1), # y, auxiliary variable for π’œt - zeros(Tv, m+1), # primal_vio - ) - stats = SolverStats( - Ref(zero(Tv)), # starttime - Ref(zero(Tv)), # endtime - Ref(zero(Tv)), # time spent on lanczos with random start - Ref(zero(Tv)), # time spent on GenericArpack - Ref(zero(Tv)), # primal time - Ref(zero(Tv)), # DIMACS time - ) + data = SDPData(C, As, b) + var = SolverVars(data, r) + aux = SolverAuxiliary(data) + stats = SolverStats{Tv}() end @debug "preprocess dt" preprocess_dt @@ -203,16 +113,15 @@ function sdplr( return ans end - function _sdplr( - data::SDPData{Ti, Tv, TC}, + data, var::SolverVars{Ti, Tv}, - aux::SolverAuxiliary{Ti, Tv}, + aux, stats::SolverStats{Tv}, config::BurerMonteiroConfig{Ti, Tv}, -) where{Ti <: Integer, Tv, TC <: AbstractMatrix{Tv}} - n = data.n # size of decision variables - m = data.m # number of constraints +) where{Ti <: Integer, Tv} + n = side_dimension(aux) + m = length(var.Ξ») # number of constraints stats.starttime[] = time() @@ -222,8 +131,8 @@ function _sdplr( Ξ»0 = deepcopy(var.Ξ») # set up algorithm parameters - normb = norm(data.b, 2) - normC = norm(data.C, 2) + normb = norm(b_vector(data), 2) + normC = norm(C_matrix(data), 2) # initialize lbfgs datastructures lbfgshis = lbfgs_init(var.Rt, config.numlbfgsvecs) @@ -277,7 +186,7 @@ function _sdplr( end @debug "g time" g_dt grad_norm = norm(var.Gt, 2) / (1.0 + normC) - v = @view aux.primal_vio[1:m] + v = @view var.primal_vio[1:m] primal_vio_norm = norm(v, 2) / (1.0 + normb) # if change of the Lagrangian value is small enough @@ -331,6 +240,10 @@ function _sdplr( if primal_vio_norm <= cur_ptol if primal_vio_norm <= config.ptol @debug "primal vio is small enough, checking duality bound." + if config.objtol == Inf + @debug "`objtol` is `Inf`, skipping duality gap check" + break + end eig_iter = Ti(2*ceil(max(iter, 100)^0.5*log(n))) # when highprecision=true, then GenericArpack will be used @@ -358,17 +271,12 @@ function _sdplr( rank_double = true end #last_rel_duality_bound = rel_duality_bound - v = @view aux.primal_vio[1:m] - axpy!(-var.Οƒ[], v, var.Ξ») - cur_ptol = cur_ptol / var.Οƒ[]^0.9 - cur_gtol = cur_gtol / var.Οƒ[] end - else - v = @view aux.primal_vio[1:m] - axpy!(-var.Οƒ[], v, var.Ξ») - cur_ptol = cur_ptol / var.Οƒ[]^0.9 - cur_gtol = cur_gtol / var.Οƒ[] end + v = @view var.primal_vio[1:m] + axpy!(-var.Οƒ[], v, var.Ξ») + cur_ptol = cur_ptol / var.Οƒ[]^0.9 + cur_gtol = cur_gtol / var.Οƒ[] else var.Οƒ[] *= config.Οƒfac cur_ptol = 1 / var.Οƒ[]^0.1 @@ -377,14 +285,14 @@ function _sdplr( # when objective gap doesn't improve, we double the rank if rank_double - var = rank_update!(var) + var = rank_update!(data, var) cur_ptol = 1 / var.Οƒ[]^0.1 cur_gtol = 1 / var.Οƒ[] lbfgshis = lbfgs_init(var.Rt, config.numlbfgsvecs) dirt = similar(var.Rt) min_rel_duality_gap = 1e20 rankupd_tol_cnt = config.rankupd_tol - @info "rank doubled, newrank is $(size(var.Rt, 1))." + @info "rank doubled, newrank is $(var.r[])." else lbfgs_clear!(lbfgshis) end diff --git a/src/structs.jl b/src/structs.jl index df691be..4c52f25 100644 --- a/src/structs.jl +++ b/src/structs.jl @@ -165,14 +165,19 @@ end """ Structure for storing the data of a semidefinite programming problem. """ -struct SDPData{Ti <: Integer, Tv, TC <: AbstractMatrix{Tv}} +struct SDPData{Ti <: Integer, Tv, TC <: AbstractMatrix{Tv}, TA} n::Ti # size of decision variables m::Ti # number of constraints C::TC # cost matrix - As::Vector{Any} # set of constraint matrices + As::Vector{TA} # set of constraint matrices b::Vector{Tv} # right-hand side b end +SDPData(C, As, b) = SDPData(size(C, 1), length(As), C, As, b) + +b_vector(model) = model.b +C_matrix(model) = model.C + # The scalar variables in the following three structures # are stored by RefValue such that we can declare the structures # as unmutable structs. @@ -183,14 +188,46 @@ end """ Structure for storing the variables used in the solver. """ -struct SolverVars{Ti <: Integer,Tv} - Rt::Matrix{Tv} # primal variables X = RR^T - Gt::Matrix{Tv} # gradient w.r.t. R +struct SolverVars{Ti <: Integer,Tv,TR<:AbstractArray{Tv}} + Rt::TR # primal variables X = RR^T + Gt::TR # gradient w.r.t. R Ξ»::Vector{Tv} # dual variables r::Base.RefValue{Ti} # predetermined rank of R, i.e. R ∈ ℝⁿˣʳ Οƒ::Base.RefValue{Tv} # penalty parameter obj::Base.RefValue{Tv} # objective + + # auxiliary variable y = -Ξ» + Οƒ * primal_vio + y::Vector{Tv} + # violation of constraints, for convenience, we store + # a length (m+1) vector where + # the first m entries correspond to the primal violation + # and the last entry corresponds to the objective + primal_vio::Vector{Tv} + A_RD::Vector{Tv} + A_DD::Vector{Tv} +end + +function SolverVars(data::SDPData, r) + # randomly initialize primal and dual variables + Rt0 = 2 .* rand(r, data.n) .- 1 + Ξ»0 = randn(data.m) + return SolverVars(Rt0, Ξ»0, r) +end + +function SolverVars(Rt0, Ξ»0::Vector{Tv}, r) where {Tv} + m = length(Ξ»0) + return SolverVars( + Rt0, + zeros(Tv, size(Rt0)), + Ξ»0, + Ref(r), + Ref(2.0), # initial Οƒ + Ref(zero(Tv)), + zeros(Tv, m+1), # y, auxiliary variable for π’œt + zeros(Tv, m+1), # primal_vio + zeros(Tv, m+1), zeros(Tv, m+1), # A_RD, A_DD + ) end @@ -213,23 +250,84 @@ struct SolverAuxiliary{Ti <: Integer, Tv} triu_sparse_S::SparseMatrixCSC{Tv, Ti} sparse_S::SparseMatrixCSC{Tv, Ti} UVt::Vector{Tv} - A_RD::Vector{Tv} - A_DD::Vector{Tv} # symmetric low-rank constraints n_symlowrank_matrices::Ti symlowrank_As::Vector{SymLowRankMatrix{Tv}} symlowrank_As_global_inds::Vector{Ti} - - # auxiliary variable y = -Ξ» + Οƒ * primal_vio - y::Vector{Tv} - # violation of constraints, for convenience, we store - # a length (m+1) vector where - # the first m entries correspond to the primal violation - # and the last entry corresponds to the objective - primal_vio::Vector{Tv} end +function SolverAuxiliary(data::SDPData{Ti,Tv}) where {Ti,Tv} + sparse_cons = Union{SparseMatrixCSC{Tv, Ti}, SparseMatrixCOO{Tv, Ti}}[] + symlowrank_cons = SymLowRankMatrix{Tv}[] + # treat diagonal matrices as sparse matrices + sparse_As_global_inds = Ti[] + symlowrank_As_global_inds = Ti[] + + for (i, A) in enumerate(data.As) + if isa(A, Union{SparseMatrixCSC, SparseMatrixCOO}) + push!(sparse_cons, A) + push!(sparse_As_global_inds, i) + elseif isa(A, Diagonal) + push!(sparse_cons, sparse(A)) + push!(sparse_As_global_inds, i) + elseif isa(A, SymLowRankMatrix) + push!(symlowrank_cons, A) + push!(symlowrank_As_global_inds, i) + else + @error "Currently only sparse\ + /symmetric low-rank\ + /diagonal constraints are supported." + end + end + + if isa(data.C, Union{SparseMatrixCSC, SparseMatrixCOO}) + push!(sparse_cons, data.C) + push!(sparse_As_global_inds, data.m+1) + elseif isa(data.C, Diagonal) + push!(sparse_cons, sparse(data.C)) + push!(sparse_As_global_inds, data.m+1) + elseif isa(data.C, SymLowRankMatrix) + push!(symlowrank_cons, data.C) + push!(symlowrank_As_global_inds, data.m+1) + else + @error "Currently only sparse\ + /lowrank/diagonal objectives are supported." + end + + @info "Finish classifying constraints." + + # preprocess sparse constraints + res = @timed begin + triu_agg_sparse_A, triu_agg_sparse_A_matptr, + triu_agg_sparse_A_nzind, triu_agg_sparse_A_nzval_one, + triu_agg_sparse_A_nzval_two, agg_sparse_A, + agg_sparse_A_mappedto_triu = preprocess_sparsecons(sparse_cons) + end + @debug "$(res.bytes)B allocated during preprocessing constraints." + + nnz_triu_agg_sparse_A = length(triu_agg_sparse_A.rowval) + + return SolverAuxiliary( + length(sparse_cons), + triu_agg_sparse_A_matptr, + triu_agg_sparse_A_nzind, + triu_agg_sparse_A_nzval_one, + triu_agg_sparse_A_nzval_two, + agg_sparse_A_mappedto_triu, + sparse_As_global_inds, + + triu_agg_sparse_A, + agg_sparse_A, + zeros(Tv, nnz_triu_agg_sparse_A), # UVt + + length(symlowrank_cons), #n_symlowrank_matrices + symlowrank_cons, + symlowrank_As_global_inds, + ) +end + +side_dimension(aux::SolverAuxiliary) = size(aux.sparse_S, 1) struct SolverStats{Tv} starttime::Base.RefValue{Tv} # timing @@ -238,8 +336,14 @@ struct SolverStats{Tv} dual_GenericArpack_time::Base.RefValue{Tv} # total time - dual_lanczos_time - dual_GenericArpack_time primal_time::Base.RefValue{Tv} DIMACS_time::Base.RefValue{Tv} # time spent on computing the DIMACS stats which is not included in the total time + function SolverStats{Tv}() where {Tv} + return new{Tv}( + Ref(zero(Tv)), # starttime + Ref(zero(Tv)), # endtime + Ref(zero(Tv)), # time spent on lanczos with random start + Ref(zero(Tv)), # time spent on GenericArpack + Ref(zero(Tv)), # primal time + Ref(zero(Tv)), # DIMACS time + ) + end end - - - - diff --git a/src/utils.jl b/src/utils.jl index 8fa8dfd..bd77785 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -6,4 +6,6 @@ for a SDP problem where X has size n x n and there are m constraints. """ function barvinok_pataki(n::T, m::T) where {T <: Int} return min(n, T(floor(sqrt(2*m)+1))) -end \ No newline at end of file +end + +barvinok_pataki(data::SDPData) = barvinok_pataki(data.n, data.m)