
Commit 6037131

maximal flexibility
1 parent 6e823d4 commit 6037131

7 files changed (+130 -84 lines)


Diff for: Project.toml (+11 -3)

@@ -1,18 +1,26 @@
 name = "OptimKit"
 uuid = "77e91f04-9b3b-57a6-a776-40b61faaebe0"
 authors = ["Jutho Haegeman"]
-version = "0.4"
+version = "0.4.0"
 
 [deps]
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+ScopedValues = "7e506255-f358-4e82-b7e4-beb19740aa63"
 
 [compat]
+Aqua = "0.8"
+LinearAlgebra = "1"
+Printf = "1"
+Random = "1"
+ScopedValues = "1.3.0"
+Test = "1"
 julia = "1.6"
 
 [extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "Random"]
+test = ["Test", "Random", "Aqua"]

Diff for: src/OptimKit.jl (+10)

@@ -2,8 +2,18 @@ module OptimKit
 
 using LinearAlgebra: LinearAlgebra
 using Printf
+using ScopedValues
 using Base: @kwdef
 
+# Default values for the keyword arguments using ScopedValues
+const LS_MAXITER = ScopedValue(10)
+const LS_MAXFG = ScopedValue(20)
+const LS_VERBOSITY = ScopedValue(1)
+
+const GRADTOL = ScopedValue(1e-8)
+const MAXITER = ScopedValue(1_000_000)
+const VERBOSITY = ScopedValue(1)
+
 _retract(x, d, α) = (x + α * d, d)
 _inner(x, v1, v2) = v1 === v2 ? LinearAlgebra.norm(v1)^2 : LinearAlgebra.dot(v1, v2)
 _transport!(v, xold, d, α, xnew) = v

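These `ScopedValue` constants let callers override the package-wide defaults for a dynamic scope rather than globally; the new ScopedValues.jl dependency provides this mechanism on Julia versions predating `Base.ScopedValues` (introduced in Julia 1.11), consistent with the `julia = "1.6"` compat bound above. A minimal usage sketch, not part of this commit; the toy cost function is invented for illustration, and note that the override must enclose the construction of the algorithm, since the constructors read the defaults:

using OptimKit, ScopedValues

fg(x) = (sum(abs2, x), 2x)  # toy quadratic: f(x) = ‖x‖², gradient 2x

# GradientDescent() is constructed inside the `with` block, so it picks up
# GRADTOL[] == 1e-12 and VERBOSITY[] == 2; outside, the defaults apply again.
result = with(OptimKit.GRADTOL => 1e-12, OptimKit.VERBOSITY => 2) do
    optimize(fg, randn(3), GradientDescent())
end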
Diff for: src/cg.jl (+32 -21)

@@ -5,29 +5,41 @@ abstract type CGFlavor end
     ConjugateGradient(;
         flavor::CGFlavor=HagerZhang(),
         restart::Int=typemax(Int),
-        maxiter::Int=typemax(Int),
-        gradtol::Real=1e-8,
-        verbosity::Int=1,
-        ls_verbosity::Int=1,
-        linesearch::AbstractLineSearch=HagerZhangLineSearch())
+        maxiter::Int=MAXITER[], # 1_000_000
+        gradtol::Real=GRADTOL[], # 1e-8
+        verbosity::Int=VERBOSITY[], # 1
+        ls_maxiter::Int=LS_MAXITER[], # 10
+        ls_maxfg::Int=LS_MAXFG[], # 20
+        ls_verbosity::Int=LS_VERBOSITY[], # 1
+        linesearch = HagerZhangLineSearch(maxiter=ls_maxiter, maxfg=ls_maxfg, verbosity=ls_verbosity))
 
 ConjugateGradient optimization algorithm.
 
-## Fields
-- `flavor`: The flavor of the conjugate gradient algorithm (for selecting the β parameter)
+## Parameters
+- `flavor`: The flavor of the conjugate gradient algorithm (for selecting the β parameter; see below)
 - `restart::Int`: The number of iterations after which to reset the search direction.
 - `maxiter::Int`: The maximum number of iterations.
 - `gradtol::T`: The tolerance for the norm of the gradient.
-- `linesearch::L`: The line search algorithm to use.
 - `verbosity::Int`: The verbosity level of the optimization algorithm.
+- `ls_maxiter::Int`: The maximum number of iterations for the line search.
+- `ls_maxfg::Int`: The maximum number of function evaluations for the line search.
 - `ls_verbosity::Int`: The verbosity level of the line search algorithm.
+- `linesearch`: The line search algorithm to use; if a custom value is provided,
+  it overrides `ls_maxiter`, `ls_maxfg`, and `ls_verbosity`.
 
 Both verbosity levels use the following scheme:
 - 0: no output
 - 1: only warnings upon non-convergence
 - 2: convergence information at the end of the algorithm
 - 3: progress information after each iteration
 - 4: more detailed information (only for the linesearch)
+
+The `flavor` parameter can take the values
+- `HagerZhang(; η::Real=4 // 10, θ::Real=1 // 1)`: Hager-Zhang formula for β
+- `HestenesStiefel(; pos = true)`: Hestenes-Stiefel formula for β
+- `FletcherReeves()`: Fletcher-Reeves formula for β
+- `PolakRibiere(; pos = true)`: Polak-Ribiere formula for β
+- `DaiYuan()`: Dai-Yuan formula for β
 """
 struct ConjugateGradient{F<:CGFlavor,T<:Real,L<:AbstractLineSearch} <: OptimizationAlgorithm
     flavor::F
@@ -36,20 +48,21 @@ struct ConjugateGradient{F<:CGFlavor,T<:Real,L<:AbstractLineSearch} <: Optimizat
     gradtol::T
     verbosity::Int
     linesearch::L
-    ls_maxiter::Int
-    ls_verbosity::Int
 end
 function ConjugateGradient(;
                            flavor::CGFlavor=HagerZhang(),
                            restart::Int=typemax(Int),
-                           maxiter::Int=typemax(Int),
-                           gradtol::Real=1e-8,
-                           verbosity::Int=1,
-                           ls_maxiter::Int=10,
-                           ls_verbosity::Int=1,
-                           linesearch::AbstractLineSearch=HagerZhangLineSearch())
-    return ConjugateGradient(flavor, restart, maxiter, gradtol, verbosity,
-                             linesearch, ls_maxiter, ls_verbosity)
+                           maxiter::Int=MAXITER[],
+                           gradtol::Real=GRADTOL[],
+                           verbosity::Int=VERBOSITY[],
+                           ls_maxiter::Int=LS_MAXITER[],
+                           ls_maxfg::Int=LS_MAXFG[],
+                           ls_verbosity::Int=LS_VERBOSITY[],
+                           linesearch::AbstractLineSearch=HagerZhangLineSearch(;
+                                                                               maxiter=ls_maxiter,
+                                                                               maxfg=ls_maxfg,
+                                                                               verbosity=ls_verbosity))
+    return ConjugateGradient(flavor, restart, maxiter, gradtol, verbosity, linesearch)
 end
 
 function optimize(fg, x, alg::ConjugateGradient;
@@ -118,9 +131,7 @@ function optimize(fg, x, alg::ConjugateGradient;
         _dlast[] = η
         x, f, g, ξ, α, nfg = alg.linesearch(fg, x, η, (f, g);
                                             initialguess=α,
-                                            retract=retract, inner=inner,
-                                            maxiter=alg.ls_maxiter,
-                                            verbosity=alg.ls_verbosity)
+                                            retract=retract, inner=inner)
         numfg += nfg
         numiter += 1
         x, f, g = finalize!(x, f, g, numiter)

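A sketch of the reworked constructor in use, assuming the flavor types listed in the docstring are exported and that `optimize` follows OptimKit's documented five-value return convention; the cost function is invented for illustration:

using OptimKit

# Choose the β formula via `flavor`; the ls_* keywords are forwarded to the
# default-constructed HagerZhangLineSearch, as in the constructor above.
alg = ConjugateGradient(; flavor=PolakRibiere(), restart=20,
                        gradtol=1e-10, ls_maxiter=20, ls_maxfg=40)

fg(x) = (sum(abs2, x), 2x)  # illustrative cost and gradient
x, fx, gx, numfg, normgradhistory = optimize(fg, randn(4), alg)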
Diff for: src/gd.jl (+26 -23)

@@ -1,23 +1,27 @@
 """
-    struct GradientDescent{T<:Real,L<:AbstractLineSearch} <: OptimizationAlgorithm
     GradientDescent(;
-        maxiter = typemax(Int),
-        gradtol::Real = 1e-8,
-        verbosity::Int = 1,
-        ls_verbosity::Int = 1,
-        linesearch::AbstractLineSearch = HagerZhangLineSearch())
+        maxiter::Int=MAXITER[], # 1_000_000
+        gradtol::Real=GRADTOL[], # 1e-8
+        verbosity::Int=VERBOSITY[], # 1
+        ls_maxiter::Int=LS_MAXITER[], # 10
+        ls_maxfg::Int=LS_MAXFG[], # 20
+        ls_verbosity::Int=LS_VERBOSITY[], # 1
+        linesearch = HagerZhangLineSearch(maxiter=ls_maxiter, maxfg=ls_maxfg, verbosity=ls_verbosity))
+
 
 Gradient Descent optimization algorithm.
 
-## Fields
+## Parameters
 - `maxiter::Int`: The maximum number of iterations.
 - `gradtol::T`: The tolerance for the norm of the gradient.
-- `acceptfirst::Bool`: Whether to accept the first step of the line search.
-- `linesearch::L`: The line search algorithm to use.
 - `verbosity::Int`: The verbosity level of the optimization algorithm.
+- `ls_maxiter::Int`: The maximum number of iterations for the line search.
+- `ls_maxfg::Int`: The maximum number of function evaluations for the line search.
 - `ls_verbosity::Int`: The verbosity level of the line search algorithm.
+- `linesearch`: The line search algorithm to use; if a custom value is provided,
+  it overrides `ls_maxiter`, `ls_maxfg`, and `ls_verbosity`.
 
-Both verbosity levels use the following scheme:
+Both `verbosity` and `ls_verbosity` use the following scheme:
 - 0: no output
 - 1: only warnings upon non-convergence
 - 2: convergence information at the end of the algorithm
@@ -29,18 +33,19 @@ struct GradientDescent{T<:Real,L<:AbstractLineSearch} <: OptimizationAlgorithm
     gradtol::T
     verbosity::Int
     linesearch::L
-    ls_maxiter::Int
-    ls_verbosity::Int
 end
 function GradientDescent(;
-                         maxiter::Int=typemax(Int),
-                         gradtol::Real=1e-8,
-                         verbosity::Int=1,
-                         ls_maxiter::Int=10,
-                         ls_verbosity::Int=1,
-                         linesearch::AbstractLineSearch=HagerZhangLineSearch())
-    return GradientDescent(maxiter, gradtol, verbosity,
-                           linesearch, ls_maxiter, ls_verbosity)
+                         maxiter::Int=MAXITER[],
+                         gradtol::Real=GRADTOL[],
+                         verbosity::Int=VERBOSITY[],
+                         ls_maxiter::Int=LS_MAXITER[],
+                         ls_maxfg::Int=LS_MAXFG[],
+                         ls_verbosity::Int=LS_VERBOSITY[],
+                         linesearch::AbstractLineSearch=HagerZhangLineSearch(;
+                                                                             maxiter=ls_maxiter,
+                                                                             maxfg=ls_maxfg,
+                                                                             verbosity=ls_verbosity))
+    return GradientDescent(maxiter, gradtol, verbosity, linesearch)
 end
 
 function optimize(fg, x, alg::GradientDescent;
@@ -83,9 +88,7 @@ function optimize(fg, x, alg::GradientDescent;
         _dlast[] = η
         x, f, g, ξ, α, nfg = alg.linesearch(fg, x, η, (f, g);
                                             initialguess=α,
-                                            retract=retract, inner=inner,
-                                            maxiter=alg.ls_maxiter,
-                                            verbosity=alg.ls_verbosity)
+                                            retract=retract, inner=inner)
         numfg += nfg
         numiter += 1
         x, f, g = finalize!(x, f, g, numiter)

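To make the override semantics of the docstring concrete: an explicitly passed `linesearch` carries its own limits, so the `ls_*` keywords (which only parameterize the default-constructed `HagerZhangLineSearch`) are then ignored. A sketch with an invented cost function:

using OptimKit

ls = HagerZhangLineSearch(; maxiter=5, maxfg=10, verbosity=2)
# The explicit line search takes precedence; ls_maxiter etc. would be ignored.
alg = GradientDescent(; maxiter=10_000, gradtol=1e-9, linesearch=ls)

fg(x) = (sum(abs2, x), 2x)
x, fx, gx = optimize(fg, randn(2), alg)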
Diff for: src/lbfgs.jl (+26 -23)

@@ -1,25 +1,29 @@
 """
-    struct LBFGS{T<:Real,L<:AbstractLineSearch} <: OptimizationAlgorithm
     LBFGS(m::Int = 8;
-        maxiter = typemax(Int),
-        gradtol::Real = 1e-8,
        acceptfirst::Bool = true,
-        verbosity::Int = 1,
-        ls_verbosity::Int = 1,
-        linesearch::AbstractLineSearch = HagerZhangLineSearch())
+        maxiter::Int=MAXITER[], # 1_000_000
+        gradtol::Real=GRADTOL[], # 1e-8
+        verbosity::Int=VERBOSITY[], # 1
+        ls_maxiter::Int=LS_MAXITER[], # 10
+        ls_maxfg::Int=LS_MAXFG[], # 20
+        ls_verbosity::Int=LS_VERBOSITY[], # 1
+        linesearch = HagerZhangLineSearch(maxiter=ls_maxiter, maxfg=ls_maxfg, verbosity=ls_verbosity))
 
 LBFGS optimization algorithm.
 
-## Fields
+## Parameters
 - `m::Int`: The number of previous iterations to store for the limited memory BFGS approximation.
 - `maxiter::Int`: The maximum number of iterations.
 - `gradtol::T`: The tolerance for the norm of the gradient.
-- `acceptfirst::Bool`: Whether to accept the first step of the line search.
-- `linesearch::L`: The line search algorithm to use.
 - `verbosity::Int`: The verbosity level of the optimization algorithm.
+- `acceptfirst::Bool`: Whether to accept the first step of the line search.
+- `ls_maxiter::Int`: The maximum number of iterations for the line search.
+- `ls_maxfg::Int`: The maximum number of function evaluations for the line search.
 - `ls_verbosity::Int`: The verbosity level of the line search algorithm.
+- `linesearch`: The line search algorithm to use; if a custom value is provided,
+  it overrides `ls_maxiter`, `ls_maxfg`, and `ls_verbosity`.
 
-Both verbosity levels use the following scheme:
+Both `verbosity` and `ls_verbosity` use the following scheme:
 - 0: no output
 - 1: only warnings upon non-convergence
 - 2: convergence information at the end of the algorithm
@@ -33,19 +37,20 @@ struct LBFGS{T<:Real,L<:AbstractLineSearch} <: OptimizationAlgorithm
     acceptfirst::Bool
     verbosity::Int
     linesearch::L
-    ls_maxiter::Int
-    ls_verbosity::Int
 end
 function LBFGS(m::Int=8;
-               maxiter::Int=typemax(Int),
-               gradtol::Real=1e-8,
                acceptfirst::Bool=true,
-               verbosity::Int=1,
-               ls_maxiter::Int=10,
-               ls_verbosity::Int=1,
-               linesearch::AbstractLineSearch=HagerZhangLineSearch())
-    return LBFGS(m, maxiter, gradtol, acceptfirst, verbosity,
-                 linesearch, ls_maxiter, ls_verbosity)
+               maxiter::Int=MAXITER[],
+               gradtol::Real=GRADTOL[],
+               verbosity::Int=VERBOSITY[],
+               ls_maxiter::Int=LS_MAXITER[],
+               ls_maxfg::Int=LS_MAXFG[],
+               ls_verbosity::Int=LS_VERBOSITY[],
+               linesearch::AbstractLineSearch=HagerZhangLineSearch(;
+                                                                   maxiter=ls_maxiter,
+                                                                   maxfg=ls_maxfg,
+                                                                   verbosity=ls_verbosity))
+    return LBFGS(m, maxiter, gradtol, acceptfirst, verbosity, linesearch)
 end
 
 function optimize(fg, x, alg::LBFGS;
@@ -103,9 +108,7 @@ function optimize(fg, x, alg::LBFGS;
                                             initialguess=one(f),
                                             acceptfirst=alg.acceptfirst,
                                             # for some reason, line search seems to converge to solution alpha = 2 in most cases if acceptfirst = false. If acceptfirst = true, the initial value of alpha can immediately be accepted. This typically leads to a more erratic convergence of normgrad, but to less function evaluations in the end.
-                                            retract=retract, inner=inner,
-                                            maxiter=alg.ls_maxiter,
-                                            verbosity=alg.ls_verbosity)
+                                            retract=retract, inner=inner)
         numfg += nfg
         numiter += 1
         x, f, g = finalize!(x, f, g, numiter)

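A sketch of the LBFGS entry point under the new defaults; the memory size is the positional argument `m`, and `acceptfirst=true` reflects the trade-off described in the inline comment above (fewer function evaluations at the cost of more erratic gradient-norm convergence). The cost function is invented for illustration:

using OptimKit

alg = LBFGS(16; acceptfirst=true, gradtol=1e-10, verbosity=2)

fg(x) = (sum(abs2, x), 2x)
x, fx, gx = optimize(fg, randn(5), alg)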
Diff for: src/linesearches.jl (+16 -7)

@@ -30,6 +30,9 @@ struct HagerZhangLineSearch{T<:Real} <: AbstractLineSearch
     θ::T # parameter regulating the bisection step
     γ::T # parameter triggering the bisection step, namely if bracket reduction rate is slower than `γ`
     ρ::T # parameter controlling the initial bracket expansion rate
+    maxiter::Int # hard limit on the number of iterations
+    maxfg::Int # soft limit on the number of function evaluations
+    verbosity::Int # verbosity level
 end
 
 """
@@ -57,16 +60,19 @@ function HagerZhangLineSearch(; c₁::Real=1 // 10,
                               ϵ::Real=1 // 10^6,
                               θ::Real=1 // 2,
                               γ::Real=2 // 3,
-                              ρ::Real=5 // 1)
-    return HagerZhangLineSearch(promote(c₁, c₂, ϵ, θ, γ, ρ)...)
+                              ρ::Real=5 // 1,
+                              maxiter::Int=LS_MAXITER[],
+                              maxfg::Int=LS_MAXFG[],
+                              verbosity::Int=LS_VERBOSITY[])
+    return HagerZhangLineSearch(promote(c₁, c₂, ϵ, θ, γ, ρ)..., maxiter, maxfg, verbosity)
 end
 
 # implementation as function
 """
     (ls::HagerZhangLineSearch)(fg, x₀, η₀, fg₀ = fg(x₀);
                                retract = _retract, inner = _inner,
                                initialguess = one(fg₀[1]), acceptfirst = false,
-                               maxiter = 50, maxfuneval = 100, verbosity = 0)
+                               maxiter = ls.maxiter, maxfg = ls.maxfg, verbosity = ls.verbosity)
 
 Perform a Hager-Zhang line search to find a step length that satisfies the (approximate) Wolfe conditions.
 
@@ -84,7 +90,7 @@ Perform a Hager-Zhang line search to find a step length that satisfies the (appr
 - `acceptfirst::Bool`: Parameter that controls whether the initial guess can be accepted if it satisfies the strong Wolfe conditions. Defaults to `false`, thus requiring
   at least one line search iteration and one extra function evaluation.
 - `maxiter::Int`: Hard limit on the number of iterations. Default is `50`.
-- `maxfuneval::Int`: Soft limit on the number of function evaluations. Default is `100`.
+- `maxfg::Int`: Soft limit on the number of function evaluations. Default is `100`.
 - `verbosity::Int`: The verbosity level (see below). Default is `0`.
 
 ### Verbosity Levels
@@ -104,8 +110,11 @@ Perform a Hager-Zhang line search to find a step length that satisfies the (appr
 """
 function (ls::HagerZhangLineSearch)(fg, x₀, η₀, fg₀=fg(x₀);
                                     retract=_retract, inner=_inner,
-                                    initialguess::Real=one(fg₀[1]), acceptfirst::Bool=false,
-                                    maxiter::Int=50, maxfuneval::Int=100, verbosity::Int=0)
+                                    initialguess::Real=one(fg₀[1]),
+                                    acceptfirst::Bool=false,
+                                    maxiter::Int=ls.maxiter,
+                                    maxfg::Int=ls.maxfg,
+                                    verbosity::Int=ls.verbosity)
     (f₀, g₀) = fg₀
     ϕ₀ = f₀
     dϕ₀ = inner(x₀, g₀, η₀)
@@ -134,7 +143,7 @@ function (ls::HagerZhangLineSearch)(fg, x₀, η₀, fg₀=fg(x₀);
             @info @sprintf("Linesearch converged after %d iterations and %d function evaluations:\nα = %.2e, dϕ = %.2e, ϕ - ϕ₀ = %.2e",
                            k, numfg, α, dϕ, f - ϕ₀)
             return x, f, g, ξ, α, numfg
-        elseif k == maxiter || numfg >= maxfuneval
+        elseif k >= maxiter || numfg >= maxfg
             verbosity >= 1 &&
                 @warn @sprintf("Linesearch not converged after %d iterations and %d function evaluations:\nα = %.2e, dϕ = %.2e, ϕ - ϕ₀ = %.2e",
                                k, numfg, α, dϕ, f - ϕ₀)

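Since the struct now stores `maxiter`, `maxfg`, and `verbosity`, the line search can be used stand-alone with those defaults, or with per-call keyword overrides. A sketch following the documented callable signature, with toy inputs invented for illustration; the six return values match the `return x, f, g, ξ, α, numfg` statement above:

using OptimKit

fg(x) = (sum(abs2, x), 2x)
x₀ = [1.0, 2.0]
η₀ = -fg(x₀)[2]  # steepest-descent direction

ls = HagerZhangLineSearch(; maxiter=20, verbosity=3)
x, f, g, ξ, α, numfg = ls(fg, x₀, η₀; initialguess=0.1)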