Skip to content

Commit

Permalink
Refactor tranquilo code (#395)
Browse files Browse the repository at this point in the history
  • Loading branch information
janosg authored Nov 4, 2022
1 parent 4b9019f commit e0b5b6c
Show file tree
Hide file tree
Showing 10 changed files with 410 additions and 180 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ repos:
- id: debug-statements
- id: end-of-file-fixer
- repo: https://github.com/asottile/reorder_python_imports
rev: v3.8.5
rev: v3.9.0
hooks:
- id: reorder-python-imports
types: [python]
Expand Down
46 changes: 13 additions & 33 deletions src/estimagic/optimization/tranquilo/aggregate_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,14 @@ def get_aggregator(aggregator, functype, model_info):
Args:
aggregator (str or callable): Name of an aggregator or aggregator function.
The function must take as arguments (in that order):
The function must take as argument:
- vector_model (VectorModel): A fitted vector model.
- fvec_center (np.ndarray): A 1d array of the residuals at the center of the
trust-region. In the noisy case, this may be an average.
- model_info (ModelInfo): The model information.
functype (str): One of "scalar", "least_squares" and "likelihood".
model_info (ModelInfo): Information that describes the functional form of
the model.
Returns:
callable: The partialled aggregator that only depends on vector_model and
fvec_center.
callable: The partialled aggregator that only depends on vector_model.
"""
built_in_aggregators = {
Expand All @@ -46,7 +42,7 @@ def get_aggregator(aggregator, functype, model_info):

# determine if aggregator is compatible with functype and model_info
aggregator_compatible_with_functype = {
"scalar": ("identity", "identity_old", "sum"),
"scalar": ("identity", "sum"),
"least_squares": ("least_squares_linear",),
"likelihood": (
"sum",
Expand Down Expand Up @@ -83,45 +79,29 @@ def get_aggregator(aggregator, functype, model_info):
)

# create aggregator
out = partial(
_aggregate_models_template, aggregator=_aggregator, model_info=model_info
)
out = partial(_aggregate_models_template, aggregator=_aggregator)
return out


def _aggregate_models_template(vector_model, fvec_center, aggregator, model_info):
def _aggregate_models_template(vector_model, aggregator):
"""Aggregate a VectorModel into a ScalarModel.
Note on fvec_center:
--------------------
Let x0 be the x-value at which the x-sample is centered. If there is little noise
and the criterion function f is evaluated at x0, then fvec_center = f(x0). If,
however, the criterion function is very noisy or only evaluated in a neighborhood
around x0, then fvec_center is constructed as an average over evaluations of f
with x close to x0.
Args:
vector_model (VectorModel): The VectorModel to aggregate.
fvec_center (np.ndarray): A 1d array of the residuals at the center of the
trust-region. In the noisy case, this may be an average.
aggregator (callable): The function that does the actual aggregation.
model_info (ModelInfo): Information that describes the functional form of
the model.
Returns:
ScalarModel: The aggregated model
"""
intercept, linear_terms, square_terms = aggregator(
vector_model, fvec_center, model_info
)
intercept, linear_terms, square_terms = aggregator(vector_model)
scalar_model = ScalarModel(
intercept=intercept, linear_terms=linear_terms, square_terms=square_terms
)
return scalar_model


def aggregator_identity(vector_model, fvec_center, model_info):
def aggregator_identity(vector_model):
"""Aggregate quadratic VectorModel using identity function.
This aggregation is useful if the underlying maximization problem is a scalar
Expand All @@ -135,14 +115,14 @@ def aggregator_identity(vector_model, fvec_center, model_info):
"""
intercept = float(vector_model.intercepts)
linear_terms = np.squeeze(vector_model.linear_terms)
if model_info.has_squares or model_info.has_interactions:
square_terms = np.squeeze(vector_model.square_terms)
else:
if vector_model.square_terms is None:
square_terms = np.zeros((len(linear_terms), len(linear_terms)))
else:
square_terms = np.squeeze(vector_model.square_terms)
return intercept, linear_terms, square_terms


def aggregator_sum(vector_model, fvec_center, model_info):
def aggregator_sum(vector_model):
"""Aggregate quadratic VectorModel using sum function.
This aggregation is useful if the underlying maximization problem is a likelihood
Expand All @@ -163,7 +143,7 @@ def aggregator_sum(vector_model, fvec_center, model_info):
return intercept, linear_terms, square_terms


def aggregator_least_squares_linear(vector_model, fvec_center, model_info):
def aggregator_least_squares_linear(vector_model):
"""Aggregate linear VectorModel assuming a least_squares functype.
This aggregation is useful if the underlying maximization problem is a least-squares
Expand All @@ -190,7 +170,7 @@ def aggregator_least_squares_linear(vector_model, fvec_center, model_info):
return intercept, linear_terms, square_terms


def aggregator_information_equality_linear(vector_model, fvec_center, model_info):
def aggregator_information_equality_linear(vector_model):
"""Aggregate linear VectorModel using the Fisher information equality.
This aggregation is useful if the underlying maximization problem is a likelihood
Expand Down
32 changes: 24 additions & 8 deletions src/estimagic/optimization/tranquilo/filter_points.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
from estimagic.optimization.tranquilo.models import n_second_order_terms
from numba import njit
from scipy.linalg import qr_multiply

Expand Down Expand Up @@ -62,7 +63,7 @@ def drop_collinear_pounders(xs, indices, state):
def _drop_collinear_pounders(xs, indices, state):
theta2 = 1e-4
n_samples, n_params = xs.shape
n_poly_terms = n_params * (n_params + 1) // 2
n_poly_terms = n_second_order_terms(n_params)

indices_reverse = indices[::-1]
indexer_reverse = np.arange(n_samples)[::-1]
Expand Down Expand Up @@ -97,7 +98,9 @@ def _drop_collinear_pounders(xs, indices, state):
continue

linear_features[counter, 1:] = centered_xs[index]
square_features[counter, :] = _square_features(linear_features[counter, 1:])
square_features[counter, :] = _scaled_square_features(
linear_features[counter, 1:]
)

linear_features_pad = np.zeros((n_samples, n_samples))
linear_features_pad[:n_samples, : n_params + 1] = linear_features
Expand Down Expand Up @@ -127,15 +130,15 @@ def _get_polynomial_feature_matrices(
square_features = np.zeros((n_samples, n_poly_terms))

linear_features[0, 1:] = centered_xs[indexer[index_center]]
square_features[0, :] = _square_features(linear_features[0, 1:]).flatten()
square_features[0, :] = _scaled_square_features(linear_features[0, 1:]).flatten()

_is_center_in_head = index_center < n_params
idx_list_n = [i for i in range(n_params + _is_center_in_head) if i != index_center]
idx_list_n_plus_1 = [index_center] + idx_list_n

linear_features[:, 0] = 1
linear_features[: n_params + 1, 1:] = centered_xs[indexer[idx_list_n_plus_1]]
square_features[: n_params + 1, :] = _square_features(
square_features[: n_params + 1, :] = _scaled_square_features(
linear_features[: n_params + 1, 1:]
)

Expand All @@ -145,16 +148,29 @@ def _get_polynomial_feature_matrices(


@njit
def _square_features(x):
def _scaled_square_features(x):
"""Construct scaled interaction and square terms.
The interaction terms are scaled by 1 / sqrt{2} while the square terms are scaled
by 1 / 2.
Args:
x (np.ndarray): Array of shape (n_samples, n_params).
Returns:
np.ndarray: Scaled interaction and square terms. Has shape (n_samples,
n_params + (n_params - 1) * n_params / 1).
"""
n_samples, n_params = np.atleast_2d(x).shape
n_poly_terms = n_params * (n_params + 1) // 2
n_poly_terms = n_second_order_terms(n_params)

poly_terms = np.empty((n_poly_terms, n_samples), x.dtype)
poly_terms = np.empty((n_poly_terms, n_samples), np.float64)
xt = x.T

idx = 0
for i in range(n_params):
poly_terms[idx] = 0.5 * xt[i] ** 2
poly_terms[idx] = xt[i] ** 2 / 2
idx += 1

for j in range(i + 1, n_params):
Expand Down
15 changes: 8 additions & 7 deletions src/estimagic/optimization/tranquilo/fit_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

import numpy as np
from estimagic.optimization.tranquilo.models import ModelInfo
from estimagic.optimization.tranquilo.models import n_interactions
from estimagic.optimization.tranquilo.models import n_second_order_terms
from estimagic.optimization.tranquilo.models import VectorModel
from numba import njit
from scipy.linalg import qr_multiply
Expand Down Expand Up @@ -369,16 +371,15 @@ def _get_current_fit_minimal_frobenius_norm_of_hessian(

for k in range(n_residuals):
z_y_vec = np.dot(z_mat.T, y[:, k])
coeffs_first_stage = np.linalg.solve(
np.atleast_2d(n_z_mat_square),
np.atleast_1d(z_y_vec),
coeffs_first_stage, *_ = np.linalg.lstsq(
np.atleast_2d(n_z_mat_square), np.atleast_1d(z_y_vec), rcond=None
)

coeffs_second_stage = np.atleast_2d(n_z_mat) @ coeffs_first_stage

rhs = y[:, k] - n_mat @ coeffs_second_stage

alpha = np.linalg.solve(m_mat, rhs[: n_params + 1])
alpha, *_ = np.linalg.lstsq(m_mat, rhs[: n_params + 1], rcond=None)
coeffs_linear[k, :] = alpha[offset : (n_params + 1)]

coeffs_square[k] = coeffs_second_stage
Expand Down Expand Up @@ -442,11 +443,11 @@ def _polynomial_features(x, has_squares):
n_samples, n_params = x.shape

if has_squares:
n_poly_terms = n_params * (n_params + 1) // 2
n_poly_terms = n_second_order_terms(n_params)
else:
n_poly_terms = n_params * (n_params - 1) // 2
n_poly_terms = n_interactions(n_params)

poly_terms = np.empty((n_poly_terms, n_samples), x.dtype)
poly_terms = np.empty((n_poly_terms, n_samples), np.float64)
xt = x.T

idx = 0
Expand Down
50 changes: 50 additions & 0 deletions src/estimagic/optimization/tranquilo/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Union

import numpy as np
from numba import njit


class VectorModel(NamedTuple):
Expand Down Expand Up @@ -53,3 +54,52 @@ def evaluate_model(scalar_model, centered_x):
y += x.T @ scalar_model.square_terms @ x / 2

return y


def n_free_params(dim, info_or_name):
"""Number of free parameters in a model specified by name or model_info."""
out = dim + 1
if isinstance(info_or_name, ModelInfo):
info = info_or_name
if info.has_squares:
out += dim
if info.has_interactions:
out += n_interactions(dim)
elif isinstance(info_or_name, str) and info_or_name in (
"linear",
"quadratic",
"diagonal",
):
name = info_or_name
if name == "quadratic":
out += n_second_order_terms(dim)
elif name == "diagonal":
out += dim
else:
raise ValueError()
return out


@njit
def n_second_order_terms(dim):
"""Number of free second order terms in a quadratic model."""
return dim * (dim + 1) // 2


@njit
def n_interactions(dim):
"""Number of free interaction terms in a quadratic model."""
return dim * (dim - 1) // 2


def is_second_order_model(model_or_info):
"""Check if a model has any second order terms."""
if isinstance(model_or_info, ModelInfo):
model_info = model_or_info
out = model_info.has_interactions or model_info.has_squares
elif isinstance(model_or_info, (ScalarModel, VectorModel)):
model = model_or_info
out = model.square_terms is not None
else:
raise TypeError()
return out
11 changes: 5 additions & 6 deletions src/estimagic/optimization/tranquilo/tranquilo.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from estimagic.optimization.tranquilo.filter_points import get_sample_filter
from estimagic.optimization.tranquilo.fit_models import get_fitter
from estimagic.optimization.tranquilo.models import ModelInfo
from estimagic.optimization.tranquilo.models import n_free_params
from estimagic.optimization.tranquilo.models import ScalarModel
from estimagic.optimization.tranquilo.options import Bounds
from estimagic.optimization.tranquilo.options import ConvOptions
Expand Down Expand Up @@ -220,7 +221,6 @@ def _tranquilo(

scalar_model = aggregate_vector_model(
vector_model=vector_model,
fvec_center=state.fvec,
)

sub_sol = solve_subproblem(model=scalar_model, trustregion=trustregion)
Expand Down Expand Up @@ -292,7 +292,6 @@ def _calculate_rho(actual_improvement, expected_improvement):
rho = -np.inf
else:
rho = actual_improvement / expected_improvement

return rho


Expand Down Expand Up @@ -349,10 +348,10 @@ def _process_surrogate_model(surrogate_model, functype):
elif isinstance(surrogate_model, str):
if surrogate_model == "linear":
out = ModelInfo(has_squares=False, has_interactions=False)
elif surrogate_model == "quadratic":
out = ModelInfo(has_squares=True, has_interactions=True)
elif surrogate_model == "diagonal":
out = ModelInfo(has_squares=True, has_interactions=False)
elif surrogate_model == "quadratic":
out = ModelInfo(has_squares=True, has_interactions=True)
else:
raise ValueError(f"Invalid surrogate model: {surrogate_model}")

Expand All @@ -371,11 +370,11 @@ def _process_sample_size(user_sample_size, model_info, x):
elif isinstance(user_sample_size, str):
user_sample_size = user_sample_size.replace(" ", "")
if user_sample_size in ["linear", "n+1"]:
out = len(x) + 1
out = n_free_params(dim=len(x), info_or_name="linear")
elif user_sample_size in ["powell", "2n+1", "2*n+1"]:
out = 2 * len(x) + 1
elif user_sample_size == "quadratic":
out = len(x) * (len(x) + 1) // 2 + len(x) + 1
out = n_free_params(dim=len(x), info_or_name="quadratic")
else:
raise ValueError(f"Invalid sample size: {user_sample_size}")

Expand Down
Loading

0 comments on commit e0b5b6c

Please sign in to comment.