35 changes: 35 additions & 0 deletions autoemulate/experimental/emulators/base.py
@@ -6,11 +6,13 @@
import torch
from sklearn.base import BaseEstimator
from torch import nn, optim
from torch.distributions import TransformedDistribution
from torch.optim.lr_scheduler import ExponentialLR, LRScheduler

from autoemulate.experimental.data.preprocessors import Preprocessor
from autoemulate.experimental.data.utils import ConversionMixin, ValidationMixin
from autoemulate.experimental.device import TorchDeviceMixin
from autoemulate.experimental.transforms.standardize import StandardizeTransform
from autoemulate.experimental.types import (
DistributionLike,
GaussianLike,
@@ -31,13 +33,29 @@ class Emulator(ABC, ValidationMixin, ConversionMixin, TorchDeviceMixin):
is_fitted_: bool = False
supports_grad: bool = False
scheduler_cls: type[optim.lr_scheduler.LRScheduler] | None = None
x_transform: StandardizeTransform | None = None
y_transform: StandardizeTransform | None = None

@abstractmethod
def _fit(self, x: TensorLike, y: TensorLike): ...

def fit(self, x: TensorLike, y: TensorLike):
self._check(x, y)
# Ensure x and y are tensors and 2D
x, y = self._convert_to_tensors(x, y)

# Move to device
x, y = self._move_tensors_to_device(x, y)

# Fit transforms
if self.x_transform is not None:
self.x_transform.fit(x)
if self.y_transform is not None:
self.y_transform.fit(y)
x = self.x_transform(x) if self.x_transform is not None else x
y = self.y_transform(y) if self.y_transform is not None else y

# Fit emulator
self._fit(x, y)
self.is_fitted_ = True

@@ -69,7 +87,24 @@ def predict(self, x: TensorLike, with_grad: bool = False) -> OutputLike:
raise RuntimeError(msg)
self._check(x, None)
(x,) = self._move_tensors_to_device(x)
x = self.x_transform(x) if self.x_transform is not None else x
output = self._predict(x, with_grad)
if self.y_transform is not None:
if isinstance(output, GaussianLike):
output = self.y_transform._inverse_gaussian(output)
elif isinstance(output, DistributionLike):
output = TransformedDistribution(
output, transforms=[self.y_transform.inv]
)
elif isinstance(output, TensorLike):
output = self.y_transform.inv(output)
else:
msg = (
"Output type not supported for transformation. "
f"Got {type(output)} but expected GaussianLike, "
"DistributionLike, or TensorLike."
)
raise TypeError(msg)
self._check_output(output)
return output

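With these changes, Emulator.fit() fits any configured StandardizeTransform on x and y before delegating to _fit(), and predict() standardizes x and maps outputs back to the original y scale (via _inverse_gaussian for GaussianLike, TransformedDistribution for other distributions, and inv for plain tensors). A minimal usage sketch, assuming the experimental MLP API shown later in this diff; the data values are illustrative:

import torch
from autoemulate.experimental.emulators.nn.mlp import MLP

# Toy data: 200 samples, 2 inputs, 1 output (illustrative only).
x = torch.rand(200, 2) * 10.0
y = (x[:, :1] - 5.0) ** 2

# For MLP, standardize_x/standardize_y default to True, so fit() fits a
# StandardizeTransform on x and y, and predict() returns outputs already
# mapped back to the original y scale.
emulator = MLP(x, y, standardize_x=True, standardize_y=True)
emulator.fit(x, y)
preds = emulator.predict(torch.rand(10, 2) * 10.0)
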
37 changes: 32 additions & 5 deletions autoemulate/experimental/emulators/ensemble.py
@@ -10,6 +10,7 @@
GaussianEmulator,
)
from autoemulate.experimental.emulators.nn.mlp import MLP
from autoemulate.experimental.transforms.standardize import StandardizeTransform
from autoemulate.experimental.transforms.utils import make_positive_definite
from autoemulate.experimental.types import (
DeviceLike,
@@ -125,10 +126,12 @@ def _predict(self, x: Tensor, with_grad: bool) -> GaussianLike:


class EnsembleMLP(Ensemble):
-def __init__(
+def __init__( # noqa: PLR0913
self,
x: TensorLike,
y: TensorLike,
standardize_x: bool = True,
standardize_y: bool = True,
n_emulators: int = 4,
device: DeviceLike | None = None,
**mlp_kwargs,
@@ -149,7 +152,17 @@ def __init__(
**mlp_kwargs: dict
Additional keyword arguments for the MLP constructor.
"""
-emulators = [MLP(x, y, device=device, **mlp_kwargs) for i in range(n_emulators)]
+emulators = [
+    MLP(
+        x,
+        y,
+        standardize_x=standardize_x,
+        standardize_y=standardize_y,
+        device=device,
+        **mlp_kwargs,
+    )
+    for i in range(n_emulators)
+]
super().__init__(emulators, device=device)

@staticmethod
@@ -163,9 +176,11 @@ class DropoutEnsemble(GaussianEmulator, TorchDeviceMixin):
and compute mean + epistemic covariance across them.
"""

-def __init__(
+def __init__( # noqa: PLR0913
self,
model: DropoutTorchBackend,
standardize_x: bool = True,
standardize_y: bool = True,
n_samples: int = 20,
jitter: float = 1e-4,
device: DeviceLike | None = None,
@@ -186,6 +201,8 @@ def __init__(
TorchDeviceMixin.__init__(self, device=device)
assert n_samples > 0
self.model = model.to(self.device)
self.x_transform = StandardizeTransform() if standardize_x else None
self.y_transform = StandardizeTransform() if standardize_y else None
self.n_samples = n_samples
self.is_fitted_ = model.is_fitted_
self.jitter = jitter
@@ -251,10 +268,12 @@ def _predict(self, x: Tensor, with_grad: bool) -> GaussianLike:


class EnsembleMLPDropout(DropoutEnsemble):
-def __init__(
+def __init__( # noqa: PLR0913
self,
x: TensorLike,
y: TensorLike,
standardize_x: bool = True,
standardize_y: bool = True,
dropout_prob: float = 0.2,
device: DeviceLike | None = None,
**mlp_kwargs,
@@ -276,7 +295,15 @@ def __init__(
Additional keyword arguments for the MLP constructor.
"""
super().__init__(
-MLP(x, y, dropout_prob=dropout_prob, device=device, **mlp_kwargs),
+MLP(
+    x,
+    y,
+    standardize_x=standardize_x,
+    standardize_y=standardize_y,
+    dropout_prob=dropout_prob,
+    device=device,
+    **mlp_kwargs,
+),
device=device,
)

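For context, DropoutEnsemble keeps dropout active at prediction time, runs n_samples stochastic forward passes, and forms the sample mean and epistemic covariance across them (its _predict body is collapsed in this diff). A standalone sketch of that recipe, independent of the autoemulate classes; all names below are illustrative:

import torch

def mc_dropout_moments(
    model: torch.nn.Module, x: torch.Tensor, n_samples: int = 20, jitter: float = 1e-4
) -> tuple[torch.Tensor, torch.Tensor]:
    """Sample mean and epistemic covariance from stochastic forward passes."""
    model.train()  # keep dropout layers stochastic at inference time
    with torch.no_grad():
        samples = torch.stack([model(x) for _ in range(n_samples)])  # (S, N, D)
    mean = samples.mean(dim=0)  # (N, D)
    centered = samples - mean
    # Per-input covariance over the sample axis; adding jitter to the diagonal
    # keeps it positive definite, mirroring the jitter argument added above.
    cov = torch.einsum("snd,sne->nde", centered, centered) / (n_samples - 1)
    return mean, cov + jitter * torch.eye(cov.shape[-1])
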
13 changes: 9 additions & 4 deletions autoemulate/experimental/emulators/gaussian_process/exact.py
@@ -19,6 +19,7 @@
CovarModuleFn,
MeanModuleFn,
)
from autoemulate.experimental.transforms.standardize import StandardizeTransform
from autoemulate.experimental.transforms.utils import make_positive_definite
from autoemulate.experimental.types import (
DeviceLike,
@@ -61,6 +62,8 @@ def __init__( # noqa: PLR0913 allow too many arguments since all currently requ
self,
x: TensorLike,
y: TensorLike,
standardize_x: bool = False,
standardize_y: bool = True,
likelihood_cls: type[MultitaskGaussianLikelihood] = MultitaskGaussianLikelihood,
mean_module_fn: MeanModuleFn = constant_mean,
covar_module_fn: CovarModuleFn = rbf_plus_constant,
@@ -133,6 +136,8 @@ def __init__( # noqa: PLR0913 allow too many arguments since all currently requ
self, train_inputs=x, train_targets=y, likelihood=likelihood
)
self.likelihood = likelihood
self.x_transform = StandardizeTransform() if standardize_x else None
self.y_transform = StandardizeTransform() if standardize_y else None
self.mean_module = mean_module
self.covar_module = covar_module
self.epochs = epochs
@@ -159,10 +164,6 @@ def forward(self, x: TensorLike):
def _fit(self, x: TensorLike, y: TensorLike):
self.train()
self.likelihood.train()
-x, y = self._move_tensors_to_device(x, y)
-
-# TODO: move conversion out of _fit() and instead rely on for impl check
-x, y = self._convert_to_tensors(x, y)

mll = ExactMarginalLogLikelihood(self.likelihood, self)

@@ -316,6 +317,8 @@ def __init__( # noqa: PLR0913 allow too many arguments since all currently requ
self,
x: TensorLike,
y: TensorLike,
standardize_x: bool = False,
standardize_y: bool = True,
likelihood_cls: type[MultitaskGaussianLikelihood] = MultitaskGaussianLikelihood,
mean_module_fn: MeanModuleFn = constant_mean,
covar_module_fn: CovarModuleFn = rbf_plus_constant,
@@ -399,6 +402,8 @@ def __init__( # noqa: PLR0913 allow too many arguments since all currently requ
train_targets=y,
likelihood=likelihood,
)
self.x_transform = StandardizeTransform() if standardize_x else None
self.y_transform = StandardizeTransform() if standardize_y else None
self.likelihood = likelihood
self.mean_module = mean_module
self.covar_module = covar_module
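
Since standardize_y defaults to True for these GPs, their posteriors flow through the GaussianLike branch of Emulator.predict() above. Inverting a standardization z = (y - mu) / sigma on a Gaussian is closed form; a sketch of the presumable math (the internals of StandardizeTransform._inverse_gaussian are not shown in this diff):

import torch
from torch.distributions import MultivariateNormal

def inverse_standardize_gaussian(
    dist: MultivariateNormal, mu: torch.Tensor, sigma: torch.Tensor
) -> MultivariateNormal:
    """Map N(m, S) in standardized space back to the original y scale."""
    # y = sigma * z + mu elementwise, so the mean rescales affinely and the
    # covariance is conjugated by diag(sigma).
    scale = torch.diag_embed(sigma)  # (D, D)
    mean = sigma * dist.mean + mu
    cov = scale @ dist.covariance_matrix @ scale
    return MultivariateNormal(mean, covariance_matrix=cov)
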
5 changes: 5 additions & 0 deletions autoemulate/experimental/emulators/gradient_boosting.py
@@ -6,6 +6,7 @@

from autoemulate.experimental.device import TorchDeviceMixin
from autoemulate.experimental.emulators.base import SklearnBackend
from autoemulate.experimental.transforms.standardize import StandardizeTransform
from autoemulate.experimental.types import DeviceLike, TensorLike


@@ -20,6 +21,8 @@ def __init__( # noqa: PLR0913 allow too many arguments since all currently requ
self,
x: TensorLike,
y: TensorLike,
standardize_x: bool = False,
standardize_y: bool = False,
loss: Literal[
"squared_error", "absolute_error", "huber", "quantile"
] = "squared_error",
@@ -38,6 +41,8 @@ def __init__( # noqa: PLR0913 allow too many arguments since all currently requ
"""Initializes a GradientBoosting object."""
_, _ = x, y # ignore unused arguments
TorchDeviceMixin.__init__(self, device=device, cpu_only=True)
self.x_transform = StandardizeTransform() if standardize_x else None
self.y_transform = StandardizeTransform() if standardize_y else None
self.loss = loss
self.learning_rate = learning_rate
self.n_estimators = n_estimators
5 changes: 5 additions & 0 deletions autoemulate/experimental/emulators/lightgbm.py
@@ -4,6 +4,7 @@

from autoemulate.experimental.device import TorchDeviceMixin
from autoemulate.experimental.emulators.base import DeterministicEmulator
from autoemulate.experimental.transforms.standardize import StandardizeTransform
from autoemulate.experimental.types import DeviceLike, TensorLike


@@ -19,6 +20,8 @@ def __init__( # noqa: PLR0913 allow too many arguments since all currently requ
self,
x: TensorLike | None = None,
y: TensorLike | None = None,
standardize_x: bool = False,
standardize_y: bool = False,
boosting_type: str = "gbdt",
num_leaves: int = 31,
max_depth: int = -1,
@@ -43,6 +46,8 @@ def __init__( # noqa: PLR0913 allow too many arguments since all currently requ
"""Initializes a LightGBM object."""
_, _ = x, y # ignore unused arguments
TorchDeviceMixin.__init__(self, device=device)
self.x_transform = StandardizeTransform() if standardize_x else None
self.y_transform = StandardizeTransform() if standardize_y else None
self.boosting_type = boosting_type
self.num_leaves = num_leaves
self.max_depth = max_depth
5 changes: 5 additions & 0 deletions autoemulate/experimental/emulators/nn/mlp.py
@@ -2,6 +2,7 @@

from autoemulate.experimental.data.utils import set_random_seed
from autoemulate.experimental.device import TorchDeviceMixin
from autoemulate.experimental.transforms.standardize import StandardizeTransform
from autoemulate.experimental.types import DeviceLike, TensorLike

from ..base import DropoutTorchBackend
@@ -12,6 +13,8 @@ def __init__( # noqa: PLR0913
self,
x: TensorLike,
y: TensorLike,
standardize_x: bool = True,
standardize_y: bool = True,
activation_cls: type[nn.Module] = nn.ReLU,
loss_fn_cls: type[nn.Module] = nn.MSELoss,
epochs: int = 100,
@@ -96,6 +99,8 @@ def __init__( # noqa: PLR0913

# Finalize initialization
self._initialize_weights(weight_init, scale, bias_init)
self.x_transform = StandardizeTransform() if standardize_x else None
self.y_transform = StandardizeTransform() if standardize_y else None
self.epochs = epochs
self.loss_fn = loss_fn_cls()
self.lr = lr
5 changes: 5 additions & 0 deletions autoemulate/experimental/emulators/polynomials.py
@@ -5,6 +5,7 @@
from autoemulate.experimental.data.utils import set_random_seed
from autoemulate.experimental.device import TorchDeviceMixin
from autoemulate.experimental.emulators.base import PyTorchBackend
from autoemulate.experimental.transforms.standardize import StandardizeTransform
from autoemulate.experimental.types import DeviceLike, TensorLike


@@ -20,6 +21,8 @@ def __init__( # noqa: PLR0913
self,
x: TensorLike,
y: TensorLike,
standardize_x: bool = False,
standardize_y: bool = False,
Comment on lines +24 to +25

Member: Is it worth setting the default here to True (at least for the y) like with the MLP?

Member: On the other hand, I'm not sure it makes a big difference in this case and v0 doesn't standardise either.

Collaborator (Author): From discussing, we think that having the defaults as False remains worthwhile so that users can opt-in to standardization and search over it in the model comparison loop still (e.g. x_transforms_list=[[], [StandardizeTransform()]]).

degree: int = 2,
lr: float = 0.1,
epochs: int = 500,
@@ -32,6 +35,8 @@ def __init__( # noqa: PLR0913
TorchDeviceMixin.__init__(self, device=device)
if random_seed is not None:
set_random_seed(seed=random_seed)
self.x_transform = StandardizeTransform() if standardize_x else None
self.y_transform = StandardizeTransform() if standardize_y else None
self.degree = degree
self.lr = lr
self.epochs = epochs
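
The thread above keeps standardization opt-in for the deterministic emulators in this PR. A sketch of the two ways a user could opt in; the tensors are toy data, and the commented comparison-loop call is hypothetical, with only the x_transforms_list kwarg taken from the review comment:

import torch
from autoemulate.experimental.emulators.gradient_boosting import GradientBoosting
from autoemulate.experimental.transforms.standardize import StandardizeTransform

x, y = torch.rand(50, 3), torch.rand(50, 1)

# 1) Opt in directly on one emulator (defaults here are False):
gb = GradientBoosting(x, y, standardize_x=True, standardize_y=True)
gb.fit(x, y)

# 2) Or search over it in the model comparison loop, per the review thread
#    (hypothetical entry point, not part of this diff):
# ae = AutoEmulate(x, y, x_transforms_list=[[], [StandardizeTransform()]])
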
6 changes: 6 additions & 0 deletions autoemulate/experimental/emulators/radial_basis_functions.py
@@ -5,6 +5,7 @@

from autoemulate.experimental.device import TorchDeviceMixin
from autoemulate.experimental.emulators.base import PyTorchBackend
from autoemulate.experimental.transforms.standardize import StandardizeTransform
from autoemulate.experimental.types import DeviceLike, OutputLike, TensorLike


@@ -21,6 +22,8 @@ def __init__( # noqa: PLR0913
self,
x: TensorLike, # noqa: ARG002
y: TensorLike, # noqa: ARG002
standardize_x: bool = False,
standardize_y: bool = False,
smoothing: float = 0.0,
kernel: str = "thin_plate_spline",
epsilon: float = 1.0,
@@ -30,6 +33,9 @@
"""Initializes a RadialBasisFunctions object."""
super().__init__()
TorchDeviceMixin.__init__(self, device=device)

self.x_transform = StandardizeTransform() if standardize_x else None
self.y_transform = StandardizeTransform() if standardize_y else None
self.smoothing = smoothing
self.kernel = kernel
self.epsilon = epsilon
5 changes: 5 additions & 0 deletions autoemulate/experimental/emulators/random_forest.py
@@ -3,6 +3,7 @@

from autoemulate.experimental.device import TorchDeviceMixin
from autoemulate.experimental.emulators.base import SklearnBackend
from autoemulate.experimental.transforms.standardize import StandardizeTransform
from autoemulate.experimental.types import DeviceLike, TensorLike


@@ -17,6 +18,8 @@ def __init__( # noqa: PLR0913 allow too many arguments since all currently requ
self,
x: TensorLike,
y: TensorLike,
standardize_x: bool = False,
standardize_y: bool = False,
n_estimators: int = 100,
criterion: str = "squared_error",
max_depth: int | None = None,
@@ -33,6 +36,8 @@
"""Initializes a RandomForest object."""
_, _ = x, y # ignore unused arguments
TorchDeviceMixin.__init__(self, device=device, cpu_only=True)
self.x_transform = StandardizeTransform() if standardize_x else None
self.y_transform = StandardizeTransform() if standardize_y else None
self.n_estimators = n_estimators
self.criterion = criterion
self.max_depth = max_depth