diff --git a/docs/notebooks/efficient_posterior_sampling.py b/docs/notebooks/efficient_posterior_sampling.py
index 49937e90..cb9a561b 100644
--- a/docs/notebooks/efficient_posterior_sampling.py
+++ b/docs/notebooks/efficient_posterior_sampling.py
@@ -62,7 +62,7 @@
 that combines both feature approximations and exact kernel evaluations from the
 Matheron function and weight space approximation formulas above.
 
-The subsequent experiments demonstrate the qualitative efficiency of the hybrid rule when compared to the vanilla Matheron weight space approximation, in terms of the Wasserstein distance to the exact posterior GP. To conduct these experiments, the required classes in `gpflux` are `RandomFourierFeatures`, to approximate a stationary kernel with finitely many random Fourier features $\phi_d(\cdot)$ according to Bochner's theorem and following Rahimi and Recht "Random features for large-scale kernel machines" (NeurIPS, 2007), and `KernelWithFeatureDecomposition`, to approximate a kernel with a specified set of feature functions.
+The subsequent experiments demonstrate the qualitative efficiency of the hybrid rule when compared to the vanilla Matheron weight space approximation, in terms of the Wasserstein distance to the exact posterior GP. To conduct these experiments, the required classes in `gpflux` are `RandomFourierFeaturesCosine`, to approximate a stationary kernel with finitely many random Fourier features $\phi_d(\cdot)$ according to Bochner's theorem and following Rahimi and Recht "Random features for large-scale kernel machines" (NeurIPS, 2007), and `KernelWithFeatureDecomposition`, to approximate a kernel with a specified set of feature functions.
 """
 
 # %%
@@ -79,7 +79,7 @@
 from gpflow.kernels import RBF, Matern52
 from gpflow.models import GPR
 
-from gpflux.layers.basis_functions.random_fourier_features import RandomFourierFeatures
+from gpflux.layers.basis_functions.fourier_features import RandomFourierFeaturesCosine
 from gpflux.sampling.kernel_with_feature_decomposition import KernelWithFeatureDecomposition
 
 # %% [markdown]
@@ -253,9 +253,9 @@ def conduct_experiment(num_input_dimensions, num_train_samples, num_features):
     exact_kernel = kernel_class(lengthscales=lengthscale)
 
     # weight space approximated kernel
-    feature_functions = RandomFourierFeatures(
+    feature_functions = RandomFourierFeaturesCosine(
         kernel=kernel_class(lengthscales=lengthscale),
-        output_dim=num_features,
+        n_components=num_features,
         dtype=default_float(),
     )
     feature_coefficients = np.ones((num_features, 1), dtype=default_float())
diff --git a/docs/notebooks/efficient_sampling.py b/docs/notebooks/efficient_sampling.py
index 79420344..990a1cf4 100644
--- a/docs/notebooks/efficient_sampling.py
+++ b/docs/notebooks/efficient_sampling.py
@@ -36,7 +36,7 @@
 
 from gpflow.config import default_float
 
-from gpflux.layers.basis_functions.random_fourier_features import RandomFourierFeatures
+from gpflux.layers.basis_functions.fourier_features import RandomFourierFeaturesCosine
 from gpflux.sampling import KernelWithFeatureDecomposition
 from gpflux.models.deep_gp import sample_dgp
 
@@ -71,7 +71,7 @@
 gpflow.utilities.set_trainable(inducing_variable, False)
 
 num_rff = 1000
-eigenfunctions = RandomFourierFeatures(kernel, num_rff, dtype=default_float())
+eigenfunctions = RandomFourierFeaturesCosine(kernel, num_rff, dtype=default_float())
 eigenvalues = np.ones((num_rff, 1), dtype=default_float())
 kernel_with_features = KernelWithFeatureDecomposition(kernel, eigenfunctions, eigenvalues)
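Illustrative sketch, not part of the patch: the renamed `RandomFourierFeaturesCosine` keeps the calling convention used above, construct with `(kernel, n_components, dtype=...)` and map inputs `[N, D] -> [N, M]`. A minimal sketch of the weight-space prior sampling these notebooks rely on, using only the API shown in this diff (all numbers illustrative):

```python
# Sketch: draw an approximate GP prior sample in weight space.
# With features phi and weights w ~ N(0, I), f(X) = phi(X) @ w.
import numpy as np
import tensorflow as tf
import gpflow
from gpflow.config import default_float
from gpflux.layers.basis_functions.fourier_features import RandomFourierFeaturesCosine

kernel = gpflow.kernels.Matern52()
num_rff = 1000  # number of Monte Carlo features M
features = RandomFourierFeaturesCosine(kernel, num_rff, dtype=default_float())

X = np.linspace(0.0, 1.0, 200).reshape(-1, 1)  # [N, D]
phi = features(X)  # [N, M]
w = tf.random.normal((num_rff, 1), dtype=default_float())  # w ~ N(0, I)
f_prior = phi @ w  # [N, 1]: one approximate prior function sample, evaluated at X
```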
diff --git a/docs/notebooks/weight_space_approximation.py b/docs/notebooks/weight_space_approximation.py
index 0f7a5bc2..ef4b4fe4 100644
--- a/docs/notebooks/weight_space_approximation.py
+++ b/docs/notebooks/weight_space_approximation.py
@@ -42,7 +42,7 @@
 The advantage of expressing a Gaussian process in weight space is that functions are represented as weight vectors $\textbf{w}$ (rather than actual functions $f(\cdot)$) from which samples can be obtained a priori without knowing where the function should be evaluated. When expressing a Gaussian process in function space view the latter is not possible, i.e. a function $f(\cdot)$ cannot be sampled without knowing where to evaluate the function, namely at $\{X_{n^\star}^\star\}_{n^\star=1,...,N^\star}$. Weight space approximated Gaussian processes therefore hold the potential to sample efficiently from Gaussian process posteriors, which is desirable in vanilla supervised learning but also in domains such as Bayesian optimisation or model-based reinforcement learning.
 
-In the following example, we compare a weight space approximated GPR model (WSA model) with both a proper GPR model and a sparse variational Gaussian Process model (SVGP). GPR models and SVGP models are implemented in `gpflow`, but the two necessary ingredients for building the WSA model are part of `gpflux`: these are random Fourier feature functions via the `RandomFourierFeatures` class, and approximate kernels based on Bochner's theorem (or any other theorem that approximates a kernel with a finite number of feature functions, e.g. Mercer) via the `KernelWithFeatureDecomposition` class.
+In the following example, we compare a weight space approximated GPR model (WSA model) with both a proper GPR model and a sparse variational Gaussian Process model (SVGP). GPR models and SVGP models are implemented in `gpflow`, but the two necessary ingredients for building the WSA model are part of `gpflux`: these are random Fourier feature functions via the `RandomFourierFeaturesCosine` class, and approximate kernels based on Bochner's theorem (or any other theorem that approximates a kernel with a finite number of feature functions, e.g. Mercer) via the `KernelWithFeatureDecomposition` class.
 """
 
 # %%
@@ -61,7 +61,7 @@
 from gpflow.likelihoods import Gaussian
 from gpflow.inducing_variables import InducingPoints
 
-from gpflux.layers.basis_functions.random_fourier_features import RandomFourierFeatures
+from gpflux.layers.basis_functions.fourier_features import RandomFourierFeaturesCosine
 from gpflux.sampling.kernel_with_feature_decomposition import KernelWithFeatureDecomposition
 
 # %% [markdown]
@@ -77,7 +77,7 @@
 # experiment parameters that are the same for both sets of experiments
 X_interval = [0.14, 0.5]  # interval where training points live
 lengthscale = 0.1  # lengthscale for the kernel (which is not learned in all experiments, the kernel variance is 1)
-number_of_basis_functions = 2000  # number of basis functions for weight-space approximated kernels
+number_of_features = 2000  # number of basis functions for weight-space approximated kernels
 noise_variance = 1e-3  # noise variance of the likelihood (which is not learned in all experiments)
 number_of_test_samples = 1024  # number of evaluation points for prediction
 number_of_function_samples = (
@@ -264,12 +264,12 @@ def optimize_model_with_scipy(model):
     )
 
     # create exact GPR model with weight-space approximated kernel (WSA model)
-    feature_functions = RandomFourierFeatures(
+    feature_functions = RandomFourierFeaturesCosine(
         kernel=kernel_class(lengthscales=lengthscale),
-        output_dim=number_of_basis_functions,
+        n_components=number_of_features,
         dtype=default_float(),
     )
-    feature_coefficients = np.ones((number_of_basis_functions, 1), dtype=default_float())
+    feature_coefficients = np.ones((number_of_features, 1), dtype=default_float())
     kernel = KernelWithFeatureDecomposition(
         kernel=None, feature_functions=feature_functions, feature_coefficients=feature_coefficients
     )
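Illustrative sketch, not part of the patch, of the WSA model this notebook assembles: the feature and coefficient construction is taken from the hunk above, while the final `GPR` line assumes the decomposed kernel is consumed by a standard `gpflow` GPR model, as the surrounding notebook text describes ("create exact GPR model with weight-space approximated kernel"). Data `X`, `y` are placeholders.

```python
# Sketch: exact GPR with a weight-space-approximated kernel (WSA model).
import numpy as np
import gpflow
from gpflow.config import default_float
from gpflux.layers.basis_functions.fourier_features import RandomFourierFeaturesCosine
from gpflux.sampling.kernel_with_feature_decomposition import KernelWithFeatureDecomposition

number_of_features = 2000
feature_functions = RandomFourierFeaturesCosine(
    kernel=gpflow.kernels.Matern52(lengthscales=0.1),
    n_components=number_of_features,
    dtype=default_float(),
)
# unit coefficients: the kernel is approximated as sum_i phi_i(x) phi_i(x')
feature_coefficients = np.ones((number_of_features, 1), dtype=default_float())
approximate_kernel = KernelWithFeatureDecomposition(
    kernel=None, feature_functions=feature_functions, feature_coefficients=feature_coefficients
)

X = np.random.uniform(0.14, 0.5, size=(32, 1))  # placeholder training inputs
y = np.sin(20.0 * X) + 0.03 * np.random.randn(*X.shape)  # placeholder targets
wsa_model = gpflow.models.GPR(data=(X, y), kernel=approximate_kernel, noise_variance=1e-3)
```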
""" # %% @@ -61,7 +61,7 @@ from gpflow.likelihoods import Gaussian from gpflow.inducing_variables import InducingPoints -from gpflux.layers.basis_functions.random_fourier_features import RandomFourierFeatures +from gpflux.layers.basis_functions.fourier_features import RandomFourierFeaturesCosine from gpflux.sampling.kernel_with_feature_decomposition import KernelWithFeatureDecomposition # %% [markdown] @@ -77,7 +77,7 @@ # experiment parameters that are the same for both sets of experiments X_interval = [0.14, 0.5] # interval where training points live lengthscale = 0.1 # lengthscale for the kernel (which is not learned in all experiments, the kernel variance is 1) -number_of_basis_functions = 2000 # number of basis functions for weight-space approximated kernels +number_of_features = 2000 # number of basis functions for weight-space approximated kernels noise_variance = 1e-3 # noise variance of the likelihood (which is not learned in all experiments) number_of_test_samples = 1024 # number of evaluation points for prediction number_of_function_samples = ( @@ -264,12 +264,12 @@ def optimize_model_with_scipy(model): ) # create exact GPR model with weight-space approximated kernel (WSA model) - feature_functions = RandomFourierFeatures( + feature_functions = RandomFourierFeaturesCosine( kernel=kernel_class(lengthscales=lengthscale), - output_dim=number_of_basis_functions, + n_components=number_of_features, dtype=default_float(), ) - feature_coefficients = np.ones((number_of_basis_functions, 1), dtype=default_float()) + feature_coefficients = np.ones((number_of_features, 1), dtype=default_float()) kernel = KernelWithFeatureDecomposition( kernel=None, feature_functions=feature_functions, feature_coefficients=feature_coefficients ) diff --git a/gpflux/layers/basis_functions/__init__.py b/gpflux/layers/basis_functions/__init__.py index ce7890de..57bdf35f 100644 --- a/gpflux/layers/basis_functions/__init__.py +++ b/gpflux/layers/basis_functions/__init__.py @@ -14,8 +14,5 @@ # limitations under the License. # """ -A kernel's features for efficient sampling, used by -:class:`gpflux.sampling.KernelWithFeatureDecomposition` +Basis functions. """ - -from gpflux.layers.basis_functions.random_fourier_features import RandomFourierFeatures diff --git a/gpflux/layers/basis_functions/fourier_features/__init__.py b/gpflux/layers/basis_functions/fourier_features/__init__.py new file mode 100644 index 00000000..54a7daaf --- /dev/null +++ b/gpflux/layers/basis_functions/fourier_features/__init__.py @@ -0,0 +1,27 @@ +# +# Copyright (c) 2021 The GPflux Contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +""" +A kernel's features for efficient sampling, used by +:class:`gpflux.sampling.KernelWithFeatureDecomposition` +""" + +from gpflux.layers.basis_functions.fourier_features.quadrature import QuadratureFourierFeatures +from gpflux.layers.basis_functions.fourier_features.random import ( + RandomFourierFeatures, + RandomFourierFeaturesCosine, +) + +__all__ = ["QuadratureFourierFeatures", "RandomFourierFeatures", "RandomFourierFeaturesCosine"] diff --git a/gpflux/layers/basis_functions/fourier_features/base.py b/gpflux/layers/basis_functions/fourier_features/base.py new file mode 100644 index 00000000..80461c80 --- /dev/null +++ b/gpflux/layers/basis_functions/fourier_features/base.py @@ -0,0 +1,101 @@ +# +# Copyright (c) 2021 The GPflux Contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +""" Shared functionality for stationary kernel basis functions. """ + +from abc import ABC, abstractmethod +from typing import Mapping + +import tensorflow as tf + +import gpflow +from gpflow.base import TensorType + +from gpflux.types import ShapeType + + +class FourierFeaturesBase(ABC, tf.keras.layers.Layer): + def __init__(self, kernel: gpflow.kernels.Kernel, n_components: int, **kwargs: Mapping): + """ + :param kernel: kernel to approximate using a set of Fourier bases. + :param n_components: number of components (e.g. Monte Carlo samples, + quadrature nodes, etc.) used to numerically approximate the kernel. + """ + super(FourierFeaturesBase, self).__init__(**kwargs) + self.kernel = kernel + self.n_components = n_components + if kwargs.get("input_dim", None): + self._input_dim = kwargs["input_dim"] + self.build(tf.TensorShape([self._input_dim])) + else: + self._input_dim = None + + def call(self, inputs: TensorType) -> tf.Tensor: + """ + Evaluate the basis functions at ``inputs``. + + :param inputs: The evaluation points, a tensor with the shape ``[N, D]``. + + :return: A tensor with the shape ``[N, M]``. + """ + X = tf.divide(inputs, self.kernel.lengthscales) # [N, D] + const = self._compute_constant() + bases = self._compute_bases(X) + output = const * bases + tf.ensure_shape(output, self.compute_output_shape(inputs.shape)) + return output + + def compute_output_shape(self, input_shape: ShapeType) -> tf.TensorShape: + """ + Computes the output shape of the layer. + See `tf.keras.layers.Layer.compute_output_shape() + `_. + """ + # TODO: Keras docs say "If the layer has not been built, this method + # will call `build` on the layer." -- do we need to do so? + tensor_shape = tf.TensorShape(input_shape).with_rank(2) + output_dim = self._compute_output_dim(input_shape) + return tensor_shape[:-1].concatenate(output_dim) + + def get_config(self) -> Mapping: + """ + Returns the config of the layer. + See `tf.keras.layers.Layer.get_config() + `_. 
+ """ + config = super(FourierFeaturesBase, self).get_config() + config.update( + {"kernel": self.kernel, "n_components": self.n_components, "input_dim": self._input_dim} + ) + + return config + + @abstractmethod + def _compute_output_dim(self, input_shape: ShapeType) -> int: + pass + + @abstractmethod + def _compute_constant(self) -> tf.Tensor: + """ + Compute normalizing constant for basis functions. + """ + pass + + @abstractmethod + def _compute_bases(self, inputs: TensorType) -> tf.Tensor: + """ + Compute basis functions. + """ + pass diff --git a/gpflux/layers/basis_functions/fourier_features/quadrature.py b/gpflux/layers/basis_functions/fourier_features/quadrature.py new file mode 100644 index 00000000..498beff3 --- /dev/null +++ b/gpflux/layers/basis_functions/fourier_features/quadrature.py @@ -0,0 +1,81 @@ +# +# Copyright (c) 2021 The GPflux Contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +""" A kernel's features and coefficients using quadrature Fourier features (QFF). """ + +import warnings +from typing import Mapping + +import tensorflow as tf + +import gpflow +from gpflow.base import TensorType +from gpflow.quadrature.gauss_hermite import ndgh_points_and_weights + +from gpflux.layers.basis_functions.fourier_features.base import FourierFeaturesBase +from gpflux.layers.basis_functions.fourier_features.utils import ( + QFF_SUPPORTED_KERNELS, + _bases_concat, +) +from gpflux.types import ShapeType + + +class QuadratureFourierFeatures(FourierFeaturesBase): + def __init__(self, kernel: gpflow.kernels.Kernel, n_components: int, **kwargs: Mapping): + assert isinstance(kernel, QFF_SUPPORTED_KERNELS), "Unsupported Kernel" + if tf.reduce_any(tf.less(kernel.lengthscales, 1e-1)): + warnings.warn( + "Quadrature Fourier feature approximation of kernels " + "with small lengthscale lead to unexpected behaviors!" + ) + super(QuadratureFourierFeatures, self).__init__(kernel, n_components, **kwargs) + + def build(self, input_shape: ShapeType) -> None: + """ + Creates the variables of the layer. + See `tf.keras.layers.Layer.build() + `_. + """ + input_dim = input_shape[-1] + abscissa_value, omegas_value = ndgh_points_and_weights( + dim=input_dim, n_gh=self.n_components + ) + omegas_value = tf.squeeze(omegas_value, axis=-1) + + # Quadrature node points + self.abscissa = tf.Variable(initial_value=abscissa_value, trainable=False) # (M^D, D) + # Gauss-Hermite weights + self.factors = tf.Variable(initial_value=omegas_value, trainable=False) # (M^D,) + super(QuadratureFourierFeatures, self).build(input_shape) + + def _compute_output_dim(self, input_shape: ShapeType) -> int: + input_dim = input_shape[-1] + return 2 * self.n_components ** input_dim + + def _compute_bases(self, inputs: TensorType) -> tf.Tensor: + """ + Compute basis functions. + + :return: A tensor with the shape ``[N, 2M^D]``. + """ + return _bases_concat(inputs, self.abscissa) + + def _compute_constant(self) -> tf.Tensor: + """ + Compute normalizing constant for basis functions. 
diff --git a/gpflux/layers/basis_functions/random_fourier_features.py b/gpflux/layers/basis_functions/fourier_features/random.py
similarity index 58%
rename from gpflux/layers/basis_functions/random_fourier_features.py
rename to gpflux/layers/basis_functions/fourier_features/random.py
index bba25995..e6b14c70 100644
--- a/gpflux/layers/basis_functions/random_fourier_features.py
+++ b/gpflux/layers/basis_functions/fourier_features/random.py
@@ -23,40 +23,31 @@
 import gpflow
 from gpflow.base import DType, TensorType
 
-from gpflux.layers.basis_functions.utils import (
+from gpflux.layers.basis_functions.fourier_features.base import FourierFeaturesBase
+from gpflux.layers.basis_functions.fourier_features.utils import (
     RFF_SUPPORTED_KERNELS,
-    _mapping_concat,
-    _mapping_cosine,
+    _bases_concat,
+    _bases_cosine,
     _matern_number,
     _sample_students_t,
 )
 from gpflux.types import ShapeType
 
-"""
-Kernels supported by :class:`RandomFourierFeatures`.
-
-You can build RFF for shift-invariant stationary kernels from which you can
-sample frequencies from their power spectrum, following Bochner's theorem.
-"""
+class RandomFourierFeaturesBase(FourierFeaturesBase):
+    def __init__(self, kernel: gpflow.kernels.Kernel, n_components: int, **kwargs: Mapping):
+        assert isinstance(kernel, RFF_SUPPORTED_KERNELS), "Unsupported Kernel"
+        super(RandomFourierFeaturesBase, self).__init__(kernel, n_components, **kwargs)
 
-class RandomFourierFeaturesBase(tf.keras.layers.Layer):
-    def __init__(self, kernel: gpflow.kernels.Kernel, output_dim: int, **kwargs: Mapping):
+    def build(self, input_shape: ShapeType) -> None:
         """
-        :param kernel: kernel to approximate using a set of random features.
-        :param output_dim: total number of basis functions used to approximate
-            the kernel.
+        Creates the variables of the layer.
+        See `tf.keras.layers.Layer.build()
+        <https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer#build>`_.
         """
-        super().__init__(**kwargs)
-
-        assert isinstance(kernel, RFF_SUPPORTED_KERNELS), "Unsupported Kernel"
-        self.kernel = kernel
-        self.output_dim = output_dim  # M
-        if kwargs.get("input_dim", None):
-            self._input_dim = kwargs["input_dim"]
-            self.build(tf.TensorShape([self._input_dim]))
-        else:
-            self._input_dim = None
+        input_dim = input_shape[-1]
+        self._weights_build(input_dim, n_components=self.n_components)
+        super(RandomFourierFeaturesBase, self).build(input_shape)
 
     def _weights_build(self, input_dim: int, n_components: int) -> None:
         shape = (n_components, input_dim)
@@ -76,29 +67,12 @@ def _weights_init(self, shape: TensorType, dtype: Optional[DType] = None) -> Ten
         nu = 2.0 * p + 1.0  # degrees of freedom
         return _sample_students_t(nu, shape, dtype)
 
-    def compute_output_shape(self, input_shape: ShapeType) -> tf.TensorShape:
-        """
-        Computes the output shape of the layer.
-        See `tf.keras.layers.Layer.compute_output_shape()
-        <https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer#compute_output_shape>`_.
+    @staticmethod
+    def rff_constant(variance: TensorType, output_dim: int) -> tf.Tensor:
         """
-        # TODO: Keras docs say "If the layer has not been built, this method
-        # will call `build` on the layer." -- do we need to do so?
-        tensor_shape = tf.TensorShape(input_shape).with_rank(2)
-        return tensor_shape[:-1].concatenate(self.output_dim)
-
-    def get_config(self) -> Mapping:
+        Normalizing constant for random Fourier features.
         """
-        Returns the config of the layer.
-        See `tf.keras.layers.Layer.get_config()
-        <https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer#get_config>`_.
- """ - config = super().get_config() - config.update( - {"kernel": self.kernel, "output_dim": self.output_dim, "input_dim": self._input_dim} - ) - - return config + return tf.sqrt(tf.math.truediv(2.0 * variance, output_dim)) class RandomFourierFeatures(RandomFourierFeaturesBase): @@ -131,47 +105,28 @@ class RandomFourierFeatures(RandomFourierFeaturesBase): where :math:`p(\boldsymbol{\theta})` is the spectral density of the kernel. At least for the squared exponential kernel, this variant of the feature - mapping has more desirable theoretical properties than its cosine-based - counterpart :class:`RandomFourierFeaturesCosine` :cite:p:`sutherland2015error`. + mapping has more desirable theoretical properties than its counterpart form + from phase-shifted cosines :class:`RandomFourierFeaturesCosine` :cite:p:`sutherland2015error`. """ - def __init__(self, kernel: gpflow.kernels.Kernel, output_dim: int, **kwargs: Mapping): - """ - :param kernel: kernel to approximate using a set of random features. - :param output_dim: total number of basis functions used to approximate - the kernel. - """ - assert not output_dim % 2, "must specify an even number of random features" - super().__init__(kernel, output_dim, **kwargs) + def _compute_output_dim(self, input_shape: ShapeType) -> int: + return 2 * self.n_components - def build(self, input_shape: ShapeType) -> None: + def _compute_bases(self, inputs: TensorType) -> tf.Tensor: """ - Creates the variables of the layer. - See `tf.keras.layers.Layer.build() - `_. - """ - input_dim = input_shape[-1] - self._weights_build(input_dim, n_components=self.output_dim // 2) + Compute basis functions. - super().build(input_shape) - - def call(self, inputs: TensorType) -> tf.Tensor: + :return: A tensor with the shape ``[N, 2M]``. """ - Evaluate the basis functions at ``inputs``. + return _bases_concat(inputs, self.W) - :param inputs: The evaluation points, a tensor with the shape ``[N, D]``. + def _compute_constant(self) -> tf.Tensor: + """ + Compute normalizing constant for basis functions. - :return: A tensor with the shape ``[N, M]``. + :return: A tensor with the shape ``[]`` (i.e. a scalar). """ - output = _mapping_concat( - inputs, - self.W, - variance=self.kernel.variance, - lengthscales=self.kernel.lengthscales, - n_components=self.output_dim, - ) - tf.ensure_shape(output, self.compute_output_shape(inputs.shape)) - return output + return self.rff_constant(self.kernel.variance, output_dim=2 * self.n_components) class RandomFourierFeaturesCosine(RandomFourierFeaturesBase): @@ -201,7 +156,7 @@ class RandomFourierFeaturesCosine(RandomFourierFeaturesBase): where :math:`p(\boldsymbol{\theta})` is the spectral density of the kernel - :math:`\tau \sim \mathcal{U}(0, 2\pi)` - Equivalent to :class:`RandomFourierFeatures` by elementary trignometric identities. + Equivalent to :class:`RandomFourierFeatures` by elementary trigonometric identities. """ def build(self, input_shape: ShapeType) -> None: @@ -210,11 +165,8 @@ def build(self, input_shape: ShapeType) -> None: See `tf.keras.layers.Layer.build() `_. 
""" - input_dim = input_shape[-1] - self._weights_build(input_dim, n_components=self.output_dim) - self._bias_build(n_components=self.output_dim) - - super().build(input_shape) + self._bias_build(n_components=self.n_components) + super(RandomFourierFeaturesCosine, self).build(input_shape) def _bias_build(self, n_components: int) -> None: shape = (1, n_components) @@ -229,21 +181,21 @@ def _bias_build(self, n_components: int) -> None: def _bias_init(self, shape: TensorType, dtype: Optional[DType] = None) -> TensorType: return tf.random.uniform(shape=shape, maxval=2.0 * np.pi, dtype=dtype) - def call(self, inputs: TensorType) -> tf.Tensor: - """ - Evaluate the basis functions at ``inputs``. + def _compute_output_dim(self, input_shape: ShapeType) -> int: + return self.n_components - :param inputs: The evaluation points, a tensor with the shape ``[N, D]``. + def _compute_bases(self, inputs: TensorType) -> tf.Tensor: + """ + Compute basis functions. :return: A tensor with the shape ``[N, M]``. """ - output = _mapping_cosine( - inputs, - self.W, - self.b, - variance=self.kernel.variance, - lengthscales=self.kernel.lengthscales, - n_components=self.output_dim, - ) - tf.ensure_shape(output, self.compute_output_shape(inputs.shape)) - return output + return _bases_cosine(inputs, self.W, self.b) + + def _compute_constant(self) -> tf.Tensor: + """ + Compute normalizing constant for basis functions. + + :return: A tensor with the shape ``[]`` (i.e. a scalar). + """ + return self.rff_constant(self.kernel.variance, output_dim=self.n_components) diff --git a/gpflux/layers/basis_functions/utils.py b/gpflux/layers/basis_functions/fourier_features/utils.py similarity index 75% rename from gpflux/layers/basis_functions/utils.py rename to gpflux/layers/basis_functions/fourier_features/utils.py index 09058436..6959a96c 100644 --- a/gpflux/layers/basis_functions/utils.py +++ b/gpflux/layers/basis_functions/fourier_features/utils.py @@ -25,6 +25,22 @@ from gpflux.types import ShapeType +""" +Kernels supported by :class:`QuadratureFourierFeatures`. + +Currently we only support the :class:`gpflow.kernels.SquaredExponential` kernel. +For Matern kernels use :class:`RandomFourierFeatures`. +""" +QFF_SUPPORTED_KERNELS: Tuple[Type[gpflow.kernels.Stationary], ...] = ( + gpflow.kernels.SquaredExponential, +) + +""" +Kernels supported by :class:`RandomFourierFeatures`. + +You can build RFF for shift-invariant stationary kernels from which you can +sample frequencies from their power spectrum, following Bochner's theorem. +""" RFF_SUPPORTED_KERNELS: Tuple[Type[gpflow.kernels.Stationary], ...] = ( gpflow.kernels.SquaredExponential, gpflow.kernels.Matern12, @@ -74,40 +90,21 @@ def _sample_students_t(nu: float, shape: ShapeType, dtype: DType) -> TensorType: return students_t_rvs -def _mapping_cosine( - X: TensorType, - W: TensorType, - b: TensorType, - variance: TensorType, - lengthscales: TensorType, - n_components: int, -) -> TensorType: +def _bases_cosine(X: TensorType, W: TensorType, b: TensorType) -> TensorType: """ Feature map for random Fourier features (RFF) as originally prescribed by Rahimi & Recht, 2007 :cite:p:`rahimi2007random`. See also :cite:p:`sutherland2015error` for additional details. 
""" - constant = tf.sqrt(2.0 * variance / n_components) - X_scaled = tf.divide(X, lengthscales) # [N, D] - proj = tf.matmul(X_scaled, W, transpose_b=True) # [N, M] - bases = tf.cos(proj + b) # [N, M] - return constant * bases # [N, M] - - -def _mapping_concat( - X: TensorType, - W: TensorType, - variance: TensorType, - lengthscales: TensorType, - n_components: int, -) -> TensorType: + proj = tf.matmul(X, W, transpose_b=True) + b # [N, M] + return tf.cos(proj) # [N, M] + + +def _bases_concat(X: TensorType, W: TensorType) -> TensorType: """ Feature map for random Fourier features (RFF) as originally prescribed by Rahimi & Recht, 2007 :cite:p:`rahimi2007random`. See also :cite:p:`sutherland2015error` for additional details. """ - constant = tf.sqrt(2.0 * variance / n_components) - X_scaled = tf.divide(X, lengthscales) # [N, D] - proj = tf.matmul(X_scaled, W, transpose_b=True) # [N, M // 2] - bases = tf.concat([tf.sin(proj), tf.cos(proj)], axis=-1) # [N, M] - return constant * bases # [N, M] + proj = tf.matmul(X, W, transpose_b=True) # [N, M] + return tf.concat([tf.sin(proj), tf.cos(proj)], axis=-1) # [N, 2M] diff --git a/tests/gpflux/layers/basis_functions/fourier_features/test_quadrature.py b/tests/gpflux/layers/basis_functions/fourier_features/test_quadrature.py new file mode 100644 index 00000000..c27ce9a8 --- /dev/null +++ b/tests/gpflux/layers/basis_functions/fourier_features/test_quadrature.py @@ -0,0 +1,181 @@ +# +# Copyright (c) 2021 The GPflux Contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/tests/gpflux/layers/basis_functions/fourier_features/test_quadrature.py b/tests/gpflux/layers/basis_functions/fourier_features/test_quadrature.py
new file mode 100644
index 00000000..c27ce9a8
--- /dev/null
+++ b/tests/gpflux/layers/basis_functions/fourier_features/test_quadrature.py
@@ -0,0 +1,181 @@
+#
+# Copyright (c) 2021 The GPflux Contributors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import numpy as np
+import pytest
+import tensorflow as tf
+from tensorflow.python.keras.testing_utils import layer_test
+from tensorflow.python.keras.utils.kernelized_utils import inner_product
+
+import gpflow
+from gpflow.quadrature.gauss_hermite import NDiagGHQuadrature
+from gpflow.utilities.ops import difference_matrix
+
+from gpflux.layers.basis_functions.fourier_features import QuadratureFourierFeatures
+from gpflux.layers.basis_functions.fourier_features.utils import QFF_SUPPORTED_KERNELS
+
+
+@pytest.fixture(name="n_dims", params=[1, 2, 3])
+def _n_dims_fixture(request):
+    return request.param
+
+
+@pytest.fixture(name="n_components", params=[1, 2, 4, 20])
+def _n_features_fixture(request):
+    return request.param
+
+
+@pytest.fixture(name="variance", params=[0.5, 1.0, 2.0])
+def _variance_fixture(request):
+    return request.param
+
+
+@pytest.fixture(name="lengthscale", params=[0.75, 1.0, 5.0])
+def _lengthscale_fixture(request):
+    return request.param
+
+
+@pytest.fixture(name="batch_size", params=[1, 10])
+def _batch_size_fixture(request):
+    return request.param
+
+
+@pytest.fixture(name="kernel_cls", params=list(QFF_SUPPORTED_KERNELS))
+def _kernel_cls_fixture(request):
+    return request.param
+
+
+def test_throw_for_unsupported_kernel():
+    kernel = gpflow.kernels.Constant()
+    with pytest.raises(AssertionError) as excinfo:
+        QuadratureFourierFeatures(kernel, n_components=1)
+    assert "Unsupported Kernel" in str(excinfo.value)
+
+
+def test_quadrature_fourier_features_can_approximate_kernel_multidim(
+    kernel_cls, variance, lengthscale, n_dims
+):
+    """
+    Compare finite feature approximation to analytical kernel expression.
+    Approximation only holds for large enough lengthscales, as explained in TODO: notebook.
+    """
+    n_components = 128
+
+    x_rows = 20
+    y_rows = 30
+
+    # ARD; small lengthscales can lead to large errors in this approximation
+    lengthscales = np.random.uniform(low=0.75, size=n_dims) * lengthscale
+
+    kernel = kernel_cls(variance=variance, lengthscales=lengthscales)
+    fourier_features = QuadratureFourierFeatures(kernel, n_components, dtype=tf.float64)
+
+    x = tf.random.uniform((x_rows, n_dims), dtype=tf.float64)
+    y = tf.random.uniform((y_rows, n_dims), dtype=tf.float64)
+
+    u = fourier_features(x)
+    v = fourier_features(y)
+    approx_kernel_matrix = inner_product(u, v)
+
+    actual_kernel_matrix = kernel.K(x, y)
+
+    np.testing.assert_allclose(approx_kernel_matrix, actual_kernel_matrix)
+
+
+def test_feature_map_decomposition(kernel_cls, variance, lengthscale, n_dims, n_components):
+    """
+    Verify that the inner product of the feature map yields exactly the same
+    result as the direct Gauss-Hermite quadrature scheme.
+ """ + x_rows = 20 + y_rows = 30 + # ARD + lengthscales = np.random.rand(n_dims) * lengthscale + + kernel = kernel_cls(variance=variance, lengthscales=lengthscales) + fourier_features = QuadratureFourierFeatures(kernel, n_components, dtype=tf.float64) + + x = tf.random.uniform((x_rows, n_dims), dtype=tf.float64) + y = tf.random.uniform((y_rows, n_dims), dtype=tf.float64) + + u = fourier_features(x) + v = fourier_features(y) + K_decomp = inner_product(u, v) + + x_scaled = tf.truediv(x, lengthscales) + y_scaled = tf.truediv(y, lengthscales) + r = difference_matrix(x_scaled, y_scaled) + + def eigen_func(w): + return variance * tf.cos(tf.matmul(r, w, transpose_b=True)) + + quadrature = NDiagGHQuadrature(dim=n_dims, n_gh=n_components) + K_quadrature = quadrature( + eigen_func, + mean=tf.zeros((1, 1, n_dims), dtype=tf.float64), + var=tf.ones((1, 1, n_dims), dtype=tf.float64), + ) + K_quadrature = tf.squeeze(K_quadrature, axis=-1) + + np.testing.assert_allclose(K_decomp, K_quadrature, atol=1e-15) + + +def test_fourier_features_shapes(n_components, n_dims, batch_size): + input_shape = (batch_size, n_dims) + kernel = gpflow.kernels.SquaredExponential() + feature_functions = QuadratureFourierFeatures(kernel, n_components, dtype=tf.float64) + output_shape = feature_functions.compute_output_shape(input_shape) + output_dim = output_shape[-1] + assert output_dim == 2 * n_components ** n_dims + features = feature_functions(tf.ones(shape=input_shape)) + np.testing.assert_equal(features.shape, output_shape) + + +def test_keras_testing_util_layer_test_1D(kernel_cls, batch_size, n_components): + kernel = kernel_cls() + + tf.keras.utils.get_custom_objects()["QuadratureFourierFeatures"] = QuadratureFourierFeatures + layer_test( + QuadratureFourierFeatures, + kwargs={ + "kernel": kernel, + "n_components": n_components, + "input_dim": 1, + "dtype": "float64", + "dynamic": True, + }, + input_shape=(batch_size, 1), + input_dtype="float64", + ) + + +def test_keras_testing_util_layer_test_multidim(kernel_cls, batch_size, n_dims, n_components): + kernel = kernel_cls() + + tf.keras.utils.get_custom_objects()["QuadratureFourierFeatures"] = QuadratureFourierFeatures + layer_test( + QuadratureFourierFeatures, + kwargs={ + "kernel": kernel, + "n_components": n_components, + "input_dim": n_dims, + "dtype": "float64", + "dynamic": True, + }, + input_shape=(batch_size, n_dims), + input_dtype="float64", + ) diff --git a/tests/gpflux/layers/basis_functions/test_random_fourier_features.py b/tests/gpflux/layers/basis_functions/fourier_features/test_random.py similarity index 62% rename from tests/gpflux/layers/basis_functions/test_random_fourier_features.py rename to tests/gpflux/layers/basis_functions/fourier_features/test_random.py index d1c6cf85..a7a0f7a9 100644 --- a/tests/gpflux/layers/basis_functions/test_random_fourier_features.py +++ b/tests/gpflux/layers/basis_functions/fourier_features/test_random.py @@ -21,11 +21,11 @@ import gpflow -from gpflux.layers.basis_functions.random_fourier_features import ( - RFF_SUPPORTED_KERNELS, +from gpflux.layers.basis_functions.fourier_features import ( RandomFourierFeatures, RandomFourierFeaturesCosine, ) +from gpflux.layers.basis_functions.fourier_features.utils import RFF_SUPPORTED_KERNELS @pytest.fixture(name="n_dims", params=[1, 2, 3, 5, 10, 20]) @@ -33,7 +33,12 @@ def _n_dims_fixture(request): return request.param -@pytest.fixture(name="lengthscale", params=[1e-3, 0.1, 1.0, 5.0]) +@pytest.fixture(name="variance", params=[0.5, 1.0, 2.0]) +def _variance_fixture(request): + return 
+    return request.param
+
+
+@pytest.fixture(name="lengthscale", params=[0.1, 1.0, 5.0])
 def _lengthscale_fixture(request):
     return request.param
 
@@ -43,48 +48,40 @@ def _batch_size_fixture(request):
     return request.param
 
 
-@pytest.fixture(name="n_features", params=[2, 4, 16, 128])
+@pytest.fixture(name="n_components", params=[1, 2, 4, 20, 100])
 def _n_features_fixture(request):
     return request.param
 
 
-@pytest.fixture(name="kernel_class", params=list(RFF_SUPPORTED_KERNELS))
-def _kernel_class_fixture(request):
+@pytest.fixture(name="kernel_cls", params=list(RFF_SUPPORTED_KERNELS))
+def _kernel_cls_fixture(request):
     return request.param
 
 
-@pytest.fixture(
-    name="random_feature_class", params=[RandomFourierFeatures, RandomFourierFeaturesCosine]
-)
-def _random_feature_class_fixture(request):
+@pytest.fixture(name="basis_func_cls", params=[RandomFourierFeatures, RandomFourierFeaturesCosine])
+def _basis_func_cls_fixture(request):
     return request.param
 
 
-def test_throw_for_odd_num_features():
-    kernel = gpflow.kernels.Constant()
-    with pytest.raises(AssertionError) as excinfo:
-        RandomFourierFeatures(kernel, output_dim=3)
-    assert "must specify an even number of random features" in str(excinfo.value)
-
-
-def test_throw_for_unsupported_kernel(random_feature_class):
+def test_throw_for_unsupported_kernel(basis_func_cls):
     kernel = gpflow.kernels.Constant()
     with pytest.raises(AssertionError) as excinfo:
-        random_feature_class(kernel, output_dim=2)
+        basis_func_cls(kernel, n_components=1)
     assert "Unsupported Kernel" in str(excinfo.value)
 
 
-def test_fourier_features_can_approximate_kernel_multidim(
-    random_feature_class, kernel_class, lengthscale, n_dims
+def test_random_fourier_features_can_approximate_kernel_multidim(
+    basis_func_cls, kernel_cls, variance, lengthscale, n_dims
 ):
-    n_features = 10000
+    n_components = 40000
+
     x_rows = 20
     y_rows = 30
     # ARD
     lengthscales = np.random.rand((n_dims)) * lengthscale
 
-    kernel = kernel_class(lengthscales=lengthscales)
-    fourier_features = random_feature_class(kernel, n_features, dtype=tf.float64)
+    kernel = kernel_cls(variance=variance, lengthscales=lengthscales)
+    fourier_features = basis_func_cls(kernel, n_components, dtype=tf.float64)
 
     x = tf.random.uniform((x_rows, n_dims), dtype=tf.float64)
     y = tf.random.uniform((y_rows, n_dims), dtype=tf.float64)
@@ -98,26 +95,18 @@
     np.testing.assert_allclose(approx_kernel_matrix, actual_kernel_matrix, atol=5e-2)
 
 
-def test_fourier_features_shapes(random_feature_class, n_features, n_dims, batch_size):
-    kernel = gpflow.kernels.SquaredExponential()
-    fourier_features = random_feature_class(kernel, n_features, dtype=tf.float64)
-    features = fourier_features(tf.ones(shape=(batch_size, n_dims)))
-
-    np.testing.assert_equal(features.shape, [batch_size, n_features])
-
-
-def test_fourier_feature_layer_compute_covariance_of_inducing_variables(
-    random_feature_class, batch_size
+def test_random_fourier_feature_layer_compute_covariance_of_inducing_variables(
+    basis_func_cls, batch_size
 ):
     """
     Ensure that the random fourier feature map can be used to approximate the covariance matrix
     between the inducing point vectors of a sparse GP, with the condition that the number of latent
     GP models is greater than one.
""" - n_features = 10000 + n_components = 10000 kernel = gpflow.kernels.SquaredExponential() - fourier_features = random_feature_class(kernel, n_features, dtype=tf.float64) + fourier_features = basis_func_cls(kernel, n_components, dtype=tf.float64) x_new = tf.ones(shape=(2 * batch_size + 1, 1), dtype=tf.float64) @@ -129,15 +118,24 @@ def test_fourier_feature_layer_compute_covariance_of_inducing_variables( np.testing.assert_allclose(approx_kernel_matrix, actual_kernel_matrix, atol=5e-2) -def test_keras_testing_util_layer_test_1D(kernel_class, batch_size, n_features): - kernel = kernel_class() +def test_fourier_features_shapes(basis_func_cls, n_components, n_dims, batch_size): + input_shape = (batch_size, n_dims) + kernel = gpflow.kernels.SquaredExponential() + feature_functions = basis_func_cls(kernel, n_components, dtype=tf.float64) + output_shape = feature_functions.compute_output_shape(input_shape) + features = feature_functions(tf.ones(shape=input_shape)) + np.testing.assert_equal(features.shape, output_shape) + + +def test_keras_testing_util_layer_test_1D(kernel_cls, batch_size, n_components): + kernel = kernel_cls() tf.keras.utils.get_custom_objects()["RandomFourierFeatures"] = RandomFourierFeatures layer_test( RandomFourierFeatures, kwargs={ "kernel": kernel, - "output_dim": n_features, + "n_components": n_components, "input_dim": 1, "dtype": "float64", "dynamic": True, @@ -147,15 +145,15 @@ def test_keras_testing_util_layer_test_1D(kernel_class, batch_size, n_features): ) -def test_keras_testing_util_layer_test_multidim(kernel_class, batch_size, n_dims, n_features): - kernel = kernel_class() +def test_keras_testing_util_layer_test_multidim(kernel_cls, batch_size, n_dims, n_components): + kernel = kernel_cls() tf.keras.utils.get_custom_objects()["RandomFourierFeatures"] = RandomFourierFeatures layer_test( RandomFourierFeatures, kwargs={ "kernel": kernel, - "output_dim": n_features, + "n_components": n_components, "input_dim": n_dims, "dtype": "float64", "dynamic": True, diff --git a/tests/gpflux/sampling/test_sample.py b/tests/gpflux/sampling/test_sample.py index 50096d01..70b27588 100644 --- a/tests/gpflux/sampling/test_sample.py +++ b/tests/gpflux/sampling/test_sample.py @@ -20,7 +20,7 @@ import gpflow from gpflow.config import default_float, default_jitter -from gpflux.layers.basis_functions.random_fourier_features import RandomFourierFeatures +from gpflux.layers.basis_functions.fourier_features import RandomFourierFeaturesCosine from gpflux.sampling.kernel_with_feature_decomposition import KernelWithFeatureDecomposition from gpflux.sampling.sample import Sample, efficient_sample @@ -77,7 +77,7 @@ def test_conditional_sample(kernel, inducing_variable, whiten): def test_wilson_efficient_sample(kernel, inducing_variable, whiten): """Smoke and consistency test for efficient sampling using Wilson""" - eigenfunctions = RandomFourierFeatures(kernel, 100, dtype=default_float()) + eigenfunctions = RandomFourierFeaturesCosine(kernel, 100, dtype=default_float()) eigenvalues = np.ones((100, 1), dtype=default_float()) # To apply Wilson sampling we require the features and eigenvalues of the kernel kernel2 = KernelWithFeatureDecomposition(kernel, eigenfunctions, eigenvalues)