
Commit

fix domain bug, fix type hints in mud.base, upgrade python, add integration tests (#57)

* fixes #56

* migrate set_shape

* fix mypy errors in base

* use latest publishing tools

* set up integration testing with mud-examples

* upgrade python versions
mathematicalmichael authored Jul 4, 2022
1 parent a8c1d85 commit 19782bd
Showing 5 changed files with 105 additions and 64 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
@@ -23,7 +23,7 @@ jobs:
name: Test build process
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
python-version: ["3.7", "3.8", "3.9", "3.10"]
runs-on: ubuntu-latest
steps:
- name: Checkout
31 changes: 28 additions & 3 deletions .github/workflows/main.yml
@@ -19,14 +19,13 @@ on:
- cron: "0 0 */7 * *"

jobs:
test:
unit-tests:
name: Run unit tests
strategy:
matrix:
python-version: [3.7, 3.9]
python-version: ["3.7", "3.10"]
runs-on: ubuntu-latest
steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- name: Checkout
uses: actions/checkout@v2
with:
@@ -50,3 +49,29 @@ jobs:
run: coveralls --service=github
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

integration-tests:
name: Run integration tests
strategy:
matrix:
python-version: ["3.9"]
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
with:
fetch-depth: 1

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
pip install --upgrade pip
pip install --upgrade mud-examples
pip install -e .
- name: Check examples
run: mud_run_all -v
4 changes: 2 additions & 2 deletions .github/workflows/publish.yml
@@ -10,7 +10,7 @@ jobs:
name: "Publish to PyPI"
strategy:
matrix:
python-version: [3.8]
python-version: ["3.9"]
runs-on: ubuntu-latest
steps:
- name: Checkout
@@ -27,7 +27,7 @@ jobs:
run: pip install --upgrade pip

- name: Install build tools
run: pip install .[pub]
run: pip install --upgrade .[pub]

- name: Build dist
run: |
122 changes: 65 additions & 57 deletions src/mud/base.py
@@ -1,12 +1,14 @@
from typing import List, Union
from typing import Callable, List, Optional, Union

import numpy as np
from numpy.typing import ArrayLike
from matplotlib import pyplot as plt
from scipy.stats import rv_continuous
from scipy.stats import distributions as dist
from scipy.stats import gaussian_kde as gkde
from mud.util import make_2d_unit_mesh, null_space

# from numpy.typing import ArrayLike
from matplotlib import pyplot as plt # type: ignore
from scipy.stats import distributions as dist # type: ignore
from scipy.stats import gaussian_kde as gkde # type: ignore
from scipy.stats import rv_continuous # type: ignore

from mud.util import make_2d_unit_mesh, null_space, set_shape


class DensityProblem(object):
@@ -18,20 +20,20 @@ class DensityProblem(object):
Attributes
----------
X : ArrayLike
X : np.ndarray
Array containing parameter samples from an initial distribution.
Rows represent each sample while columns represent parameter values.
If 1 dimensional input is passed, assumed that it represents repeated
samples of a 1-dimensional parameter.
y : ArrayLike
y : np.ndarray
Array containing push-forward values of paramters samples through the
forward model. These samples will form the `predicted distribution`.
domain : ArrayLike
domain : np.ndarray
Array containing ranges of each paramter value in the parameter
space. Note that the number of rows must equal the number of
parameters, and the number of columns must always be two, for min/max
range.
weights : ArrayLike, optional
weights : np.ndarray, optional
Weights to apply to each parameter sample. Either a 1D array of the
same length as number of samples or a 2D array if more than
one set of weights is to be incorporated. If so the weights will be
@@ -88,15 +90,12 @@ class DensityProblem(object):

def __init__(
self,
X: ArrayLike,
y: ArrayLike,
domain: Union[np.ndarray, List] = None,
weights: Union[np.ndarray, List] = None,
X: np.ndarray,
y: np.ndarray,
domain: Optional[Union[np.ndarray, List[float]]] = None,
weights: Optional[Union[np.ndarray, List[float]]] = None,
):

# Set and validate inputs. Note we reshape inputs as necessary
def set_shape(x, y):
return x.reshape(y) if x.ndim < 2 else x
self.X = set_shape(np.array(X), (1, -1))
self.y = set_shape(np.array(y), (-1, 1))
self.domain = set_shape(np.array(domain), (1, -1))
@@ -107,15 +106,19 @@ def set_shape(x, y):
self._in = None # Initial values
self._pr = None # Predicted values
self._ob = None # Observed values
self._in_dist = None # Initial distirbution
self._in_dist = None # Initial distribution
self._pr_dist = None # Predicted distribution
self._ob_dist = None # Observed distribution

if self.domain is not None:
# Assert domain passed in is consitent with data array
assert self.domain.shape[0] == self.n_params
assert (
self.domain.shape[0] == self.n_params
), f"Size mismatch: domain: {self.domain.shape}, params: {self.X.shape}"

# Iniitialize weights
if weights is None:
weights = np.ones(self.X.shape[0])
self.set_weights(weights)

@property
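
For reference, a minimal usage sketch of the constructor above; the samples and stand-in forward map below are made up for illustration, and only the DensityProblem(X, y, domain=...) signature and the size-mismatch assertion come from this file:

import numpy as np
from mud.base import DensityProblem

rng = np.random.default_rng(0)
lam = rng.uniform(-1, 1, size=(1000, 2))     # 1000 samples of a 2-D parameter (illustrative)
qoi = lam.sum(axis=1, keepdims=True)         # stand-in push-forward values Q(lambda)
domain = np.array([[-1, 1], [-1, 1]])        # one [min, max] row per parameter

D = DensityProblem(lam, qoi, domain=domain)  # domain rows must equal n_params; a mismatch
                                             # now raises "Size mismatch: domain: ..., params: ..."
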
@@ -130,7 +133,9 @@ def n_features(self):
def n_samples(self):
return self.y.shape[0]

def set_weights(self, weights: Union[np.ndarray, List], normalize: bool = False):
def set_weights(
self, weights: Union[np.ndarray, List[float]], normalize: bool = False
):
"""Set Sample Weights
Sets the weights to use for each sample. Note weights can be one or two
@@ -141,7 +146,7 @@ def set_weights(self, weights: Union[np.ndarray, List], normalize: bool = False)
Parameters
----------
weights : np.ndarray, List
weights : np.ndarray, List[float]
Numpy array or list of same length as the `n_samples` or if two
dimensional, number of columns should match `n_samples`
normalise : bool, default=False
@@ -152,28 +157,25 @@ def set_weights(self, weights: Union[np.ndarray, List], normalize: bool = False)
Warnings
--------
Resetting weights will delete the predicted and updated distirbution
Resetting weights will delete the predicted and updated distribution
values in the class, requiring a re-run of adequate `set_` methods
and/or `fit()` to reproduce with new weights.
"""
if weights is None:
w = np.ones(self.X.shape[0])
else:
if isinstance(weights, list):
weights = np.array(weights)
if isinstance(weights, list):
weights = np.array(weights)

# Reshape to 2D
w = weights.reshape(1, -1) if weights.ndim == 1 else weights
# Reshape to 2D
w = weights.reshape(1, -1) if weights.ndim == 1 else weights

# assert appropriate size
assert self.n_samples == w.shape[1], f"`weights` must size {self.n_samples}"
# assert appropriate size
assert self.n_samples == w.shape[1], f"`weights` must size {self.n_samples}"

# Multiply weights column wise for stacked weights
w = np.prod(w, axis=0)
# Multiply weights column wise for stacked weights
w = np.prod(w, axis=0)

# Normalize weight vector
if normalize:
w = np.divide(w, np.sum(w, axis=0))
# Normalize weight vector
if normalize:
w = np.divide(w, np.sum(w, axis=0))

self._weights = w
self._pr = None
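
Continuing that sketch, stacked weights as described in the set_weights docstring; the gamma draw is just a hypothetical second weight set:

w1 = rng.gamma(2.0, size=1000)                      # hypothetical importance weights
w2 = np.ones(1000)                                  # trivial second set, stacked row-wise
D.set_weights(np.vstack([w1, w2]), normalize=True)  # rows are multiplied column-wise,
                                                    # then the product is normalized to sum to 1
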
@@ -200,7 +202,7 @@ def set_observed(self, distribution: rv_continuous = dist.norm()):
self._ob_dist = distribution
self._ob = distribution.pdf(self.y).prod(axis=1)

def set_initial(self, distribution: rv_continuous = None):
def set_initial(self, distribution: Optional[rv_continuous] = None):
"""
Set initial probability distribution of model parameter values
:math:`\\pi_{in}(\\lambda)`.
@@ -216,7 +218,7 @@ def set_initial(self, distribution: Optional[rv_continuous] = None):
Warnings
--------
Setting initial distirbution resets the predicted and updated
Setting initial distribution resets the predicted and updated
distributions, so make sure to set the initial first.
"""
if distribution is None: # assume standard normal by default
Expand All @@ -226,8 +228,8 @@ def set_initial(self, distribution: rv_continuous = None):
distribution = dist.uniform(loc=mn, scale=mx - mn)
else:
distribution = dist.norm()

self._in_dist = distribution
assert self._in_dist is not None
self._in = self._in_dist.pdf(self.X).prod(axis=1)
self._up = None
self._pr = None
@@ -236,8 +238,8 @@ def set_initial(self, distribution: Optional[rv_continuous] = None):
def set_predicted(
self,
distribution: rv_continuous = None,
bw_method: Union[str, callable, np.generic] = None,
weights: ArrayLike = None,
bw_method: Union[str, Callable, np.generic] = None,
weights: np.ndarray = None,
**kwargs,
):
"""
Expand All @@ -257,11 +259,11 @@ def set_predicted(
values y. This should be a frozen distribution if using
`scipy`, and otherwise be a class containing a `pdf()` method
return the probability density value for an array of values.
bw_method : str, scalar, or callable, optional
bw_method : str, scalar, or Callable, optional
Method to use to calculate estimator bandwidth. Only used if
distribution is not specified, See documentation for
:class:`scipy.stats.gaussian_kde` for more information.
weights : ArrayLike, optional
weights : np.ndarray, optional
Weights to use on predicted samples. Note that if specified,
:meth:`set_weights` will be run first to calculate new weights.
Otherwise, whatever was previously set as the weights is used.
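
A hedged sketch of the call sequence these setters support, continuing the example above; fit() is only referenced by the docstrings in this diff, and the distributions chosen here are placeholders:

from scipy.stats import distributions as dist

D.set_observed(dist.norm(loc=0.0, scale=0.1))  # observed density on the QoI values
D.set_initial()                                # defaults to a uniform over the given domain
D.set_predicted(bw_method="scott")             # gaussian_kde of the push-forward samples
D.fit()                                        # updated density; exact behavior assumed from
                                               # the docstring references above
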
@@ -411,7 +413,7 @@ def plot_param_space(
self,
param_idx: int = 0,
ax: plt.Axes = None,
x_range: Union[list, np.ndarray] = None,
x_range: Union[List[float], np.ndarray] = None,
aff: int = 1000,
in_opts={"color": "b", "linestyle": "--", "linewidth": 4, "label": "Initial"},
up_opts={"color": "k", "linestyle": "-.", "linewidth": 4, "label": "Updated"},
@@ -477,9 +479,10 @@ def plot_param_space(
_, ax = plt.subplots(1, 1) if ax is None else (None, ax)

# Default x_range to full domain of all parameters
x_range = x_range if x_range is not None else self.domain
x_range = np.array(x_range) if x_range is not None else self.domain
x_plot = np.linspace(x_range.T[0], x_range.T[1], num=aff)

assert self._in_dist is not None
# Plot distributions for all not set to None
if in_opts:
# Update default options with passed in options
@@ -516,7 +519,7 @@ def plot_obs_space(
self,
obs_idx: int = 0,
ax: plt.Axes = None,
y_range: ArrayLike = None,
y_range: np.ndarray = None,
aff=1000,
ob_opts={"color": "r", "linestyle": "-", "linewidth": 4, "label": "Observed"},
pr_opts={
@@ -585,6 +588,7 @@ def plot_obs_space(
y_range = np.repeat([[-1, 1]], self.n_features, axis=0)
y_plot = np.linspace(y_range.T[0], y_range.T[1], num=aff)

assert self._ob_dist is not None, "Observed dist empty"
if ob_opts:
# Update options with passed in values
oo.update(ob_opts)
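
And a sketch of the plotting helpers whose signatures appear above, assuming the solve sequence has already been run:

import matplotlib.pyplot as plt

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
D.plot_param_space(param_idx=0, ax=ax1)  # initial vs. updated parameter densities
D.plot_obs_space(obs_idx=0, ax=ax2)      # observed vs. predicted QoI densities
plt.show()
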
@@ -661,15 +665,19 @@ def __init__(
domain: Union[np.ndarray, List] = None,
):

# Initialize inputs
self.X = np.array(X)
self.y = np.array(y)
self.y = self.y.reshape(-1, 1) if y.ndim == 1 else y
self.domain = np.array(domain).reshape(1, -1)
# Set and validate inputs. Note we reshape inputs as necessary
def set_shape(x, y):
return x.reshape(y) if x.ndim < 2 else x

self.X = set_shape(np.array(X), (1, -1))
self.y = set_shape(np.array(y), (-1, 1))
self.domain = set_shape(np.array(domain), (1, -1))

if self.domain is not None:
# Assert our domain passed in is consistent with data array
assert self.domain.shape[0] == self.n_params
assert (
self.domain.shape[0] == self.n_params
), f"Size mismatch: domain: {self.domain.shape}, params: {self.X.shape}"

# Initialize ps, predicted, and likelihood values/distributions
self._ps = None
@@ -858,19 +866,19 @@ class LinearGaussianProblem(object):
Attributes
----------
A : ArrayLike
A : np.ndarray
2D Array defining kinear transformation from model parameter space to
model output space.
y : ArrayLike
y : np.ndarray
1D Array containing observed values of Q(\\lambda)
Array containing push-forward values of paramters samples through the
forward model. These samples will form the `predicted distribution`.
domain : ArrayLike
domain : np.ndarray
Array containing ranges of each paramter value in the parameter
space. Note that the number of rows must equal the number of
parameters, and the number of columns must always be two, for min/max
range.
weights : ArrayLike, optional
weights : np.ndarray, optional
Weights to apply to each parameter sample. Either a 1D array of the
same length as number of samples or a 2D array if more than
one set of weights is to be incorporated. If so the weights will be
10 changes: 9 additions & 1 deletion src/mud/util.py
@@ -1,4 +1,7 @@
from typing import List, Tuple, Union

import numpy as np
from numpy.typing import ArrayLike
from scipy.special import erfinv


@@ -182,7 +185,7 @@ def make_2d_unit_mesh(N: int = 50, window: int = 1):
return (X, Y, XX)


def make_2d_normal_mesh(N=50, window=1):
def make_2d_normal_mesh(N: int = 50, window: int = 1):
"""
Constructs mesh based on normal distribution to
discretize each axis.
@@ -204,3 +207,8 @@ def make_2d_normal_mesh(N=50, window=1):
X, Y = np.meshgrid(X, Y)
XX = np.vstack([X.ravel(), Y.ravel()]).T
return (X, Y, XX)


def set_shape(array: ArrayLike, shape: Union[List, Tuple] = (1, -1)):
"""Resizes inputs if they are one-dimensional."""
return array.reshape(shape) if array.ndim < 2 else array
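
A quick sketch of how the new set_shape helper behaves (inputs are illustrative):

import numpy as np
from mud.util import set_shape

set_shape(np.arange(4), (1, -1)).shape  # (1, 4): 1-D input promoted to a row
set_shape(np.arange(4), (-1, 1)).shape  # (4, 1): or to a column
set_shape(np.eye(3), (1, -1)).shape     # (3, 3): 2-D input returned unchanged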
