Skip to content

Commit

Permalink
Merge pull request #483 from bouthilx/feature/lpi
Browse files Browse the repository at this point in the history
Add LPI plot
  • Loading branch information
bouthilx authored Nov 28, 2020
2 parents d5db0c3 + 9dc425e commit d4a06a0
Show file tree
Hide file tree
Showing 14 changed files with 3,330 additions and 416 deletions.
1 change: 1 addition & 0 deletions conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ requirements:
- pandas
- falcon
- gunicorn
- scikit-learn

test:
import:
Expand Down
19 changes: 17 additions & 2 deletions docs/src/user/web_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -244,9 +244,9 @@ Plots
The plot resource permits the generation and retrieval of `Plotly <https://plotly.com/>`_ plots to
visualize your experiments and their results.

.. http:get:: /plots/regret/:experiment
.. http:get:: /plots/lpi/:experiment
Return a regret plot for the specified experiment.
Return an LPI (Local Parameter Importance) plot for the specified experiment.

**Example response**

Expand Down Expand Up @@ -274,6 +274,21 @@ visualize your experiments and their results.

:statuscode 404: When the specified experiment doesn't exist in the database.

.. http:get:: /plots/regret/:experiment
Return a regret plot for the specified experiment.

**Example response**

.. sourcecode:: http

HTTP/1.1 200 OK
Content-Type: text/javascript

The JSON output is generated automatically according to the `Plotly.js schema reference <https://plotly.com/python/reference/index/>`_.

:statuscode 404: When the specified experiment doesn't exist in the database.


Errors
------
Expand Down
2,905 changes: 2,600 additions & 305 deletions examples/plotting/plotting-api.ipynb

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@
]
},
install_requires=['PyYAML', 'pymongo>=3', 'numpy', 'scipy', 'gitpython', 'filelock',
'tabulate', 'AppDirs', 'plotly', 'pandas', 'gunicorn', 'falcon'],
'tabulate', 'AppDirs', 'plotly', 'pandas', 'gunicorn', 'falcon',
'scikit-learn'],
tests_require=tests_require,
setup_requires=['setuptools', 'appdirs', 'pytest-runner'],
extras_require=dict(test=tests_require),
Expand Down
3 changes: 2 additions & 1 deletion src/orion/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
:synopsis: Provides agnostic HPO analysis tools
"""

from orion.analysis.lpi import lpi
from orion.analysis.regret import regret

__all__ = ['regret']
__all__ = ['lpi', 'regret']
186 changes: 186 additions & 0 deletions src/orion/analysis/lpi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
# -*- coding: utf-8 -*-
"""
:mod:`orion.analysis.lpi` -- Provide tools to calculate Local Parameter Importance
==================================================================================
.. module:: orion.analysis.lpi
:platform: Unix
:synopsis: Provide tools to calculate Local Parameter Importance
"""
import numpy
import pandas as pd
from sklearn.ensemble import AdaBoostRegressor, BaggingRegressor,\
ExtraTreesRegressor, GradientBoostingRegressor, RandomForestRegressor

from orion.core.worker.transformer import build_required_space


# Registry of the supported scikit-learn ensemble regressors, keyed by class name.
_regressors_ = {
    regressor_class.__name__: regressor_class
    for regressor_class in (
        AdaBoostRegressor,
        BaggingRegressor,
        ExtraTreesRegressor,
        GradientBoostingRegressor,
        RandomForestRegressor,
    )
}


def train_regressor(regressor_name, data, **kwargs):
    """Train regressor model

    Parameters
    ----------
    regressor_name: str
        Name of the regression model to use. Can be one of
        - AdaBoostRegressor
        - BaggingRegressor
        - ExtraTreesRegressor
        - GradientBoostingRegressor
        - RandomForestRegressor
    data: numpy.ndarray
        2-dimensional array. The last column is used as the regression target and
        all the other columns are used as input features.
    **kwargs
        Arguments for the regressor model.

    Returns
    -------
    The value returned by the regressor's ``fit`` method (scikit-learn estimators
    return the fitted estimator itself).

    Raises
    ------
    ValueError
        If ``regressor_name`` is not one of the supported regressors.

    """
    if regressor_name not in _regressors_:
        # The list of valid names must be interpolated, hence the braces.
        raise ValueError(
            f'{regressor_name} is not a supported regressor. '
            f'Did you mean any of these: {list(_regressors_.keys())}')

    regressor = _regressors_[regressor_name](**kwargs)
    # Last column holds the objective; every other column is an input feature.
    return regressor.fit(data[:, :-1], data[:, -1])


def to_numpy(trials, space):
    """Convert trials in DataFrame to Numpy array of (params + objective)

    The columns are selected in the order of the keys of ``space``, followed by the
    ``'objective'`` column. Any other column of ``trials`` is dropped.
    """
    selected_columns = [*space.keys(), 'objective']
    return trials[selected_columns].to_numpy()


def flatten(trials_array, flattened_space):
    """Flatten dimensions

    Every row of ``trials_array`` is (params + objective). The params part of each row
    is passed through ``flattened_space.transform`` and the objective column is appended
    back, unchanged, as the last column of the result.
    """
    transformed_rows = [flattened_space.transform(row[:-1]) for row in trials_array]
    flattened_params = numpy.array(transformed_rows)
    # Slice with ``-1:`` to keep the objective 2-dimensional for the concatenation.
    objective_column = trials_array[:, -1:]
    return numpy.concatenate((flattened_params, objective_column), axis=1)


def make_grid(point, space, model, n):
    """Build a grid based on point.

    The shape of the grid will be
        (number of hyperparameters,
         number of points ``n``,
         number of hyperparameters + 1)
    Last column is the objective predicted by the model for a given point.

    For the i-th hyperparameter, the ``n`` rows of ``grid[i]`` are copies of ``point``
    in which only the i-th value varies, evenly spaced over the interval of the
    corresponding dimension.

    Parameters
    ----------
    point: numpy.ndarray
        The anchor point, as (hyperparameters + objective).
    space: mapping of dimensions
        One entry per hyperparameter, in the same order as the values of ``point``.
        Each dimension must provide ``interval()``, whose result is unpacked as the
        bounds given to ``numpy.linspace``.
    model: object
        Trained regressor providing ``predict``; used to fill the last column.
    n: int
        Number of points to generate along each hyperparameter.

    """
    n_dims = len(space)
    grid = numpy.zeros((n_dims, n, n_dims + 1))
    for axis, dimension in enumerate(space.values()):
        plane = grid[axis]  # view on the grid: writes below land in ``grid``
        plane[:, :] = point
        plane[:, axis] = numpy.linspace(*dimension.interval(), num=n)
        plane[:, -1] = model.predict(plane[:, :-1])
    return grid


def compute_variances(grid):
    """Compute variance for each hyperparameters

    ``grid`` is indexed as (hyperparameter, point, value); the variance is taken over
    the points of the last value column, giving one variance per hyperparameter.
    """
    last_column = grid[:, :, -1]
    return numpy.var(last_column, axis=1)


def _lpi(point, space, model, n):
    """Local parameter importance for each hyperparameters

    The importance of a hyperparameter is the variance of the model predictions along
    that hyperparameter (see ``make_grid``), divided by the sum of the variances of all
    hyperparameters.

    Returns
    -------
    DataFrame
        One row per key of ``space`` and a single column ``'LPI'``. When the total
        variance is exactly zero (the model predicts the same value everywhere on the
        grid), all importances are reported as 0 instead of NaN.

    """
    grid = make_grid(point, space, model, n)
    variances = compute_variances(grid)
    total_variance = variances.sum()
    if total_variance == 0:
        # Avoid 0/0 (NaN + RuntimeWarning) when no hyperparameter changes the prediction.
        ratios = numpy.zeros_like(variances)
    else:
        ratios = variances / total_variance
    return pd.DataFrame(data=ratios, index=space.keys(), columns=['LPI'])


def _linear_lpi(point, space, model, n):
# TODO
return


# Dispatch table: name accepted by the ``mode`` argument of ``lpi`` -> implementation.
modes = {
    'best': _lpi,
    'linear': _linear_lpi,
}


def lpi(trials, space, mode='best', model='RandomForestRegressor', n=20, **kwargs):
    """
    Calculates the Local Parameter Importance for a collection of :class:`Trial`.

    For more information on the metric, see original paper at
    https://ml.informatik.uni-freiburg.de/papers/18-LION12-CAVE.pdf.

    Biedenkapp, André, et al. "Cave: Configuration assessment, visualization and evaluation."
    International Conference on Learning and Intelligent Optimization. Springer, Cham, 2018.

    Parameters
    ----------
    trials: DataFrame
        A dataframe of trials containing, at least, one column per dimension of ``space``
        and the column 'objective'. The object must expose ``.empty`` and ``.shape``.
    space: Space object
        A space object from an experiment.
    mode: str
        Mode to compute the LPI.
        - ``best``: Take the best trial found (lowest objective) as the anchor for the LPI
        - ``linear``: Recompute LPI for all values on a grid
          (currently a placeholder that returns ``None``)
    model: str
        Name of the regression model to use. Can be one of
        - AdaBoostRegressor
        - BaggingRegressor
        - ExtraTreesRegressor
        - GradientBoostingRegressor
        - RandomForestRegressor (Default)
    n: int
        Number of points to compute the variances. Default is 20.
    **kwargs
        Arguments for the regressor model.

    Returns
    -------
    DataFrame
        LPI value for each parameter of the flattened space, in a column named ``'LPI'``.
        If there are no trials, every LPI value is 0.

    """
    flattened_space = build_required_space(
        space, type_requirement='numerical', shape_requirement='flattened')

    no_trials = trials.empty or trials.shape[0] == 0
    if no_trials:
        # Nothing to fit a regressor on: report a null importance for every dimension.
        return pd.DataFrame(
            data=[0] * len(flattened_space),
            index=flattened_space.keys(),
            columns=['LPI'])

    data = flatten(to_numpy(trials, space), flattened_space)
    regressor = train_regressor(model, data, **kwargs)
    # The objective is the last column; the anchor is the row with the lowest objective.
    best_row = numpy.argmin(data[:, -1])
    best_point = data[best_row]
    compute_lpi = modes[mode]
    return compute_lpi(best_point, flattened_space, regressor, n)
22 changes: 13 additions & 9 deletions src/orion/core/worker/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,21 +116,23 @@ def reshape(space, shape_requirement):
reshaped_space = ReshapedSpace(space)

for dim_index, dim in enumerate(space.values()):
if numpy.prod(dim.shape) == 1:
if not dim.shape or numpy.prod(dim.shape) == 1:
reshaped_space.register(
ReshapedDimension(
transformer=Identity(dim.type),
original_dimension=dim
original_dimension=dim,
index=dim_index
)
)
else:
for index in itertools.product(*map(range, dim.shape)):
key = f'{dim.name}[{",".join(map(str, index))}]'
reshaped_space.register(
ReshapedDimension(
transformer=View(dim.shape, index, dim_index, dim.type),
transformer=View(dim.shape, index, dim.type),
original_dimension=dim,
name=key
name=key,
index=dim_index
)
)

Expand Down Expand Up @@ -233,6 +235,8 @@ def transform(self, point):
# pylint:disable=unused-argument
def reverse(self, transformed_point, index=None):
"""Return `transformed_point` as it is."""
if index is not None:
return transformed_point[index]
return transformed_point

def repr_format(self, what):
Expand Down Expand Up @@ -529,10 +533,9 @@ def reverse(self, transformed_point, index=None):
class View(Transformer):
"""Look-up single index in a dimension with shape > 1"""

def __init__(self, shape, index, dim_index, domain_type=None):
def __init__(self, shape, index, domain_type=None):
self.shape = shape
self.index = index
self.dim_index = dim_index
self._domain_type = domain_type

@property
Expand All @@ -542,7 +545,7 @@ def first(self):

def transform(self, point):
"""Only return one element of the group"""
return point[self.dim_index][self.index]
return point[self.index]

def reverse(self, transformed_point, index=None):
"""Only return packed point if view of first element, otherwise drop."""
Expand Down Expand Up @@ -679,11 +682,12 @@ def cardinality(self):
class ReshapedDimension(TransformedDimension):
"""Duck-type `Dimension` to mimic its functionality."""

def __init__(self, transformer, original_dimension, name=None):
def __init__(self, transformer, original_dimension, index, name=None):
super(ReshapedDimension, self).__init__(transformer, original_dimension)
if name is None:
name = original_dimension.name
self._name = name
self.index = index

@property
def first(self):
Expand All @@ -692,7 +696,7 @@ def first(self):

def transform(self, point):
"""Expose `Transformer.transform` interface from underlying instance."""
return self.transformer.transform(point)
return self.transformer.transform(point[self.index])

def reverse(self, transformed_point, index=None):
"""Expose `Transformer.reverse` interface from underlying instance."""
Expand Down
Loading

0 comments on commit d4a06a0

Please sign in to comment.