Skip to content

Commit

Permalink
Merge pull request #305 from willu47/salib_to_df
Browse files Browse the repository at this point in the history
Use SALib ResultDict.to_df() function to provide results in pandas dataframe format
  • Loading branch information
mastoffel authored Feb 25, 2025
2 parents 7a123ad + 85dd1d0 commit 9359041
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 73 deletions.
89 changes: 44 additions & 45 deletions autoemulate/sensitivity_analysis.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from typing import Dict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from SALib.analyze.sobol import analyze
from SALib.sample.sobol import sample
from SALib.util import ResultDict

from autoemulate.utils import _ensure_2d

Expand Down Expand Up @@ -44,7 +47,7 @@ def _sensitivity_analysis(
Si = _sobol_analysis(model, problem, X, N, conf_level)

if as_df:
return _sobol_results_to_df(Si, problem)
return _sobol_results_to_df(Si)
else:
return Si

Expand Down Expand Up @@ -101,7 +104,9 @@ def _generate_problem(X):
}


def _sobol_analysis(model, problem=None, X=None, N=1024, conf_level=0.95):
def _sobol_analysis(
model, problem=None, X=None, N=1024, conf_level=0.95
) -> Dict[str, ResultDict]:
"""
Perform Sobol sensitivity analysis on a fitted emulator.
Expand Down Expand Up @@ -148,7 +153,7 @@ def _sobol_analysis(model, problem=None, X=None, N=1024, conf_level=0.95):
return results


def _sobol_results_to_df(results, problem=None):
def _sobol_results_to_df(results: Dict[str, ResultDict]) -> pd.DataFrame:
"""
Convert Sobol results to a (long-format) pandas DataFrame.
Expand All @@ -164,56 +169,50 @@ def _sobol_results_to_df(results, problem=None):
pd.DataFrame
A DataFrame with columns: 'output', 'parameter', 'index', 'value', 'confidence'.
"""
rename_dict = {
"variable": "index",
"S1": "value",
"S1_conf": "confidence",
"ST": "value",
"ST_conf": "confidence",
"S2": "value",
"S2_conf": "confidence",
}
rows = []
# Use custom names if provided, else default to "X1", "X2", etc.
parameter_names = (
problem["names"]
if problem is not None
else [f"X{i+1}" for i in range(len(next(iter(results.values()))["S1"]))]
)
for output, result in results.items():
s1, st, s2 = result.to_df()
s1 = (
s1.reset_index()
.rename(columns={"index": "parameter"})
.rename(columns=rename_dict)
)
s1["index"] = "S1"
st = (
st.reset_index()
.rename(columns={"index": "parameter"})
.rename(columns=rename_dict)
)
st["index"] = "ST"
s2 = (
s2.reset_index()
.rename(columns={"index": "parameter"})
.rename(columns=rename_dict)
)
s2["index"] = "S2"

df = pd.concat([s1, st, s2])
df["output"] = output
rows.append(df[["output", "parameter", "index", "value", "confidence"]])

for output, indices in results.items():
for index_type in ["S1", "ST", "S2"]:
values = indices.get(index_type)
conf_values = indices.get(f"{index_type}_conf")
if values is None or conf_values is None:
continue

if index_type in ["S1", "ST"]:
rows.extend(
{
"output": output,
"parameter": parameter_names[i], # Use appropriate names
"index": index_type,
"value": value,
"confidence": conf,
}
for i, (value, conf) in enumerate(zip(values, conf_values))
)

elif index_type == "S2":
n = values.shape[0]
rows.extend(
{
"output": output,
"parameter": f"{parameter_names[i]}-{parameter_names[j]}", # Use appropriate names
"index": index_type,
"value": values[i, j],
"confidence": conf_values[i, j],
}
for i in range(n)
for j in range(i + 1, n)
if not np.isnan(values[i, j])
)
return pd.DataFrame(rows)
return pd.concat(rows)


# plotting --------------------------------------------------------------------


def _validate_input(results, problem, index):
def _validate_input(results, index):
if not isinstance(results, pd.DataFrame):
results = _sobol_results_to_df(results, problem=problem)
results = _sobol_results_to_df(results)
# we only want to plot one index type at a time
valid_indices = ["S1", "S2", "ST"]
if index not in valid_indices:
Expand Down
35 changes: 7 additions & 28 deletions tests/test_sensitivity_analysis.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import numpy as np
import pandas as pd
import pytest
from sklearn.datasets import make_regression

from autoemulate.emulators import RandomForest
from autoemulate.experimental_design import LatinHypercube
Expand Down Expand Up @@ -151,21 +150,11 @@ def sobol_results_1d(model_1d):
# # test conversion to DataFrame --------------------------------------------------
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize(
"problem, expected_names",
[
(
{
"num_vars": 2,
"names": ["c", "v0"],
"bounds": [(-5.0, 1.0), (0.0, 1000.0)],
},
["c", "v0", "c-v0"],
),
(None, ["X1", "X2", "X1-X2"]),
],
"expected_names",
[["c", "v0", "c", "v0", ["c", "v0"]]],
)
def test_sobol_results_to_df(sobol_results_1d, problem, expected_names):
df = _sobol_results_to_df(sobol_results_1d, problem)
def test_sobol_results_to_df(sobol_results_1d, expected_names):
df = _sobol_results_to_df(sobol_results_1d)
assert isinstance(df, pd.DataFrame)
assert df.columns.tolist() == [
"output",
Expand All @@ -174,7 +163,7 @@ def test_sobol_results_to_df(sobol_results_1d, problem, expected_names):
"value",
"confidence",
]
assert expected_names in df["parameter"].unique()
assert expected_names == df["parameter"].to_list()
assert all(isinstance(x, float) for x in df["value"])
assert all(isinstance(x, float) for x in df["confidence"])

Expand All @@ -185,23 +174,13 @@ def test_sobol_results_to_df(sobol_results_1d, problem, expected_names):
# test _validate_input ----------------------------------------------------------
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_validate_input(sobol_results_1d):
problem = {
"num_vars": 2,
"names": ["c", "v0"],
"bounds": [(-5.0, 1.0), (0.0, 1000.0)],
}
with pytest.raises(ValueError):
_validate_input(sobol_results_1d, problem=problem, index="S3")
_validate_input(sobol_results_1d, index="S3")


@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_validate_input_valid(sobol_results_1d):
problem = {
"num_vars": 2,
"names": ["c", "v0"],
"bounds": [(-5.0, 1.0), (0.0, 1000.0)],
}
Si = _validate_input(sobol_results_1d, problem=problem, index="S1")
Si = _validate_input(sobol_results_1d, index="S1")
assert isinstance(Si, pd.DataFrame)


Expand Down

0 comments on commit 9359041

Please sign in to comment.