Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated tested approach for meanimputer #250

Merged
merged 2 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Moved tests for these objects to new approach.
- Bugfix to MeanResponseTransformer to ignore unobserved categorical levels.
- Added test_BaseTwoColumnTransformer base class for columns that require a list of two columns for input
- Added BaseDropOriginalMixin to mixin transformers to handle validation and method of dropping original features, also added appropriate test classes.
- Refactored MeanImputer tests in new format `#250 <https://github.com/lvgig/tubular/pull/250>`_


Removed
Expand Down
315 changes: 39 additions & 276 deletions tests/imputers/test_MeanImputer.py
Original file line number Diff line number Diff line change
@@ -1,75 +1,36 @@
import numpy as np
import pandas as pd
import pytest
import test_aide as ta

import tests.test_data as d
import tubular
import tubular.mixins
from tests.base_tests import (
ColumnStrListInitTests,
GenericFitTests,
GenericTransformTests,
OtherBaseBehaviourTests,
WeightColumnFitTests,
WeightColumnInitTests,
)
from tests.imputers.test_BaseImputer import (
GenericImputerTransformTests,
GenericImputerTransformTestsWeight,
)
from tubular.imputers import MeanImputer


class TestInit:
"""Tests for MeanImputer.init()."""
class TestInit(WeightColumnInitTests, ColumnStrListInitTests):
"""Generic tests for transformer.init()."""

def test_super_init_called(self, mocker):
"""Test that init calls BaseTransformer.init."""
expected_call_args = {
0: {"args": (), "kwargs": {"columns": None, "verbose": True}},
}
@classmethod
def setup_class(cls):
cls.transformer_name = "MeanImputer"

with ta.functions.assert_function_call(
mocker,
tubular.base.BaseTransformer,
"__init__",
expected_call_args,
):
MeanImputer(columns=None, verbose=True)

@pytest.mark.parametrize("weights_column", (0, ["a"], {"a": 10}))
def test_weight_arg_errors(self, weights_column):
"""Test that appropriate errors are throw for bad weight arg."""
with pytest.raises(
TypeError,
match="weights_column should be str or None",
):
MeanImputer(columns=["s"], weights_column=weights_column)
class TestFit(WeightColumnFitTests, GenericFitTests):
"""Generic tests for transformer.fit()"""


class TestFit:
"""Tests for MeanImputer.fit()."""

def test_super_fit_called(self, mocker):
"""Test that fit calls BaseTransformer.fit."""
df = d.create_df_3()

x = MeanImputer(columns=["a", "b", "c"])

expected_call_args = {0: {"args": (d.create_df_3(), None), "kwargs": {}}}

with ta.functions.assert_function_call(
mocker,
tubular.base.BaseTransformer,
"fit",
expected_call_args,
):
x.fit(df)

def test_check_weights_column_called(self, mocker):
"""Test that fit calls WeightColumnMixin.check_weights_column - when weights are used."""
df = d.create_df_9()

x = MeanImputer(columns=["a", "b"], weights_column="c")

expected_call_args = {0: {"args": (x, d.create_df_9(), "c"), "kwargs": {}}}

with ta.functions.assert_function_call(
mocker,
tubular.mixins.WeightColumnMixin,
"check_weights_column",
expected_call_args,
):
x.fit(df)
@classmethod
def setup_class(cls):
cls.transformer_name = "MeanImputer"

def test_learnt_values(self):
"""Test that the impute values learnt during fit are expected."""
Expand Down Expand Up @@ -110,223 +71,25 @@ def test_learnt_values_weighted(self):
msg="impute_values_ attribute",
)

def test_fit_returns_self(self):
"""Test fit returns self?."""
df = d.create_df_1()

x = MeanImputer(columns="a")

x_fitted = x.fit(df)

assert x_fitted is x, "Returned value from MeanImputer.fit not as expected."

def test_fit_returns_self_weighted(self):
"""Test fit returns self - when weight is used."""
df = d.create_df_9()

x = MeanImputer(columns="a", weights_column="c")

x_fitted = x.fit(df)

assert x_fitted is x, "Returned value from MeanImputer.fit not as expected."

def test_fit_not_changing_data(self):
"""Test fit does not change X."""
df = d.create_df_1()

x = MeanImputer(columns="a")

x.fit(df)

ta.equality.assert_equal_dispatch(
expected=d.create_df_1(),
actual=df,
msg="Check X not changing during fit",
)

def test_fit_not_changing_data_weighted(self):
"""Test fit does not change X - when weights are used."""
df = d.create_df_9()

x = MeanImputer(columns="a", weights_column="c")

x.fit(df)

ta.equality.assert_equal_dispatch(
expected=d.create_df_9(),
actual=df,
msg="Check X not changing during fit",
)


class TestTransform:
"""Tests for MeanImputer.transform()."""

def expected_df_1():
"""Expected output for test_nulls_imputed_correctly."""
df = pd.DataFrame(
{
"a": [1, 2, 3, 4, 5, 6, np.nan],
"b": [1, 2, 3, np.nan, 7, 8, 9],
"c": [np.nan, 1, 2, 3, -4, -5, -6],
},
)

for col in ["a", "b", "c"]:
df.loc[df[col].isna(), col] = df[col].mean()

return df

def expected_df_2():
"""Expected output for test_nulls_imputed_correctly_2."""
df = pd.DataFrame(
{
"a": [1, 2, 3, 4, 5, 6, np.nan],
"b": [1, 2, 3, np.nan, 7, 8, 9],
"c": [np.nan, 1, 2, 3, -4, -5, -6],
},
)

for col in ["a"]:
df.loc[df[col].isna(), col] = df[col].mean()

return df

def expected_df_3():
"""Expected output for test_nulls_imputed_correctly_3."""
df = d.create_df_9()

for col, value in zip(["a", "b"], [59 / 15, 42 / 18]):
df.loc[df[col].isna(), col] = value

return df

def test_check_is_fitted_called(self, mocker):
"""Test that BaseTransformer check_is_fitted called."""
df = d.create_df_1()

x = MeanImputer(columns="a")

x.fit(df)

expected_call_args = {0: {"args": (["impute_values_"],), "kwargs": {}}}

with ta.functions.assert_function_call(
mocker,
tubular.base.BaseTransformer,
"check_is_fitted",
expected_call_args,
):
x.transform(df)

def test_super_transform_called(self, mocker):
"""Test that BaseTransformer.transform called."""
df = d.create_df_1()

x = MeanImputer(columns="a")

x.fit(df)

expected_call_args = {0: {"args": (d.create_df_1(),), "kwargs": {}}}

with ta.functions.assert_function_call(
mocker,
tubular.base.BaseTransformer,
"transform",
expected_call_args,
):
x.transform(df)

@pytest.mark.parametrize(
("df", "expected"),
ta.pandas.adjusted_dataframe_params(d.create_df_3(), expected_df_1()),
)
def test_nulls_imputed_correctly(self, df, expected):
"""Test missing values are filled with the correct values."""
x = MeanImputer(columns=["a", "b", "c"])

# set the impute values dict directly rather than fitting x on df so test works with decorators
x.impute_values_ = {"a": 3.5, "b": 5.0, "c": -1.5}

df_transformed = x.transform(df)

ta.equality.assert_equal_dispatch(
expected=expected,
actual=df_transformed,
msg="Check nulls filled correctly in transform",
)

@pytest.mark.parametrize(
("df", "expected"),
ta.pandas.adjusted_dataframe_params(d.create_df_3(), expected_df_2()),
)
def test_nulls_imputed_correctly_2(self, df, expected):
"""Test missing values are filled with the correct values - and unrelated columns are not changed."""
x = MeanImputer(columns=["a"])

# set the impute values dict directly rather than fitting x on df so test works with decorators
x.impute_values_ = {"a": 3.5}
class TestTransform(
GenericTransformTests,
GenericImputerTransformTestsWeight,
GenericImputerTransformTests,
):
"""Tests for transformer.transform."""

df_transformed = x.transform(df)
@classmethod
def setup_class(cls):
cls.transformer_name = "MeanImputer"

ta.equality.assert_equal_dispatch(
expected=expected,
actual=df_transformed,
msg="Check nulls filled correctly in transform",
)

@pytest.mark.parametrize(
("df", "expected"),
ta.pandas.row_by_row_params(d.create_df_9(), expected_df_3())
+ ta.pandas.index_preserved_params(d.create_df_9(), expected_df_3()),
)
def test_nulls_imputed_correctly_3(self, df, expected):
"""Test missing values are filled with the correct values - and unrelated columns are not changed."""
x = MeanImputer(columns=["a", "b"], weights_column="c")
class TestOtherBaseBehaviour(OtherBaseBehaviourTests):
"""
Class to run tests for BaseTransformerBehaviour behaviour outside the three standard methods.
May need to overwrite specific tests in this class if the tested transformer modifies this behaviour.
"""

# set the impute values dict directly rather than fitting x on df so test works with decorators
x.impute_values_ = {"a": 59 / 15, "b": 42 / 18}

df_transformed = x.transform(df)

ta.equality.assert_equal_dispatch(
expected=expected,
actual=df_transformed,
msg="Check nulls filled correctly in transform",
)

def test_learnt_values_not_modified(self):
"""Test that the impute_values_ from fit are not changed in transform."""
df = d.create_df_3()

x = MeanImputer(columns=["a", "b", "c"])

x.fit(df)

x2 = MeanImputer(columns=["a", "b", "c"])

x2.fit_transform(df)

ta.equality.assert_equal_dispatch(
expected=x.impute_values_,
actual=x2.impute_values_,
msg="Impute values not changed in transform",
)

def test_learnt_values_not_modified_weights(self):
"""Test that the impute_values_ from fit are not changed in transform - when using weights."""
df = d.create_df_9()

x = MeanImputer(columns=["a", "b"], weights_column="c")

x.fit(df)

x2 = MeanImputer(columns=["a", "b"], weights_column="c")

x2.fit_transform(df)

ta.equality.assert_equal_dispatch(
expected=x.impute_values_,
actual=x2.impute_values_,
msg="Impute values not changed in transform",
)
@classmethod
def setup_class(cls):
cls.transformer_name = "MeanImputer"
6 changes: 5 additions & 1 deletion tubular/imputers.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def fit(self, X: pd.DataFrame, y: pd.Series | None = None) -> pd.DataFrame:
return self


class MeanImputer(BaseImputer, WeightColumnMixin):
class MeanImputer(WeightColumnMixin, BaseImputer):
"""Transformer to impute missing values with the mean of the supplied columns.

Parameters
Expand All @@ -236,6 +236,8 @@ class MeanImputer(BaseImputer, WeightColumnMixin):

"""

FITS = True

def __init__(
self,
columns: str | list[str] | None = None,
Expand Down Expand Up @@ -313,6 +315,8 @@ class ModeImputer(BaseImputer, WeightColumnMixin):

"""

FITS = True

def __init__(
self,
columns: str | list[str] | None = None,
Expand Down
Loading