lvgig · davidhopkinson26 · May 13, 2024 · May 13, 2024 · May 13, 2024
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -49,6 +49,7 @@ Moved tests for these objects to new approach.
 - Bugfix to MeanResponseTransformer to ignore unobserved categorical levels.
 - Added test_BaseTwoColumnTransformer base class for columns that require a list of two columns for input
 - Added BaseDropOriginalMixin to mixin transformers to handle validation and method of dropping original features, also added appropriate test classes.
+- Refactored MeanImputer tests in new format `#250 <https://github.com/lvgig/tubular/pull/250>`_
 
 
 Removed

diff --git a/tests/imputers/test_MeanImputer.py b/tests/imputers/test_MeanImputer.py
@@ -1,75 +1,36 @@
 import numpy as np
-import pandas as pd
-import pytest
 import test_aide as ta
 
 import tests.test_data as d
-import tubular
-import tubular.mixins
+from tests.base_tests import (
+    ColumnStrListInitTests,
+    GenericFitTests,
+    GenericTransformTests,
+    OtherBaseBehaviourTests,
+    WeightColumnFitTests,
+    WeightColumnInitTests,
+)
+from tests.imputers.test_BaseImputer import (
+    GenericImputerTransformTests,
+    GenericImputerTransformTestsWeight,
+)
 from tubular.imputers import MeanImputer
 
 
-class TestInit:
-    """Tests for MeanImputer.init()."""
+class TestInit(WeightColumnInitTests, ColumnStrListInitTests):
+    """Generic tests for transformer.init()."""
 
-    def test_super_init_called(self, mocker):
-        """Test that init calls BaseTransformer.init."""
-        expected_call_args = {
-            0: {"args": (), "kwargs": {"columns": None, "verbose": True}},
-        }
+    @classmethod
+    def setup_class(cls):
+        cls.transformer_name = "MeanImputer"
 
-        with ta.functions.assert_function_call(
-            mocker,
-            tubular.base.BaseTransformer,
-            "__init__",
-            expected_call_args,
-        ):
-            MeanImputer(columns=None, verbose=True)
 
-    @pytest.mark.parametrize("weights_column", (0, ["a"], {"a": 10}))
-    def test_weight_arg_errors(self, weights_column):
-        """Test that appropriate errors are throw for bad weight arg."""
-        with pytest.raises(
-            TypeError,
-            match="weights_column should be str or None",
-        ):
-            MeanImputer(columns=["s"], weights_column=weights_column)
+class TestFit(WeightColumnFitTests, GenericFitTests):
+    """Generic tests for transformer.fit()"""
 
-
-class TestFit:
-    """Tests for MeanImputer.fit()."""
-
-    def test_super_fit_called(self, mocker):
-        """Test that fit calls BaseTransformer.fit."""
-        df = d.create_df_3()
-
-        x = MeanImputer(columns=["a", "b", "c"])
-
-        expected_call_args = {0: {"args": (d.create_df_3(), None), "kwargs": {}}}
-
-        with ta.functions.assert_function_call(
-            mocker,
-            tubular.base.BaseTransformer,
-            "fit",
-            expected_call_args,
-        ):
-            x.fit(df)
-
-    def test_check_weights_column_called(self, mocker):
-        """Test that fit calls WeightColumnMixin.check_weights_column - when weights are used."""
-        df = d.create_df_9()
-
-        x = MeanImputer(columns=["a", "b"], weights_column="c")
-
-        expected_call_args = {0: {"args": (x, d.create_df_9(), "c"), "kwargs": {}}}
-
-        with ta.functions.assert_function_call(
-            mocker,
-            tubular.mixins.WeightColumnMixin,
-            "check_weights_column",
-            expected_call_args,
-        ):
-            x.fit(df)
+    @classmethod
+    def setup_class(cls):
+        cls.transformer_name = "MeanImputer"
 
     def test_learnt_values(self):
         """Test that the impute values learnt during fit are expected."""
@@ -110,223 +71,25 @@ def test_learnt_values_weighted(self):
             msg="impute_values_ attribute",
         )
 
-    def test_fit_returns_self(self):
-        """Test fit returns self?."""
-        df = d.create_df_1()
-
-        x = MeanImputer(columns="a")
-
-        x_fitted = x.fit(df)
-
-        assert x_fitted is x, "Returned value from MeanImputer.fit not as expected."
-
-    def test_fit_returns_self_weighted(self):
-        """Test fit returns self - when weight is used."""
-        df = d.create_df_9()
-
-        x = MeanImputer(columns="a", weights_column="c")
-
-        x_fitted = x.fit(df)
-
-        assert x_fitted is x, "Returned value from MeanImputer.fit not as expected."
-
-    def test_fit_not_changing_data(self):
-        """Test fit does not change X."""
-        df = d.create_df_1()
-
-        x = MeanImputer(columns="a")
-
-        x.fit(df)
-
-        ta.equality.assert_equal_dispatch(
-            expected=d.create_df_1(),
-            actual=df,
-            msg="Check X not changing during fit",
-        )
-
-    def test_fit_not_changing_data_weighted(self):
-        """Test fit does not change X - when weights are used."""
-        df = d.create_df_9()
-
-        x = MeanImputer(columns="a", weights_column="c")
-
-        x.fit(df)
-
-        ta.equality.assert_equal_dispatch(
-            expected=d.create_df_9(),
-            actual=df,
-            msg="Check X not changing during fit",
-        )
-
-
-class TestTransform:
-    """Tests for MeanImputer.transform()."""
-
-    def expected_df_1():
-        """Expected output for test_nulls_imputed_correctly."""
-        df = pd.DataFrame(
-            {
-                "a": [1, 2, 3, 4, 5, 6, np.nan],
-                "b": [1, 2, 3, np.nan, 7, 8, 9],
-                "c": [np.nan, 1, 2, 3, -4, -5, -6],
-            },
-        )
-
-        for col in ["a", "b", "c"]:
-            df.loc[df[col].isna(), col] = df[col].mean()
-
-        return df
-
-    def expected_df_2():
-        """Expected output for test_nulls_imputed_correctly_2."""
-        df = pd.DataFrame(
-            {
-                "a": [1, 2, 3, 4, 5, 6, np.nan],
-                "b": [1, 2, 3, np.nan, 7, 8, 9],
-                "c": [np.nan, 1, 2, 3, -4, -5, -6],
-            },
-        )
-
-        for col in ["a"]:
-            df.loc[df[col].isna(), col] = df[col].mean()
-
-        return df
-
-    def expected_df_3():
-        """Expected output for test_nulls_imputed_correctly_3."""
-        df = d.create_df_9()
-
-        for col, value in zip(["a", "b"], [59 / 15, 42 / 18]):
-            df.loc[df[col].isna(), col] = value
-
-        return df
-
-    def test_check_is_fitted_called(self, mocker):
-        """Test that BaseTransformer check_is_fitted called."""
-        df = d.create_df_1()
-
-        x = MeanImputer(columns="a")
-
-        x.fit(df)
-
-        expected_call_args = {0: {"args": (["impute_values_"],), "kwargs": {}}}
-
-        with ta.functions.assert_function_call(
-            mocker,
-            tubular.base.BaseTransformer,
-            "check_is_fitted",
-            expected_call_args,
-        ):
-            x.transform(df)
-
-    def test_super_transform_called(self, mocker):
-        """Test that BaseTransformer.transform called."""
-        df = d.create_df_1()
-
-        x = MeanImputer(columns="a")
-
-        x.fit(df)
-
-        expected_call_args = {0: {"args": (d.create_df_1(),), "kwargs": {}}}
-
-        with ta.functions.assert_function_call(
-            mocker,
-            tubular.base.BaseTransformer,
-            "transform",
-            expected_call_args,
-        ):
-            x.transform(df)
-
-    @pytest.mark.parametrize(
-        ("df", "expected"),
-        ta.pandas.adjusted_dataframe_params(d.create_df_3(), expected_df_1()),
-    )
-    def test_nulls_imputed_correctly(self, df, expected):
-        """Test missing values are filled with the correct values."""
-        x = MeanImputer(columns=["a", "b", "c"])
-
-        # set the impute values dict directly rather than fitting x on df so test works with decorators
-        x.impute_values_ = {"a": 3.5, "b": 5.0, "c": -1.5}
-
-        df_transformed = x.transform(df)
-
-        ta.equality.assert_equal_dispatch(
-            expected=expected,
-            actual=df_transformed,
-            msg="Check nulls filled correctly in transform",
-        )
-
-    @pytest.mark.parametrize(
-        ("df", "expected"),
-        ta.pandas.adjusted_dataframe_params(d.create_df_3(), expected_df_2()),
-    )
-    def test_nulls_imputed_correctly_2(self, df, expected):
-        """Test missing values are filled with the correct values - and unrelated columns are not changed."""
-        x = MeanImputer(columns=["a"])
 
-        # set the impute values dict directly rather than fitting x on df so test works with decorators
-        x.impute_values_ = {"a": 3.5}
+class TestTransform(
+    GenericTransformTests,
+    GenericImputerTransformTestsWeight,
+    GenericImputerTransformTests,
+):
+    """Tests for transformer.transform."""
 
-        df_transformed = x.transform(df)
+    @classmethod
+    def setup_class(cls):
+        cls.transformer_name = "MeanImputer"
 
-        ta.equality.assert_equal_dispatch(
-            expected=expected,
-            actual=df_transformed,
-            msg="Check nulls filled correctly in transform",
-        )
 
-    @pytest.mark.parametrize(
-        ("df", "expected"),
-        ta.pandas.row_by_row_params(d.create_df_9(), expected_df_3())
-        + ta.pandas.index_preserved_params(d.create_df_9(), expected_df_3()),
-    )
-    def test_nulls_imputed_correctly_3(self, df, expected):
-        """Test missing values are filled with the correct values - and unrelated columns are not changed."""
-        x = MeanImputer(columns=["a", "b"], weights_column="c")
+class TestOtherBaseBehaviour(OtherBaseBehaviourTests):
+    """
+    Class to run tests for BaseTransformerBehaviour behaviour outside the three standard methods.
+    May need to overwrite specific tests in this class if the tested transformer modifies this behaviour.
+    """
 
-        # set the impute values dict directly rather than fitting x on df so test works with decorators
-        x.impute_values_ = {"a": 59 / 15, "b": 42 / 18}
-
-        df_transformed = x.transform(df)
-
-        ta.equality.assert_equal_dispatch(
-            expected=expected,
-            actual=df_transformed,
-            msg="Check nulls filled correctly in transform",
-        )
-
-    def test_learnt_values_not_modified(self):
-        """Test that the impute_values_ from fit are not changed in transform."""
-        df = d.create_df_3()
-
-        x = MeanImputer(columns=["a", "b", "c"])
-
-        x.fit(df)
-
-        x2 = MeanImputer(columns=["a", "b", "c"])
-
-        x2.fit_transform(df)
-
-        ta.equality.assert_equal_dispatch(
-            expected=x.impute_values_,
-            actual=x2.impute_values_,
-            msg="Impute values not changed in transform",
-        )
-
-    def test_learnt_values_not_modified_weights(self):
-        """Test that the impute_values_ from fit are not changed in transform - when using weights."""
-        df = d.create_df_9()
-
-        x = MeanImputer(columns=["a", "b"], weights_column="c")
-
-        x.fit(df)
-
-        x2 = MeanImputer(columns=["a", "b"], weights_column="c")
-
-        x2.fit_transform(df)
-
-        ta.equality.assert_equal_dispatch(
-            expected=x.impute_values_,
-            actual=x2.impute_values_,
-            msg="Impute values not changed in transform",
-        )
+    @classmethod
+    def setup_class(cls):
+        cls.transformer_name = "MeanImputer"
diff --git a/tubular/imputers.py b/tubular/imputers.py
@@ -213,7 +213,7 @@ def fit(self, X: pd.DataFrame, y: pd.Series | None = None) -> pd.DataFrame:
         return self
 
 
-class MeanImputer(BaseImputer, WeightColumnMixin):
+class MeanImputer(WeightColumnMixin, BaseImputer):
     """Transformer to impute missing values with the mean of the supplied columns.
 
     Parameters
@@ -236,6 +236,8 @@ class MeanImputer(BaseImputer, WeightColumnMixin):
 
     """
 
+    FITS = True
+
     def __init__(
         self,
         columns: str | list[str] | None = None,
@@ -313,6 +315,8 @@ class ModeImputer(BaseImputer, WeightColumnMixin):
 
     """
 
+    FITS = True
+
     def __init__(
         self,
         columns: str | list[str] | None = None,