Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

setup ArbitraryTransformer with new testing approach #212

Merged
merged 4 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 43 additions & 121 deletions tests/imputers/test_ArbitraryImputer.py
Original file line number Diff line number Diff line change
@@ -1,144 +1,54 @@
import pytest
import test_aide as ta

import tests.test_data as d
import tubular
from tests.base_tests import (
ColumnStrListInitTests,
GenericFitTests,
GenericTransformTests,
OtherBaseBehaviourTests,
)
from tests.imputers.test_BaseImputer import GenericImputerTransformTests
from tubular.imputers import ArbitraryImputer


class TestInit:
"""Tests for ArbitraryImputer.init()."""
class TestInit(ColumnStrListInitTests):
"""Generic tests for transformer.init()."""

def test_super_init_called(self, mocker):
"""Test that init calls BaseTransformer.init."""
expected_call_args = {
0: {"args": (), "kwargs": {"columns": "a", "verbose": True}},
}
@classmethod
def setup_class(cls):
cls.transformer_name = "ArbitraryImputer"

with ta.functions.assert_function_call(
mocker,
tubular.base.BaseTransformer,
"__init__",
expected_call_args,
):
ArbitraryImputer(impute_value=1, columns="a", verbose=True)
def test_impute_value_type_error(
self,
uninitialized_transformers,
minimal_attribute_dict,
):
"""Test that an exception is raised if impute_value is not an int, float or str."""

def test_columns_none_error(self):
"""Test that an exception is raised if columns is passed as None."""
with pytest.raises(
ValueError,
match="ArbitraryImputer: columns must be specified in init for ArbitraryImputer",
):
ArbitraryImputer(impute_value=1, columns=None)
args = minimal_attribute_dict[self.transformer_name].copy()
args["impute_value"] = [1, 2]

def test_impute_value_type_error(self):
"""Test that an exception is raised if impute_value is not an int, float or str."""
with pytest.raises(
ValueError,
match="ArbitraryImputer: impute_value should be a single value .*",
):
ArbitraryImputer(impute_value={}, columns="a")

def test_impute_values_set_to_attribute(self):
"""Test that the value passed for impute_value is saved in an attribute of the same name."""
value = 1

x = ArbitraryImputer(impute_value=value, columns="a")

ta.classes.test_object_attributes(
obj=x,
expected_attributes={"impute_value": value, "impute_values_": {}},
msg="Attributes for ArbitraryImputer set in init",
)


class TestTransform:
"""Tests for ArbitraryImputer.transform()."""

def test_check_is_fitted_called(self, mocker):
"""Test that BaseTransformer check_is_fitted called."""
df = d.create_df_1()

x = ArbitraryImputer(impute_value=1, columns="a")

expected_call_args = {0: {"args": (["impute_value"],), "kwargs": {}}}

with ta.functions.assert_function_call(
mocker,
tubular.base.BaseTransformer,
"check_is_fitted",
expected_call_args,
):
x.transform(df)

def test_super_transform_called(self, mocker):
"""Test that BaseImputer.transform called."""
df = d.create_df_2()

x = ArbitraryImputer(impute_value=1, columns="a")

expected_call_args = {0: {"args": (d.create_df_2(),), "kwargs": {}}}

with ta.functions.assert_function_call(
mocker,
tubular.imputers.BaseImputer,
"transform",
expected_call_args,
):
x.transform(df)

def test_impute_values_set(self, mocker):
"""Test that impute_values_ are set with imput_value in transform."""
df = d.create_df_2()

x = ArbitraryImputer(impute_value=1, columns=["a", "b", "c"])

# mock BaseImputer.transform to return a Dataframe so it does not run
mocker.patch.object(
tubular.imputers.BaseImputer,
"transform",
return_value=df.copy(),
)

x.transform(df)

assert x.impute_values_ == {
"a": 1,
"b": 1,
"c": 1,
}, "impute_values_ not set with imput_value in transform"
uninitialized_transformers[self.transformer_name](**args)

def test_impute_value_unchanged(self):
"""Test that self.impute_value is unchanged after transform."""
df = d.create_df_1()

value = 1
class TestFit(GenericFitTests):
"""Generic tests for transformer.fit()"""

x = ArbitraryImputer(impute_value=value, columns="a")
@classmethod
def setup_class(cls):
cls.transformer_name = "ArbitraryImputer"

x.transform(df)

ta.classes.test_object_attributes(
obj=x,
expected_attributes={"impute_value": value},
msg="impute_value changed in transform",
)
class TestTransform(GenericImputerTransformTests, GenericTransformTests):
"""Tests for transformer.transform."""

def test_super_columns_check_called(self, mocker):
"""Test that BaseTransformer.columns_check called."""
df = d.create_df_2()

x = ArbitraryImputer(impute_value=-1, columns="a")

expected_call_args = {0: {"args": (d.create_df_2(),), "kwargs": {}}}

with ta.functions.assert_function_call(
mocker,
tubular.base.BaseTransformer,
"columns_check",
expected_call_args,
):
x.transform(df)
@classmethod
def setup_class(cls):
cls.transformer_name = "ArbitraryImputer"

# Unit testing to check if downcast datatypes of columns is preserved after imputation is done
def test_impute_value_preserve_dtype(self):
Expand Down Expand Up @@ -167,3 +77,15 @@ def test_impute_value_preserve_dtype(self):
# Checking if the dtype of "a" and "b" are int8 and float16 respectively after imputation
assert df["a"].dtype == "int8"
assert df["b"].dtype == "float16"


lsumption marked this conversation as resolved.
Show resolved Hide resolved
class TestOtherBaseBehaviour(OtherBaseBehaviourTests):
"""
Class to run tests for BaseTransformerBehaviour outside the three standard methods.

May need to overwrite specific tests in this class if the tested transformer modifies this behaviour.
"""

@classmethod
def setup_class(cls):
# Name of the transformer under test. Presumably the inherited tests use it as the
# key into the shared transformer fixtures - TODO confirm against tests.base_tests.
cls.transformer_name = "ArbitraryImputer"
107 changes: 64 additions & 43 deletions tests/imputers/test_BaseImputer.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from copy import deepcopy

import numpy as np
import pandas as pd
import pytest
Expand All @@ -8,50 +10,40 @@
from tests.base_tests import (
ColumnStrListInitTests,
GenericFitTests,
GenericTransformTests,
OtherBaseBehaviourTests,
)
from tubular.imputers import BaseImputer


class BaseImputerTransformTests(GenericTransformTests):
def test_not_fitted_error_raised(self):
df = pd.DataFrame(
{
"a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
"b": ["a", "b", "c", "d", "e", "f", np.nan],
"c": ["a", "b", "c", "d", "e", "f", np.nan],
},
)
class GenericImputerTransformTests:
def test_not_fitted_error_raised(self, initialized_transformers):
if initialized_transformers[self.transformer_name].FITS:
df = pd.DataFrame(
{
"a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
"b": ["a", "b", "c", "d", "e", "f", np.nan],
"c": ["a", "b", "c", "d", "e", "f", np.nan],
},
)

x = BaseImputer(columns=["b", "c"])
with pytest.raises(NotFittedError):
initialized_transformers[self.transformer_name].transform(df)

with pytest.raises(NotFittedError):
x.transform(df)
def test_impute_value_unchanged(self, initialized_transformers):
"""Test that self.impute_values_ is unchanged after transform."""
df = d.create_df_1()

transformer = initialized_transformers[self.transformer_name]
transformer.impute_values_ = {"a": 1}

class TestInit(ColumnStrListInitTests):
"""Generic tests for transformer.init()."""
impute_values = deepcopy(transformer.impute_values_)

@classmethod
def setup_class(cls):
cls.transformer_name = "BaseImputer"
transformer.transform(df)


class TestFit(GenericFitTests):
"""Generic tests for transformer.fit()"""

@classmethod
def setup_class(cls):
cls.transformer_name = "BaseTransformer"


class TestTransform(BaseImputerTransformTests):
"""Tests for BaseImputer.transform."""

@classmethod
def setup_class(cls):
cls.transformer_name = "BaseTransformer"
ta.classes.test_object_attributes(
obj=transformer,
expected_attributes={"impute_values_": impute_values},
msg="impute_values_ changed in transform",
)

def expected_df_1():
"""Expected output of test_expected_output_1."""
Expand Down Expand Up @@ -99,56 +91,85 @@ def expected_df_3():
("df", "expected"),
ta.pandas.adjusted_dataframe_params(d.create_df_2(), expected_df_1()),
)
def test_expected_output_1(self, df, expected):
def test_expected_output_1(self, df, expected, initialized_transformers):
"""Test that transform is giving the expected output when applied to float column."""
x1 = BaseImputer(columns="a")
x1 = initialized_transformers[self.transformer_name]
x1.impute_values_ = {"a": 7}
x1.columns = ["a"]

df_transformed = x1.transform(df)

ta.equality.assert_equal_dispatch(
expected=expected,
actual=df_transformed,
msg="ArbitraryImputer transform col a",
msg=f"Error from {self.transformer_name} transform col a",
)

@pytest.mark.parametrize(
("df", "expected"),
ta.pandas.adjusted_dataframe_params(d.create_df_2(), expected_df_2()),
)
def test_expected_output_2(self, df, expected):
def test_expected_output_2(self, df, expected, initialized_transformers):
"""Test that transform is giving the expected output when applied to object column."""
x1 = BaseImputer(columns=["b"])
x1 = initialized_transformers[self.transformer_name]

x1.impute_values_ = {"b": "g"}
x1.columns = ["b"]

df_transformed = x1.transform(df)

ta.equality.assert_equal_dispatch(
expected=expected,
actual=df_transformed,
msg="ArbitraryImputer transform col b",
msg=f"Error from {self.transformer_name} transform col b",
)

@pytest.mark.parametrize(
("df", "expected"),
ta.pandas.adjusted_dataframe_params(d.create_df_2(), expected_df_3()),
)
def test_expected_output_3(self, df, expected):
def test_expected_output_3(self, df, expected, initialized_transformers):
"""Test that transform is giving the expected output when applied to object and categorical columns."""
x1 = BaseImputer(columns=["b", "c"])
x1 = initialized_transformers[self.transformer_name]

x1.impute_values_ = {"b": "g", "c": "f"}
x1.columns = ["b", "c"]
# bit of a hack to make this work nicely for arbitrary imputer
lsumption marked this conversation as resolved.
Show resolved Hide resolved
x1.impute_value = "f"

df_transformed = x1.transform(df)

ta.equality.assert_equal_dispatch(
expected=expected,
actual=df_transformed,
msg="ArbitraryImputer transform col b, c",
msg=f"Error from {self.transformer_name} transform col b, c",
)


class TestInit(ColumnStrListInitTests):
"""Generic tests for transformer.init(), run against BaseImputer."""

@classmethod
def setup_class(cls):
# Name of the transformer under test. Presumably the inherited init tests use it as
# the key into the shared transformer fixtures - TODO confirm against tests.base_tests.
cls.transformer_name = "BaseImputer"


class TestFit(GenericFitTests):
"""Generic tests for transformer.fit(), run against BaseImputer."""

@classmethod
def setup_class(cls):
# Name of the transformer under test. Presumably the inherited fit tests use it as
# the key into the shared transformer fixtures - TODO confirm against tests.base_tests.
cls.transformer_name = "BaseImputer"


class TestTransform(GenericImputerTransformTests):
"""Tests for BaseImputer.transform."""

@classmethod
def setup_class(cls):
# The inherited GenericImputerTransformTests look the transformer up with
# initialized_transformers[self.transformer_name], so this selects BaseImputer.
cls.transformer_name = "BaseImputer"


class TestOtherBaseBehaviour(OtherBaseBehaviourTests):
"""
Class to run tests for BaseTransformerBehaviour outside the three standard methods.
Expand Down
Loading
Loading