Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/two column operator #291

Merged
merged 3 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Changed
- Refactored MeanResponseTransformer tests in new format `#262 <https://github.com/lvgig/tubular/pull/262>`_
- refactored build tools and package config into pyproject.toml `#271 <https://github.com/lvgig/tubular/pull/271>`_
- set up automatic versioning using setuptools-scm `#271 <https://github.com/lvgig/tubular/pull/271>`_
- Refactored TwoColumnOperatorTransformer tests in new format `#283 <https://github.com/lvgig/tubular/issues/274>`_


1.3.1 (2024-07-18)
Expand Down
2 changes: 1 addition & 1 deletion tests/base_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def test_columns_non_list_error(
minimal_attribute_dict,
uninitialized_transformers,
):
"""Test an error is raised if columns is not passed as a string not a list."""
Chip2916 marked this conversation as resolved.
Show resolved Hide resolved
"""Test an error is raised if columns is passed as a string not a list."""

args = minimal_attribute_dict[self.transformer_name].copy()
args["columns"] = non_list
Expand Down
154 changes: 38 additions & 116 deletions tests/numeric/test_TwoColumnOperatorTransformer.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,33 @@
import pandas as pd
import pytest
import test_aide as ta

import tests.test_data as d
import tubular
from tests.base_tests import (
GenericTransformTests,
NewColumnNameInitMixintests,
OtherBaseBehaviourTests,
TwoColumnListInitTests,
)
from tubular.numeric import TwoColumnOperatorTransformer

# @pytest.fixture(scope="module", autouse=True)
# def example_transformer():
# return TwoColumnOperatorTransformer(
# "mul",
# ["a", "b"],
# "c",
# )

@pytest.fixture(scope="module", autouse=True)
def example_transformer():
return TwoColumnOperatorTransformer(
"mul",
["a", "b"],
"c",
)


class TestTwoColumnOperatorTransformerInit:
"""Tests for TwoColumnMethodTransformer.__init__()."""

def test_column_type_error(self):
"""Checks that an error is raised if the column type is not a list."""
with pytest.raises(
TypeError,
match="columns must be a list containing two column names",
):
TwoColumnOperatorTransformer(
"mul",
"a, b",
"c",
pd_method_kwargs={"axis": 1},
)

def test_column_size_1_error(self):
"""Checks that the column is of length 2."""
with pytest.raises(
ValueError,
match="columns must be a list containing two column names",
):
TwoColumnOperatorTransformer(
"mul",
["a"],
"c",
pd_method_kwargs={"axis": 1},
)
class TestInit(
NewColumnNameInitMixintests,
TwoColumnListInitTests,
):
"""Generic tests for transformer.init()."""

def test_empty_list_error(self):
"""Checks that the list is not empty"""
with pytest.raises(
ValueError,
match="columns must be a list containing two column names",
):
TwoColumnOperatorTransformer(
"mul",
[],
"c",
pd_method_kwargs={"axis": 1},
)
@classmethod
def setup_class(cls):
cls.transformer_name = "TwoColumnOperatorTransformer"

def test_axis_not_present_error(self):
"""Checks that an error is raised if no axis element present in pd_method_kwargs dict."""
Expand All @@ -76,74 +47,13 @@ def test_axis_not_valid_error(self):
pd_method_kwargs={"axis": 2},
)

# TODO replace this with behaviour tests for DataFrameMethodTransformer init error handling
def test_DataFrameMethodTransformer_init_call(self, mocker):
"""Tests that the .__init__ method is called from the parent DataFrameMethodTransformer class."""
expected_call_args = {
0: {
"args": (),
"kwargs": {
"new_column_names": "c",
"pd_method_name": "mul",
"columns": ["a", "b"],
"pd_method_kwargs": {"axis": 0},
},
},
}

with ta.functions.assert_function_call(
mocker,
tubular.base.DataFrameMethodTransformer,
"__init__",
expected_call_args,
return_value=None,
):
TwoColumnOperatorTransformer("mul", ["a", "b"], "c")


class TestTwoColumnOperatorTransformerTransform:
@pytest.mark.parametrize(
"pd_method_name",
[
("mul"),
("div"),
("pow"),
],
)
def test_pandas_method_called(self, mocker, pd_method_name):
"""Test that the pandas method is called as expected (with kwargs passed) during transform."""
spy = mocker.spy(pd.DataFrame, pd_method_name)

pd_method_kwargs = {"axis": 0}

data = d.create_df_11()
x = TwoColumnOperatorTransformer(
pd_method_name,
["a", "b"],
"c",
)
x.transform(data)

# pull out positional and keyword args to target the call
print(spy.call_args_list)
call_args = spy.call_args_list[0]
call_pos_args = call_args[0]
call_kwargs = call_args[1]

# test keyword are as expected
ta.equality.assert_dict_equal_msg(
actual=call_kwargs,
expected=pd_method_kwargs,
msg_tag=f"""Keyword arg assert for '{pd_method_name}'""",
)
class TestTransform(GenericTransformTests):
"""Tests for transformer.transform."""

# test positional args are as expected
ta.equality.assert_list_tuple_equal_msg(
actual=call_pos_args,
# 'a' is indexed as a list here because that's how DataFrameMethodTransformer.__init__ stores the columns attribute
expected=(data[["a"]], data["b"]),
msg_tag=f"""Positional arg assert for {pd_method_name}""",
)
@classmethod
def setup_class(cls):
cls.transformer_name = "TwoColumnOperatorTransformer"

@pytest.mark.parametrize(
("pd_method_name", "output"),
Expand Down Expand Up @@ -184,3 +94,15 @@ def test_non_numeric_error(self):
match="TwoColumnOperatorTransformer: input columns in X must contain only numeric values",
):
x.transform(d.create_df_8())


class TestOtherBaseBehaviour(OtherBaseBehaviourTests):
"""
Class to run tests for BaseTransformerBehaviour outside the three standard methods.

May need to overwite specific tests in this class if the tested transformer modifies this behaviour.
"""

@classmethod
def setup_class(cls):
cls.transformer_name = "TwoColumnOperatorTransformer"
29 changes: 15 additions & 14 deletions tubular/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)

from tubular.base import BaseTransformer, DataFrameMethodTransformer
from tubular.mixins import DropOriginalMixin
from tubular.mixins import DropOriginalMixin, NewColumnNameMixin, TwoColumnMixin


class BaseNumericTransformer(BaseTransformer):
Expand Down Expand Up @@ -312,7 +312,11 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
return X


class TwoColumnOperatorTransformer(DataFrameMethodTransformer):
class TwoColumnOperatorTransformer(
NewColumnNameMixin,
TwoColumnMixin,
DataFrameMethodTransformer,
):
"""This transformer applies a pandas.DataFrame method to two columns (add, sub, mul, div, mod, pow).

Transformer assigns the output of the method to a new column. The method will be applied
Expand Down Expand Up @@ -378,16 +382,9 @@ def __init__(
msg = f"{self.classname()}: pd_method_kwargs 'axis' must be 0 or 1"
raise ValueError(msg)

if type(columns) is not list:
msg = f"{self.classname()}: columns must be a list containing two column names but got {columns}"
raise TypeError(msg)

if len(columns) != 2:
msg = f"{self.classname()}: columns must be a list containing two column names but got {columns}"
raise ValueError(msg)

self.column1_name = columns[0]
self.column2_name = columns[1]
# check_and_set_new_column_name function needs to be called before calling DataFrameMethodTransformer.__init__
# DFTransformer uses 'new_column_names' not 'new_column_name' so generic tests fail on regex if not ordered in this way
self.check_and_set_new_column_name(new_column_name)

# call DataFrameMethodTransformer.__init__
# This class will inherit all the below attributes from DataFrameMethodTransformer
Expand All @@ -399,6 +396,10 @@ def __init__(
**kwargs,
)

self.check_two_columns(columns)
self.column1_name = columns[0]
self.column2_name = columns[1]

def transform(self, X: pd.DataFrame) -> pd.DataFrame:
"""Transform input data by applying the chosen method to the two specified columns.

Expand All @@ -410,7 +411,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
-------
pd.DataFrame: Input X with an additional column.
"""
# call BaseTransformer.transform
# call DataFrameMethodTransformer.transform
X = super(DataFrameMethodTransformer, self).transform(X)

is_numeric = X[self.columns].apply(pd.api.types.is_numeric_dtype, axis=0)
Expand All @@ -419,7 +420,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
msg = f"{self.classname()}: input columns in X must contain only numeric values"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

N: noticed an error on line 414:

'# call BaseTransformer.transform
X = super(DataFrameMethodTransformer, self).transform(X)'

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch!

raise TypeError(msg)

X[self.new_column_names] = getattr(X[[self.column1_name]], self.pd_method_name)(
X[self.new_column_name] = getattr(X[[self.column1_name]], self.pd_method_name)(
X[self.column2_name],
**self.pd_method_kwargs,
)
Expand Down
Loading