Skip to content

Commit

Permalink
Merge pull request #291 from lvgig/feature/TwoColumnOperator
Browse files Browse the repository at this point in the history
Feature/two column operator
  • Loading branch information
davidhopkinson26 authored Sep 4, 2024
2 parents a21d886 + ffbbe97 commit aaafda0
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 131 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Changed
- Refactored MeanResponseTransformer tests in new format `#262 <https://github.com/lvgig/tubular/pull/262>`_
- refactored build tools and package config into pyproject.toml `#271 <https://github.com/lvgig/tubular/pull/271>`_
- set up automatic versioning using setuptools-scm `#271 <https://github.com/lvgig/tubular/pull/271>`_
- Refactored TwoColumnOperatorTransformer tests in new format `#283 <https://github.com/lvgig/tubular/issues/274>`_


1.3.1 (2024-07-18)
Expand Down
2 changes: 1 addition & 1 deletion tests/base_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def test_columns_non_list_error(
minimal_attribute_dict,
uninitialized_transformers,
):
"""Test an error is raised if columns is not passed as a string not a list."""
"""Test an error is raised if columns is passed as a string not a list."""

args = minimal_attribute_dict[self.transformer_name].copy()
args["columns"] = non_list
Expand Down
154 changes: 38 additions & 116 deletions tests/numeric/test_TwoColumnOperatorTransformer.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,33 @@
import pandas as pd
import pytest
import test_aide as ta

import tests.test_data as d
import tubular
from tests.base_tests import (
GenericTransformTests,
NewColumnNameInitMixintests,
OtherBaseBehaviourTests,
TwoColumnListInitTests,
)
from tubular.numeric import TwoColumnOperatorTransformer

# @pytest.fixture(scope="module", autouse=True)
# def example_transformer():
# return TwoColumnOperatorTransformer(
# "mul",
# ["a", "b"],
# "c",
# )

@pytest.fixture(scope="module", autouse=True)
def example_transformer():
return TwoColumnOperatorTransformer(
"mul",
["a", "b"],
"c",
)


class TestTwoColumnOperatorTransformerInit:
"""Tests for TwoColumnMethodTransformer.__init__()."""

def test_column_type_error(self):
"""Checks that an error is raised if the column type is not a list."""
with pytest.raises(
TypeError,
match="columns must be a list containing two column names",
):
TwoColumnOperatorTransformer(
"mul",
"a, b",
"c",
pd_method_kwargs={"axis": 1},
)

def test_column_size_1_error(self):
"""Checks that the column is of length 2."""
with pytest.raises(
ValueError,
match="columns must be a list containing two column names",
):
TwoColumnOperatorTransformer(
"mul",
["a"],
"c",
pd_method_kwargs={"axis": 1},
)
class TestInit(
NewColumnNameInitMixintests,
TwoColumnListInitTests,
):
"""Generic tests for transformer.init()."""

def test_empty_list_error(self):
"""Checks that the list is not empty"""
with pytest.raises(
ValueError,
match="columns must be a list containing two column names",
):
TwoColumnOperatorTransformer(
"mul",
[],
"c",
pd_method_kwargs={"axis": 1},
)
@classmethod
def setup_class(cls):
cls.transformer_name = "TwoColumnOperatorTransformer"

def test_axis_not_present_error(self):
"""Checks that an error is raised if no axis element present in pd_method_kwargs dict."""
Expand All @@ -76,74 +47,13 @@ def test_axis_not_valid_error(self):
pd_method_kwargs={"axis": 2},
)

# TODO replace this with behaviour tests for DataFrameMethodTransformer init error handling
def test_DataFrameMethodTransformer_init_call(self, mocker):
"""Tests that the .__init__ method is called from the parent DataFrameMethodTransformer class."""
expected_call_args = {
0: {
"args": (),
"kwargs": {
"new_column_names": "c",
"pd_method_name": "mul",
"columns": ["a", "b"],
"pd_method_kwargs": {"axis": 0},
},
},
}

with ta.functions.assert_function_call(
mocker,
tubular.base.DataFrameMethodTransformer,
"__init__",
expected_call_args,
return_value=None,
):
TwoColumnOperatorTransformer("mul", ["a", "b"], "c")


class TestTwoColumnOperatorTransformerTransform:
@pytest.mark.parametrize(
"pd_method_name",
[
("mul"),
("div"),
("pow"),
],
)
def test_pandas_method_called(self, mocker, pd_method_name):
"""Test that the pandas method is called as expected (with kwargs passed) during transform."""
spy = mocker.spy(pd.DataFrame, pd_method_name)

pd_method_kwargs = {"axis": 0}

data = d.create_df_11()
x = TwoColumnOperatorTransformer(
pd_method_name,
["a", "b"],
"c",
)
x.transform(data)

# pull out positional and keyword args to target the call
print(spy.call_args_list)
call_args = spy.call_args_list[0]
call_pos_args = call_args[0]
call_kwargs = call_args[1]

# test keyword are as expected
ta.equality.assert_dict_equal_msg(
actual=call_kwargs,
expected=pd_method_kwargs,
msg_tag=f"""Keyword arg assert for '{pd_method_name}'""",
)
class TestTransform(GenericTransformTests):
"""Tests for transformer.transform."""

# test positional args are as expected
ta.equality.assert_list_tuple_equal_msg(
actual=call_pos_args,
# 'a' is indexed as a list here because that's how DataFrameMethodTransformer.__init__ stores the columns attribute
expected=(data[["a"]], data["b"]),
msg_tag=f"""Positional arg assert for {pd_method_name}""",
)
@classmethod
def setup_class(cls):
cls.transformer_name = "TwoColumnOperatorTransformer"

@pytest.mark.parametrize(
("pd_method_name", "output"),
Expand Down Expand Up @@ -184,3 +94,15 @@ def test_non_numeric_error(self):
match="TwoColumnOperatorTransformer: input columns in X must contain only numeric values",
):
x.transform(d.create_df_8())


class TestOtherBaseBehaviour(OtherBaseBehaviourTests):
"""
Class to run tests for BaseTransformerBehaviour outside the three standard methods.
May need to overwite specific tests in this class if the tested transformer modifies this behaviour.
"""

@classmethod
def setup_class(cls):
cls.transformer_name = "TwoColumnOperatorTransformer"
29 changes: 15 additions & 14 deletions tubular/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)

from tubular.base import BaseTransformer, DataFrameMethodTransformer
from tubular.mixins import DropOriginalMixin
from tubular.mixins import DropOriginalMixin, NewColumnNameMixin, TwoColumnMixin


class BaseNumericTransformer(BaseTransformer):
Expand Down Expand Up @@ -312,7 +312,11 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
return X


class TwoColumnOperatorTransformer(DataFrameMethodTransformer):
class TwoColumnOperatorTransformer(
NewColumnNameMixin,
TwoColumnMixin,
DataFrameMethodTransformer,
):
"""This transformer applies a pandas.DataFrame method to two columns (add, sub, mul, div, mod, pow).
Transformer assigns the output of the method to a new column. The method will be applied
Expand Down Expand Up @@ -378,16 +382,9 @@ def __init__(
msg = f"{self.classname()}: pd_method_kwargs 'axis' must be 0 or 1"
raise ValueError(msg)

if type(columns) is not list:
msg = f"{self.classname()}: columns must be a list containing two column names but got {columns}"
raise TypeError(msg)

if len(columns) != 2:
msg = f"{self.classname()}: columns must be a list containing two column names but got {columns}"
raise ValueError(msg)

self.column1_name = columns[0]
self.column2_name = columns[1]
# check_and_set_new_column_name function needs to be called before calling DataFrameMethodTransformer.__init__
# DFTransformer uses 'new_column_names' not 'new_column_name' so generic tests fail on regex if not ordered in this way
self.check_and_set_new_column_name(new_column_name)

# call DataFrameMethodTransformer.__init__
# This class will inherit all the below attributes from DataFrameMethodTransformer
Expand All @@ -399,6 +396,10 @@ def __init__(
**kwargs,
)

self.check_two_columns(columns)
self.column1_name = columns[0]
self.column2_name = columns[1]

def transform(self, X: pd.DataFrame) -> pd.DataFrame:
"""Transform input data by applying the chosen method to the two specified columns.
Expand All @@ -410,7 +411,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
-------
pd.DataFrame: Input X with an additional column.
"""
# call BaseTransformer.transform
# call DataFrameMethodTransformer.transform
X = super(DataFrameMethodTransformer, self).transform(X)

is_numeric = X[self.columns].apply(pd.api.types.is_numeric_dtype, axis=0)
Expand All @@ -419,7 +420,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
msg = f"{self.classname()}: input columns in X must contain only numeric values"
raise TypeError(msg)

X[self.new_column_names] = getattr(X[[self.column1_name]], self.pd_method_name)(
X[self.new_column_name] = getattr(X[[self.column1_name]], self.pd_method_name)(
X[self.column2_name],
**self.pd_method_kwargs,
)
Expand Down

0 comments on commit aaafda0

Please sign in to comment.