Skip to content

Commit

Permalink
Merge branch 'main' into 256-all-configs-within-pyprojecttoml-for-a-s…
Browse files Browse the repository at this point in the history
…treamlined-build-experience
  • Loading branch information
davidhopkinson26 authored Aug 5, 2024
2 parents c066cef + 0577fc8 commit d964161
Show file tree
Hide file tree
Showing 17 changed files with 384 additions and 494 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,16 @@ Each individual change should have a link to the pull request after the descript

1.3.2 (unreleased)
------------------

Changed
^^^^^^^

- Refactored BaseDateTransformer, BaseDateTwoColumnTransformer and associated testing `#273 <https://github.com/lvgig/tubular/pull/273>`_
- BaseTwoColumnTransformer removed in favour of mixin classes TwoColumnMixin and NewColumnNameMixin to handle validation of two columns and new_column_name arguments `#273 <https://github.com/lvgig/tubular/pull/273>`_
- Refactored tests for InteractionTransformer `#283 <https://github.com/lvgig/tubular/pull/283>`_
- Refactored build tools and package config into pyproject.toml `#271 <https://github.com/lvgig/tubular/pull/271>`_
- Set up automatic versioning using setuptools-scm `#271 <https://github.com/lvgig/tubular/pull/271>`_


1.3.1 (2024-07-18)
------------------
Changed
Expand All @@ -36,6 +39,7 @@ Changed
- Created BaseNumericTransformer class to support test refactor of numeric file
- Updated testing approach for LogTransformer


1.3.0 (2024-06-13)
------------------
Added
Expand Down Expand Up @@ -75,6 +79,7 @@ Changed
- Refactored OHE transformer tests to align with new testing framework.
- Moved fixtures relating only to a single test out of conftest and into testing script where utilised.
- !!!Introduced dependency on Sklearn's OneHotEncoder by adding test to check OHE transformer (which we are calling from within our OHE wrapper) is fit before transform
- Refactored NearestMeanResponseImputer in line with new testing framework.


Removed
Expand Down
3 changes: 2 additions & 1 deletion CONTRIBUTING.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ We have some general requirements for all contributions then specific requiremen

Set up development environment
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
For external contributors, first create your own fork of this repo.

First clone the repository;
Then clone the fork (or this repository if internal);

.. code::
Expand Down
53 changes: 30 additions & 23 deletions tests/base_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,36 @@ def test_drop_column_arg_errors(
uninitialized_transformers[self.transformer_name](**args)


class NewColumnNameInitMixintests:
    """
    Init tests shared by transformers that accept a "new_column_name" argument.
    Note the class name deliberately avoids starting with "Tests" so that the tests are not run on import.
    """

    @pytest.mark.parametrize(
        "new_column_type",
        [1, True, {"a": 1}, [1, 2], None, np.inf, np.nan],
    )
    def test_new_column_name_type_error(
        self,
        new_column_type,
        minimal_attribute_dict,
        uninitialized_transformers,
    ):
        """Check that init raises a TypeError when new_column_name is not a str."""

        # build a fresh kwargs dict so the entry held by the fixture is left untouched
        init_kwargs = {
            **minimal_attribute_dict[self.transformer_name],
            "new_column_name": new_column_type,
        }
        expected_message = f"{self.transformer_name}: new_column_name should be str"

        with pytest.raises(TypeError, match=re.escape(expected_message)):
            uninitialized_transformers[self.transformer_name](**init_kwargs)


class WeightColumnInitMixinTests:
"""
Tests for BaseTransformer.init() behaviour specific to when a transformer accepts a weight column.
Expand Down Expand Up @@ -214,29 +244,6 @@ def test_list_length_error(
):
uninitialized_transformers[self.transformer_name](**args)

@pytest.mark.parametrize(
    "new_column_type",
    [1, True, {"a": 1}, [1, 2], None, np.inf, np.nan],
)
def test_new_column_name_type_error(
    self,
    new_column_type,
    minimal_attribute_dict,
    uninitialized_transformers,
):
    """Check that init raises a TypeError when new_col_name is not a str."""

    # build a fresh kwargs dict so the entry held by the fixture is left untouched
    init_kwargs = {
        **minimal_attribute_dict[self.transformer_name],
        "new_col_name": new_column_type,
    }
    expected_message = f"{self.transformer_name}: new_col_name should be str"

    with pytest.raises(TypeError, match=re.escape(expected_message)):
        uninitialized_transformers[self.transformer_name](**init_kwargs)


class GenericFitTests:
"""
Expand Down
11 changes: 8 additions & 3 deletions tests/comparison/test_EqualityChecker.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,18 @@
DropOriginalInitMixinTests,
GenericFitTests,
GenericTransformTests,
NewColumnNameInitMixintests,
OtherBaseBehaviourTests,
TwoColumnListInitTests,
)
from tubular.comparison import EqualityChecker


class TestInit(DropOriginalInitMixinTests, TwoColumnListInitTests):
class TestInit(
DropOriginalInitMixinTests,
NewColumnNameInitMixintests,
TwoColumnListInitTests,
):
"""Generic tests for transformer.init()."""

@classmethod
Expand Down Expand Up @@ -48,7 +53,7 @@ def test_expected_output(self, test_dataframe):

example_transformer = EqualityChecker(
columns=["b", "c"],
new_col_name="bool_logic",
new_column_name="bool_logic",
)
actual = example_transformer.transform(test_dataframe)

Expand All @@ -73,7 +78,7 @@ def test_expected_output_dropped(self, test_dataframe):

example_transformer = EqualityChecker(
columns=["b", "c"],
new_col_name="bool_logic",
new_column_name="bool_logic",
drop_original=True,
)
actual = example_transformer.transform(test_dataframe)
Expand Down
15 changes: 10 additions & 5 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from tests.test_data import (
create_is_between_dates_df_1,
create_numeric_df_1,
create_numeric_df_2,
create_object_df,
)

Expand Down Expand Up @@ -136,10 +137,6 @@ def minimal_attribute_dict():
"BaseTransformer": {
"columns": ["a"],
},
"BaseTwoColumnTransformer": {
"columns": ["a", "b"],
"new_col_name": "c",
},
"BetweenDatesTransformer": {
"new_column_name": "c",
"columns": ["a", "c", "b"],
Expand Down Expand Up @@ -194,7 +191,7 @@ def minimal_attribute_dict():
},
"EqualityChecker": {
"columns": ["a", "b"],
"new_col_name": "c",
"new_column_name": "c",
"drop_original": True,
},
"GroupRareLevelsTransformer": {
Expand Down Expand Up @@ -293,6 +290,7 @@ def minimal_dataframe_lookup() -> dict[str, pd.DataFrame]:
"""

num_df = create_numeric_df_1()
nan_df = create_numeric_df_2()
object_df = create_object_df()
date_df = create_is_between_dates_df_1()

Expand Down Expand Up @@ -324,6 +322,13 @@ def minimal_dataframe_lookup() -> dict[str, pd.DataFrame]:
for transformer in other_num_transformers:
min_df_dict[transformer] = num_df

# Some transformers require missing values to work
other_nan_transformers = [
"NearestMeanResponseImputer",
]
for transformer in other_nan_transformers:
min_df_dict[transformer] = nan_df

return min_df_dict


Expand Down
153 changes: 153 additions & 0 deletions tests/dates/test_BaseDateTransformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import datetime

import pandas as pd
import pytest

from tests.base_tests import (
ColumnStrListInitTests,
DropOriginalInitMixinTests,
GenericFitTests,
GenericTransformTests,
NewColumnNameInitMixintests,
OtherBaseBehaviourTests,
)


def create_date_diff_different_dtypes():
    """Build an 8-row dataframe holding the same dates as both date and datetime values.

    "date_col_1"/"date_col_2" are built from datetime.date objects, while
    "datetime_col_1"/"datetime_col_2" are built from UTC-aware datetime.datetime
    objects for the same year/month/day values.
    """
    utc = datetime.timezone.utc

    # (year, month, day) values shared by the "_1" date and datetime columns
    first_ymd = [
        (1993, 9, 27),
        (2000, 3, 19),
        (2018, 11, 10),
        (2018, 10, 10),
        (2018, 10, 10),
        (2018, 10, 10),
        (2018, 12, 10),
        (1985, 7, 23),
    ]
    # (year, month, day) values shared by the "_2" date and datetime columns
    second_ymd = [
        (2020, 5, 1),
        (2019, 12, 25),
        (2018, 11, 10),
        (2018, 11, 10),
        (2018, 9, 10),
        (2015, 11, 10),
        (2015, 11, 10),
        (2015, 7, 23),
    ]

    return pd.DataFrame(
        {
            "date_col_1": [datetime.date(*ymd) for ymd in first_ymd],
            "date_col_2": [datetime.date(*ymd) for ymd in second_ymd],
            "datetime_col_1": [
                datetime.datetime(*ymd, tzinfo=utc) for ymd in first_ymd
            ],
            "datetime_col_2": [
                datetime.datetime(*ymd, tzinfo=utc) for ymd in second_ymd
            ],
        },
    )


class DatesMixinTransformTests:
    """Transform tests shared by the date transformers."""

    @pytest.mark.parametrize(
        "columns, datetime_col, date_col",
        [
            (["date_col_1", "datetime_col_2"], 1, 0),
            (["datetime_col_1", "date_col_2"], 0, 1),
        ],
    )
    def test_mismatched_datetypes_error(
        self,
        columns,
        datetime_col,
        date_col,
        uninitialized_transformers,
    ):
        """Check transform raises a TypeError when one column is a date and the other a datetime."""

        transformer = uninitialized_transformers[self.transformer_name](
            columns=columns,
            new_column_name="c",
        )

        frame = create_date_diff_different_dtypes()
        # types don't seem to come out of the above function as expected, hard enforce
        for date_name in ["date_col_1", "date_col_2"]:
            frame[date_name] = pd.to_datetime(frame[date_name]).dt.date

        for datetime_name in ["datetime_col_1", "datetime_col_2"]:
            frame[datetime_name] = pd.to_datetime(frame[datetime_name])

        # NOTE(review): both literals are equal as sets; the literal order
        # presumably mirrors the order the types appear in `columns` - confirm
        # whether it is meant to affect how the set renders in the message.
        if datetime_col == 0:
            found_types = {"datetime64", "date"}
        else:
            found_types = {"date", "datetime64"}

        expected_pattern = rf"Columns fed to datetime transformers should be \['datetime64', 'date'\] and have consistent types, but found {found_types}. Please use ToDatetimeTransformer to standardise"
        with pytest.raises(TypeError, match=expected_pattern):
            transformer.transform(frame)


class TestInit(
    NewColumnNameInitMixintests,
    DropOriginalInitMixinTests,
    ColumnStrListInitTests,
):
    """Run the shared init test suites against BaseDateTransformer."""

    @classmethod
    def setup_class(cls):
        """Set the transformer name that the inherited tests read from self.transformer_name."""
        cls.transformer_name = "BaseDateTransformer"


class TestFit(GenericFitTests):
    """Run the generic fit() tests against BaseDateTransformer."""

    @classmethod
    def setup_class(cls):
        """Set transformer_name for this test class."""
        cls.transformer_name = "BaseDateTransformer"


class TestTransform(GenericTransformTests, DatesMixinTransformTests):
    """Run the generic and date-specific transform() tests against BaseDateTransformer."""

    @classmethod
    def setup_class(cls):
        """Set the transformer name that the inherited tests read from self.transformer_name."""
        cls.transformer_name = "BaseDateTransformer"


class TestOtherBaseBehaviour(OtherBaseBehaviourTests):
    """
    Runs the tests for BaseTransformer behaviour that sit outside the three standard methods.
    Specific tests may need to be overwritten in this class if the tested transformer modifies this behaviour.
    """

    @classmethod
    def setup_class(cls):
        """Set transformer_name for this test class."""
        cls.transformer_name = "BaseDateTransformer"
Loading

0 comments on commit d964161

Please sign in to comment.