From 60eb0d8a871fec606ea942abc6d4d669bd2d3186 Mon Sep 17 00:00:00 2001 From: ChaitanMohr <119445474+ChaitanMohr@users.noreply.github.com> Date: Fri, 9 Feb 2024 11:47:49 +0000 Subject: [PATCH 1/5] changed LogTransformer add_1 to use log1p instead --- tubular/numeric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubular/numeric.py b/tubular/numeric.py index f36fdbb2..5b632184 100644 --- a/tubular/numeric.py +++ b/tubular/numeric.py @@ -122,10 +122,10 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame: raise ValueError(msg) if self.base is None: - X[new_column_names] = np.log(X[self.columns] + 1) + X[new_column_names] = np.log1p(X[self.columns]) else: - X[new_column_names] = np.log(X[self.columns] + 1) / np.log(self.base) + X[new_column_names] = np.log1p(X[self.columns]) / np.log(self.base) else: if (X[self.columns] <= 0).sum().sum() > 0: From 333ddcb47c823b0b7afd53d2fb49341ab87d3f44 Mon Sep 17 00:00:00 2001 From: ChaitanMohr <119445474+ChaitanMohr@users.noreply.github.com> Date: Sun, 11 Feb 2024 22:57:30 +0000 Subject: [PATCH 2/5] changed unit test to accommodate log1p --- tests/numeric/test_LogTransformer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/numeric/test_LogTransformer.py b/tests/numeric/test_LogTransformer.py index 3bcc924e..c07e0522 100644 --- a/tests/numeric/test_LogTransformer.py +++ b/tests/numeric/test_LogTransformer.py @@ -96,8 +96,8 @@ def expected_df_2(): """Expected output of test_expected_output_2.""" df = d.create_df_3() - df["a_new_col"] = np.log(df["a"] + 1) - df["b_new_col"] = np.log(df["b"] + 1) + df["a_new_col"] = np.log1p(df["a"]) + df["b_new_col"] = np.log1p(df["b"]) return df.drop(columns=["a", "b"]) @@ -114,8 +114,8 @@ def expected_df_4(): """Expected output of test_expected_output_4.""" df = d.create_df_3() - df["a_new_col"] = np.log(df["a"] + 1) - df["b_new_col"] = np.log(df["b"] + 1) + df["a_new_col"] = np.log1p(df["a"]) + df["b_new_col"] = np.log1p(df["b"]) return 
df From 8b9b047dc637cb2612d158864f197ba8d8ccbf90 Mon Sep 17 00:00:00 2001 From: ChaitanMohr <119445474+ChaitanMohr@users.noreply.github.com> Date: Thu, 15 Feb 2024 11:36:34 +0000 Subject: [PATCH 3/5] updated changelog and version --- CHANGELOG.rst | 7 +++++++ tubular/_version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index da22a368..303098a6 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -16,6 +16,13 @@ Subsections for each version can be one of the following; Each individual change should have a link to the pull request after the description of the change. +1.2.2 (2024-02-15) +------------------ +Changed +^^^^^^^ +- Changed LogTransformer to use log1p(x) instead of log(x+1) `#178 `_ +- Changed unit tests using log(x+1) to log1p(x) `#178 `_ + 1.2.0 (2024-02-06) ------------------ Added diff --git a/tubular/_version.py b/tubular/_version.py index c68196d1..bc86c944 100644 --- a/tubular/_version.py +++ b/tubular/_version.py @@ -1 +1 @@ -__version__ = "1.2.0" +__version__ = "1.2.2" From 509822eab86ae444bd7e743c597c1d957943c7cc Mon Sep 17 00:00:00 2001 From: ChaitanMohr <119445474+ChaitanMohr@users.noreply.github.com> Date: Tue, 20 Feb 2024 16:43:17 +0000 Subject: [PATCH 4/5] added unit test for log1p being well conditioned --- CHANGELOG.rst | 6 +++++- tests/numeric/test_LogTransformer.py | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4c5919df..c8e574be 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -16,8 +16,12 @@ Subsections for each version can be one of the following; Each individual change should have a link to the pull request after the description of the change. 
-1.2.2 (2024-02-15) +1.2.2 (2024-02-20) ------------------ +Added +^^^^^ +- Created unit test for checking if log1p is working and well conditioned for small x `#178 `_ + Changed ^^^^^^^ - Changed LogTransformer to use log1p(x) instead of log(x+1) `#178 `_ diff --git a/tests/numeric/test_LogTransformer.py b/tests/numeric/test_LogTransformer.py index c07e0522..03eb662a 100644 --- a/tests/numeric/test_LogTransformer.py +++ b/tests/numeric/test_LogTransformer.py @@ -135,6 +135,27 @@ def expected_df_6(): return df.drop("a", axis=1) + def test_log1p(self): + """Test that log1p is working as intended.""" + df = pd.DataFrame( + { + "a": [0.00001, 0.00002, 0.00003], + "b": [0.00004, 0.00005, 0.00006], + }, + ) + expected = pd.DataFrame( + { + "a_log": [9.999950e-06, 1.999980e-05, 2.999955e-05], + "b_log": [3.99992000e-05, 4.99987500e-05, 5.99982001e-05], + }, + ) + log_transformer = LogTransformer( + columns=["a", "b"], + add_1=True, + ) + actual = log_transformer.transform(df) + pd.testing.assert_frame_equal(actual, expected) + def test_super_transform_called(self, mocker): """Test that BaseTransformer.transform called.""" df = d.create_df_3() From 47e54eb1ea1606e2c6c1946d202d147a07076c09 Mon Sep 17 00:00:00 2001 From: ChaitanMohr <119445474+ChaitanMohr@users.noreply.github.com> Date: Tue, 20 Feb 2024 16:46:32 +0000 Subject: [PATCH 5/5] added comment --- tests/numeric/test_LogTransformer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/numeric/test_LogTransformer.py b/tests/numeric/test_LogTransformer.py index 03eb662a..69c3c835 100644 --- a/tests/numeric/test_LogTransformer.py +++ b/tests/numeric/test_LogTransformer.py @@ -143,6 +143,7 @@ def test_log1p(self): "b": [0.00004, 0.00005, 0.00006], }, ) + # Values created using np.log1p() of original df expected = pd.DataFrame( { "a_log": [9.999950e-06, 1.999980e-05, 2.999955e-05],