Skip to content

Commit

Permalink
Merge pull request #178 from lvgig/feature/log_issue
Browse files Browse the repository at this point in the history
Change log(x+1) to log1p(x) in LogTransformer
  • Loading branch information
ChaitanMohr authored Feb 20, 2024
2 parents 8efb2af + 47e54eb commit 6e32db5
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 7 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@ Subsections for each version can be one of the following;

Each individual change should have a link to the pull request after the description of the change.

1.2.2 (2024-02-20)
------------------
Added
^^^^^
- Added a unit test checking that log1p is used and is well conditioned for small x `#178 <https://github.com/lvgig/tubular/pull/178>`_

Changed
^^^^^^^
- Changed LogTransformer to use log1p(x) instead of log(x+1) `#178 <https://github.com/lvgig/tubular/pull/178>`_
- Changed unit tests using log(x+1) to log1p(x) `#178 <https://github.com/lvgig/tubular/pull/178>`_

1.2.1 (2024-02-08)
------------------
Added
Expand Down
30 changes: 26 additions & 4 deletions tests/numeric/test_LogTransformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ def expected_df_2():
"""Expected output of test_expected_output_2."""
df = d.create_df_3()

df["a_new_col"] = np.log(df["a"] + 1)
df["b_new_col"] = np.log(df["b"] + 1)
df["a_new_col"] = np.log1p(df["a"])
df["b_new_col"] = np.log1p(df["b"])

return df.drop(columns=["a", "b"])

Expand All @@ -114,8 +114,8 @@ def expected_df_4():
"""Expected output of test_expected_output_4."""
df = d.create_df_3()

df["a_new_col"] = np.log(df["a"] + 1)
df["b_new_col"] = np.log(df["b"] + 1)
df["a_new_col"] = np.log1p(df["a"])
df["b_new_col"] = np.log1p(df["b"])

return df

Expand All @@ -135,6 +135,28 @@ def expected_df_6():

return df.drop("a", axis=1)

def test_log1p(self):
    """Test that add_1=True is computed with log1p rather than log(x + 1).

    The inputs are far below float64 machine epsilon (~2.2e-16), so
    ``x + 1`` rounds to exactly 1.0 and ``np.log(x + 1)`` returns 0.0,
    while ``np.log1p(x)`` returns x to full double precision. Comparing
    with ``atol=0`` and a tight ``rtol`` makes the test fail if the
    transformer ever regresses to the ill-conditioned form; the default
    tolerances of ``assert_frame_equal`` (rtol=1e-5, atol=1e-8) would
    accept either implementation.
    """
    df = pd.DataFrame(
        {
            "a": [1e-20, 2e-20, 3e-20],
            "b": [4e-20, 5e-20, 6e-20],
        },
    )
    # For |x| << machine epsilon, log1p(x) = x - x**2 / 2 + ... equals x
    # to double precision, so the expected values are the inputs themselves.
    expected = pd.DataFrame(
        {
            "a_log": [1e-20, 2e-20, 3e-20],
            "b_log": [4e-20, 5e-20, 6e-20],
        },
    )
    log_transformer = LogTransformer(
        columns=["a", "b"],
        add_1=True,
    )
    actual = log_transformer.transform(df)
    # atol=0 is essential: any positive absolute tolerance would let the
    # 0.0 produced by log(x + 1) pass against values of order 1e-20.
    pd.testing.assert_frame_equal(actual, expected, rtol=1e-12, atol=0)

def test_super_transform_called(self, mocker):
"""Test that BaseTransformer.transform called."""
df = d.create_df_3()
Expand Down
2 changes: 1 addition & 1 deletion tubular/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.2.1"
__version__ = "1.2.2"
4 changes: 2 additions & 2 deletions tubular/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,10 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
raise ValueError(msg)

if self.base is None:
X[new_column_names] = np.log(X[self.columns] + 1)
X[new_column_names] = np.log1p(X[self.columns])

else:
X[new_column_names] = np.log(X[self.columns] + 1) / np.log(self.base)
X[new_column_names] = np.log1p(X[self.columns]) / np.log(self.base)

else:
if (X[self.columns] <= 0).sum().sum() > 0:
Expand Down

0 comments on commit 6e32db5

Please sign in to comment.