Skip to content

Commit

Permalink
add test for encoder
Browse files Browse the repository at this point in the history
  • Loading branch information
RektPunk committed Sep 11, 2024
1 parent 92a2c96 commit de8ba0a
Showing 1 changed file with 43 additions and 0 deletions.
43 changes: 43 additions & 0 deletions tests/test_encoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import numpy as np
import pandas as pd
import pytest

from mqboost.base import XdataLike, YdataLike

Check failure on line 5 in tests/test_encoder.py

View workflow job for this annotation

GitHub Actions / Flake8

tests/test_encoder.py#L5

'mqboost.base.XdataLike' imported but unused (F401)

Check failure on line 5 in tests/test_encoder.py

View workflow job for this annotation

GitHub Actions / Flake8

tests/test_encoder.py#L5

'mqboost.base.YdataLike' imported but unused (F401)
from mqboost.encoder import MQLabelEncoder


# Fixture: raw categorical input that also contains missing entries
@pytest.fixture
def sample_data():
    """Return a Series of fruit names mixed with ``None`` and ``np.nan``."""
    values = ["apple", "banana", "orange", None, "kiwi", np.nan]
    return pd.Series(values)


# Fixture: already-encoded integer labels
@pytest.fixture
def sample_label_data():
    """Return a one-dimensional NumPy array holding six integer labels."""
    labels = [2, 3, 5, 0, 4, 0]
    return np.array(labels)


def test_fit_transform(sample_data):
    """Check that ``fit_transform`` yields one integer code per input element.

    Args:
        sample_data: Fixture providing a Series of category strings that also
            contains ``None`` and ``np.nan`` entries.
    """
    encoder = MQLabelEncoder()
    transformed = encoder.fit_transform(sample_data)

    assert transformed is not None
    # Compare against the abstract integer kind instead of the builtin ``int``:
    # ``dtype == int`` only matches NumPy's platform-default integer width
    # (e.g. 32-bit on Windows with NumPy < 2), so a valid int64/int32 result
    # could fail the stricter check purely because of the platform.
    assert np.issubdtype(transformed.dtype, np.integer)
    # Encoding must not drop or add rows, including the missing-value entries.
    assert len(transformed) == len(sample_data)


def test_unseen_and_nan_values(sample_data):
    """Check how ``transform`` encodes unseen categories and missing values.

    After fitting on ``sample_data``, values absent from the fitted data are
    expected to receive the same code as the ``"Unseen"`` label, and missing
    values (``None`` / ``np.nan``) the same code as the ``"NaN"`` label.
    """
    encoder = MQLabelEncoder()
    encoder.fit(sample_data)

    # "unknown" and "melon" do not appear in the fitted data; None and np.nan
    # are missing entries.
    incoming = pd.Series(["apple", "unknown", None, "melon", np.nan])
    actual = encoder.transform(incoming)

    # Build the reference codes from the encoder's own underlying label encoder
    # so the test does not hard-code integer values.
    expected = encoder.label_encoder.transform(
        ["apple", "Unseen", "NaN", "Unseen", "NaN"]
    )
    assert (actual == expected).all()

0 comments on commit de8ba0a

Please sign in to comment.