Skip to content
2 changes: 2 additions & 0 deletions codebeaver.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from: pytest
# This file was generated automatically by CodeBeaver based on your repository. Learn how to customize it here: https://docs.codebeaver.ai/open-source/codebeaver-yml/
85 changes: 85 additions & 0 deletions mlxtend/data/tests/test_iris.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,88 @@ def test_iris_invalid_choice():
with pytest.raises(ValueError) as excinfo:
iris_data(version="bla")
assert excinfo.value.message == "version must be 'uci' or 'corrected'."

def test_iris_data_invalid_version_type():
    """Expect a ValueError with the usual message when ``version`` is ``None``."""
    expected_fragment = "version must be 'uci' or 'corrected'"
    with pytest.raises(ValueError) as excinfo:
        iris_data(version=None)
    # Inspect the captured exception only after the context manager exits.
    raised_text = str(excinfo.value)
    assert expected_fragment in raised_text

def test_iris_data_dtype_and_shape():
    """Check array type, shape and dtype of ``iris_data`` for both versions.

    For "uci" and "corrected" alike, X must be a (150, 4) float32/float64
    ndarray and y a (150,) ndarray with an integer dtype.
    """
    accepted_float_dtypes = [np.float64, np.float32]
    for variant in ("uci", "corrected"):
        features, labels = iris_data(version=variant)

        # Container types.
        assert isinstance(features, np.ndarray)
        assert isinstance(labels, np.ndarray)

        # Dimensions: 150 samples, 4 features.
        assert features.shape == (150, 4)
        assert labels.shape == (150,)

        # Element types.
        assert features.dtype in accepted_float_dtypes
        assert np.issubdtype(labels.dtype, np.integer)

def test_iris_data_file_not_found(monkeypatch):
    """Expect an IOError from the patched ``np.genfromtxt`` to reach the caller."""

    def raise_missing_file(*args, **kwargs):
        # Stand-in loader that always fails, simulating a missing data file.
        raise IOError("File not found")

    monkeypatch.setattr(np, "genfromtxt", raise_missing_file)

    with pytest.raises(IOError) as excinfo:
        iris_data(version="uci")
    captured_message = str(excinfo.value)
    assert "File not found" in captured_message
def test_iris_data_empty_version(monkeypatch):
    """Expect a ValueError with the usual message when ``version`` is ``""``."""
    expected_fragment = "version must be 'uci' or 'corrected'"
    with pytest.raises(ValueError) as excinfo:
        iris_data(version="")
    # Inspect the captured exception only after the context manager exits.
    raised_text = str(excinfo.value)
    assert expected_fragment in raised_text

def test_iris_data_uppercase_version(monkeypatch):
    """Expect a ValueError for the uppercase version string ``'UCI'``."""
    expected_fragment = "version must be 'uci' or 'corrected'"
    with pytest.raises(ValueError) as excinfo:
        iris_data(version="UCI")
    # Inspect the captured exception only after the context manager exits.
    raised_text = str(excinfo.value)
    assert expected_fragment in raised_text

def test_iris_data_incorrect_shape(monkeypatch):
    """Expect an IndexError from the 'corrected' version on truncated data.

    ``np.genfromtxt`` is patched to hand back a 30x5 array of zeros in place
    of the expected 150 rows.
    """

    def truncated_loader(*args, **kwargs):
        # A fresh 30-row, 5-column array on every call.
        short_shape = (30, 5)
        return np.zeros(short_shape)

    monkeypatch.setattr(np, "genfromtxt", truncated_loader)

    with pytest.raises(IndexError):
        iris_data(version="corrected")

def test_iris_data_returns_distinct_arrays():
    """Check that consecutive ``iris_data()`` calls return independent arrays.

    Object identity alone does not show that modifying one result leaves the
    other untouched: two different ndarray objects can still be views onto
    the same underlying buffer. The test therefore checks both identity and
    memory overlap.
    """
    iris_x1, iris_y1 = iris_data()
    iris_x2, iris_y2 = iris_data()

    # The returned arrays must not be the same objects in memory.
    assert iris_x1 is not iris_x2
    assert iris_y1 is not iris_y2

    # Nor may they share any underlying storage, otherwise an in-place edit
    # of one result would show up in the other.
    assert not np.shares_memory(iris_x1, iris_x2)
    assert not np.shares_memory(iris_y1, iris_y2)
def test_iris_data_numeric_version():
    """Expect a ValueError with the usual message when ``version`` is ``123``."""
    expected_fragment = "version must be 'uci' or 'corrected'"
    with pytest.raises(ValueError) as excinfo:
        iris_data(version=123)
    # Inspect the captured exception only after the context manager exits.
    raised_text = str(excinfo.value)
    assert expected_fragment in raised_text

def test_iris_data_whitespace_version():
    """Expect a ValueError for a whitespace-padded version string (``' uci '``)."""
    expected_fragment = "version must be 'uci' or 'corrected'"
    with pytest.raises(ValueError) as excinfo:
        iris_data(version=" uci ")
    # Inspect the captured exception only after the context manager exits.
    raised_text = str(excinfo.value)
    assert expected_fragment in raised_text

def test_iris_data_empty_file(monkeypatch):
    """Expect an IndexError when the patched loader returns an empty array."""

    def empty_loader(*args, **kwargs):
        # Simulates an empty data file: a new empty array on each call.
        return np.array([])

    monkeypatch.setattr(np, "genfromtxt", empty_loader)

    with pytest.raises(IndexError):
        iris_data(version="uci")
140 changes: 140 additions & 0 deletions tests/test_autompg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import numpy as np
import pytest
import os
from mlxtend.data.autompg import autompg_data

def test_autompg_data_returns_correct_arrays(monkeypatch):
    """Check that ``autompg_data`` splits loaded data into X and y.

    X is expected to be every column but the last, y the last column.
    """
    # Three samples, six columns (5 features + 1 target).
    dummy_data = np.array(
        [
            [1, 2, 3, 4, 5, 6],
            [7, 8, 9, 10, 11, 12],
            [13, 14, 15, 16, 17, 18],
        ]
    )

    def stub_genfromtxt(fname, delimiter):
        # Ignore the requested file and delimiter; always serve the fixture.
        return dummy_data

    monkeypatch.setattr(np, "genfromtxt", stub_genfromtxt)

    X, y = autompg_data()

    np.testing.assert_array_equal(X, dummy_data[:, :-1])
    np.testing.assert_array_equal(y, dummy_data[:, -1])

def test_autompg_data_empty(monkeypatch):
    """Check that a zero-row input produces empty X and y of matching shape."""
    # Zero rows, five columns (4 features + 1 target).
    dummy_data = np.empty((0, 5))

    def stub_genfromtxt(fname, delimiter):
        return dummy_data

    monkeypatch.setattr(np, "genfromtxt", stub_genfromtxt)

    X, y = autompg_data()

    # X drops the last of the five columns; y is that last column.
    assert X.shape == (0, 4)
    assert y.shape == (0,)

def test_autompg_data_invalid_input(monkeypatch):
    """Expect an IndexError when the loader returns a 1-dimensional array."""
    # A flat array cannot be sliced along two axes.
    dummy_data = np.array([1, 2, 3, 4, 5])

    def stub_genfromtxt(fname, delimiter):
        return dummy_data

    monkeypatch.setattr(np, "genfromtxt", stub_genfromtxt)

    with pytest.raises(IndexError):
        autompg_data()
def test_autompg_data_single_sample(monkeypatch):
    """Check that a dataset with exactly one row is split into X and y."""
    # One sample, six columns (5 features + 1 target).
    dummy_data = np.array([[10, 20, 30, 40, 50, 60]])

    def stub_genfromtxt(fname, delimiter):
        return dummy_data

    monkeypatch.setattr(np, "genfromtxt", stub_genfromtxt)

    X, y = autompg_data()

    np.testing.assert_array_equal(X, dummy_data[:, :-1])
    np.testing.assert_array_equal(y, dummy_data[:, -1])

def test_autompg_data_one_column(monkeypatch):
    """Check the shapes returned when the input has a single column.

    The only column is treated as the target, so X is expected to have
    shape (n, 0) and y shape (n,).
    """
    dummy_data = np.array([[100], [200], [300]])

    def stub_genfromtxt(fname, delimiter):
        return dummy_data

    monkeypatch.setattr(np, "genfromtxt", stub_genfromtxt)

    X, y = autompg_data()

    # No feature columns remain once the single column is taken as target.
    assert X.shape == (3, 0)
    # One target value per row.
    assert y.shape == (3,)

def test_autompg_data_none(monkeypatch):
    """Expect a TypeError when the patched ``np.genfromtxt`` returns ``None``."""

    def stub_genfromtxt(fname, delimiter):
        # Simulates a loader that produced no data at all.
        return None

    monkeypatch.setattr(np, "genfromtxt", stub_genfromtxt)

    with pytest.raises(TypeError):
        autompg_data()
def test_autompg_data_calls_genfromtxt(monkeypatch):
    """Check the path and delimiter that ``autompg_data`` hands to ``np.genfromtxt``.

    The recording stub also returns a small fixture so the X/y split can be
    checked in the same run.
    """
    recorded_calls = []

    def recording_genfromtxt(fname, delimiter):
        recorded_calls.append((fname, delimiter))
        # Two samples: 2 feature columns followed by 1 target column.
        return np.array([[1, 2, 3], [4, 5, 6]])

    monkeypatch.setattr(np, "genfromtxt", recording_genfromtxt)

    X, y = autompg_data()

    assert recorded_calls, "np.genfromtxt was not called"
    used_path, used_delimiter = recorded_calls[0]

    # The path must end with the expected subdirectory/filename.
    path_suffix = os.path.join("data", "autompg.csv.gz")
    assert used_path.endswith(path_suffix), "The file path used is incorrect."
    assert used_delimiter == ",", "The delimiter used is not a comma."

    # The fixture must be split into features and target.
    np.testing.assert_array_equal(X, np.array([[1, 2], [4, 5]]))
    np.testing.assert_array_equal(y, np.array([3, 6]))
def test_autompg_data_with_nans(monkeypatch):
    """Check that ``np.nan`` entries in the loaded data reach X and y intact."""
    # Fixture with missing values in the feature columns.
    dummy_data = np.array(
        [
            [1.0, np.nan, 3.0, 4.0],
            [5.0, 6.0, np.nan, 8.0],
        ]
    )

    def stub_genfromtxt(fname, delimiter):
        return dummy_data

    monkeypatch.setattr(np, "genfromtxt", stub_genfromtxt)

    X, y = autompg_data()

    np.testing.assert_array_equal(X, dummy_data[:, :-1])
    np.testing.assert_array_equal(y, dummy_data[:, -1])

def test_autompg_data_list_input(monkeypatch):
    """Expect a TypeError when the loader returns a plain list, not an ndarray."""
    # Nested Python lists rather than a NumPy array.
    dummy_data = [[1, 2, 3], [4, 5, 6]]

    def stub_genfromtxt(fname, delimiter):
        return dummy_data

    monkeypatch.setattr(np, "genfromtxt", stub_genfromtxt)

    with pytest.raises(TypeError):
        autompg_data()

def test_autompg_data_non_numeric(monkeypatch):
    """Check that string-valued data is split into X and y like numeric data."""
    dummy_data = np.array(
        [
            ["a", "b", "c"],
            ["d", "e", "f"],
        ]
    )

    def stub_genfromtxt(fname, delimiter):
        return dummy_data

    monkeypatch.setattr(np, "genfromtxt", stub_genfromtxt)

    X, y = autompg_data()

    np.testing.assert_array_equal(X, dummy_data[:, :-1])
    np.testing.assert_array_equal(y, dummy_data[:, -1])
Loading