CodeBeaverDemo · CodeBeaverDemo · Mar 11, 2025 · Mar 11, 2025 · Mar 11, 2025 · Mar 11, 2025
diff --git a/codebeaver.yml b/codebeaver.yml
@@ -0,0 +1,2 @@
+from: pytest
+# This file was generated automatically by CodeBeaver based on your repository. Learn how to customize it here: https://docs.codebeaver.ai/open-source/codebeaver-yml/
diff --git a/mlxtend/data/tests/test_iris.py b/mlxtend/data/tests/test_iris.py
@@ -35,3 +35,88 @@ def test_iris_invalid_choice():
     with pytest.raises(ValueError) as excinfo:
         iris_data(version="bla")
         assert excinfo.value.message == "version must be 'uci' or 'corrected'."
+
+def test_iris_data_invalid_version_type():
+    """iris_data must reject ``version=None`` with a ValueError."""
+    with pytest.raises(ValueError) as raised:
+        iris_data(version=None)
+    # The exception text should name the two accepted version strings.
+    assert "version must be 'uci' or 'corrected'" in str(raised.value)
+
+def test_iris_data_dtype_and_shape():
+    """Both dataset versions must yield ndarrays with the expected shapes and dtypes."""
+    for flavor in ("uci", "corrected"):
+        features, labels = iris_data(version=flavor)
+        # Both return values have to be genuine NumPy arrays.
+        assert isinstance(features, np.ndarray)
+        assert isinstance(labels, np.ndarray)
+        # The test expects 150 samples described by 4 features each.
+        assert features.shape == (150, 4)
+        assert labels.shape == (150,)
+        # Features are floating point; labels use some integer dtype.
+        assert features.dtype in (np.float64, np.float32)
+        assert np.issubdtype(labels.dtype, np.integer)
+
+def test_iris_data_file_not_found(monkeypatch):
+    """An IOError raised by np.genfromtxt must surface unchanged from iris_data."""
+    def raise_missing_file(*_args, **_kwargs):
+        raise IOError("File not found")
+    # Swap in the failing loader to mimic a missing data file.
+    monkeypatch.setattr(np, "genfromtxt", raise_missing_file)
+    with pytest.raises(IOError) as raised:
+        iris_data(version="uci")
+    assert "File not found" in str(raised.value)
+def test_iris_data_empty_version():
+    """An empty version string is not a valid choice and must raise a ValueError."""
+    with pytest.raises(ValueError) as excinfo:
+        iris_data(version="")
+    # The message should name the two accepted version strings.
+    assert "version must be 'uci' or 'corrected'" in str(excinfo.value)
+
+def test_iris_data_uppercase_version():
+    """The version check is case-sensitive: 'UCI' must raise a ValueError."""
+    with pytest.raises(ValueError) as excinfo:
+        iris_data(version="UCI")
+    # The message should name the two accepted version strings.
+    assert "version must be 'uci' or 'corrected'" in str(excinfo.value)
+
+def test_iris_data_incorrect_shape(monkeypatch):
+    """The 'corrected' version must raise an IndexError when the loaded table is too short.
+    np.genfromtxt is stubbed to hand back far fewer rows than the real data set has.
+    """
+    def short_table(*_args, **_kwargs):
+        # 30 rows x 5 columns of zeros stand in for the expected 150-row file.
+        return np.zeros(shape=(30, 5))
+
+    monkeypatch.setattr(np, "genfromtxt", short_table)
+    with pytest.raises(IndexError):
+        iris_data(version="corrected")
+
+def test_iris_data_returns_distinct_arrays():
+    """Check that consecutive iris_data() calls return independent arrays.
+
+    Distinct objects may still be views of one buffer, so memory sharing is checked too.
+    """
+    first_X, first_y = iris_data()
+    second_X, second_y = iris_data()
+    assert first_X is not second_X and not np.shares_memory(first_X, second_X)
+    assert first_y is not second_y and not np.shares_memory(first_y, second_y)
+def test_iris_data_numeric_version():
+    """A numeric version such as 123 is not an accepted choice and must raise a ValueError."""
+    with pytest.raises(ValueError) as raised:
+        iris_data(version=123)
+    # The exception text should name the two accepted version strings.
+    assert "version must be 'uci' or 'corrected'" in str(raised.value)
+
+def test_iris_data_whitespace_version():
+    """Surrounding whitespace is not stripped: ' uci ' must raise a ValueError."""
+    with pytest.raises(ValueError) as raised:
+        iris_data(version=" uci ")
+    # The exception text should name the two accepted version strings.
+    assert "version must be 'uci' or 'corrected'" in str(raised.value)
+
+def test_iris_data_empty_file(monkeypatch):
+    """An empty array from np.genfromtxt (as for an empty data file) must trigger an IndexError."""
+    monkeypatch.setattr(np, "genfromtxt", lambda *_args, **_kwargs: np.array([]))
+    with pytest.raises(IndexError):
+        iris_data(version="uci")
diff --git a/tests/test_autompg.py b/tests/test_autompg.py
@@ -0,0 +1,140 @@
+import numpy as np
+import pytest
+import os
+from mlxtend.data.autompg import autompg_data
+
+def test_autompg_data_returns_correct_arrays(monkeypatch):
+    """autompg_data must split the loaded table into features (X) and target (y)."""
+    # A 3 x 6 stand-in table holding the integers 1..18 in row-major order,
+    # i.e. three samples with five feature columns followed by one target column.
+    table = np.arange(1, 19).reshape(3, 6)
+
+    def fake_loader(fname, delimiter):
+        # Ignore the requested file and delimiter; always serve the stand-in table.
+        return table
+
+    monkeypatch.setattr(np, "genfromtxt", fake_loader)
+
+    features, target = autompg_data()
+
+    # Everything but the final column is X; the final column alone is y.
+    want_features = table[:, :-1]
+    want_target = table[:, -1]
+    np.testing.assert_array_equal(features, want_features)
+    np.testing.assert_array_equal(target, want_target)
+
+def test_autompg_data_empty(monkeypatch):
+    """A table with zero rows must produce empty X and y arrays."""
+    # Zero rows, five columns: four would-be features plus the target column.
+    no_rows = np.empty(shape=(0, 5))
+
+    monkeypatch.setattr(np, "genfromtxt", lambda fname, delimiter: no_rows)
+
+    features, target = autompg_data()
+
+    assert features.shape == (0, 4)  # the last of the five columns is split off as y
+    assert target.shape == (0,)
+
+def test_autompg_data_invalid_input(monkeypatch):
+    """A 1-dimensional table cannot be split into X and y, so an IndexError is expected."""
+    # Only one axis here, whereas a table of rows and columns has two.
+    flat = np.array([1, 2, 3, 4, 5])
+
+    monkeypatch.setattr(np, "genfromtxt", lambda fname, delimiter: flat)
+
+    with pytest.raises(IndexError):
+        autompg_data()
+def test_autompg_data_single_sample(monkeypatch):
+    """A one-row table must still be split into X and y correctly."""
+    # One sample only: five feature values followed by the target value.
+    row = np.array([[10, 20, 30, 40, 50, 60]])
+    monkeypatch.setattr(np, "genfromtxt", lambda fname, delimiter: row)
+
+    features, target = autompg_data()
+    want_features = row[:, :-1]
+    want_target = row[:, -1]
+    np.testing.assert_array_equal(features, want_features)
+    np.testing.assert_array_equal(target, want_target)
+
+def test_autompg_data_one_column(monkeypatch):
+    """A single-column table must yield an X without columns and a full-length y.
+    The only column is the last one, so it becomes the target and nothing is
+    left over for the features: X has shape (n, 0) and y has shape (n,)."""
+    lone_column = np.array([[100], [200], [300]])
+    monkeypatch.setattr(np, "genfromtxt", lambda fname, delimiter: lone_column)
+
+    features, target = autompg_data()
+    # No feature columns remain once the target column is removed.
+    assert features.shape == (3, 0)
+    # The target is one-dimensional with one entry per row.
+    assert target.shape == (3,)
+
+def test_autompg_data_none(monkeypatch):
+    """autompg_data must raise a TypeError if np.genfromtxt hands back None instead of data."""
+    monkeypatch.setattr(np, "genfromtxt", lambda fname, delimiter: None)
+    with pytest.raises(TypeError):
+        autompg_data()
+def test_autompg_data_calls_genfromtxt(monkeypatch):
+    """autompg_data must load its data through np.genfromtxt using the right path and delimiter."""
+    recorded = []
+
+    def spy_loader(fname, delimiter):
+        # Remember how the loader was invoked, then serve a tiny 2 x 3 table:
+        # two feature columns followed by the target column.
+        recorded.append((fname, delimiter))
+        return np.array([[1, 2, 3], [4, 5, 6]])
+
+    monkeypatch.setattr(np, "genfromtxt", spy_loader)
+
+    features, target = autompg_data()
+
+    assert recorded, "np.genfromtxt was not called"
+    used_path, used_delimiter = recorded[0]
+    # The path has to finish with the bundled data file inside the "data" directory.
+    path_tail = os.path.join("data", "autompg.csv.gz")
+    assert used_path.endswith(path_tail), "The file path used is incorrect."
+    assert used_delimiter == ",", "The delimiter used is not a comma."
+
+    # X holds the first two columns of the served table, y the third.
+    want_features = np.array([[1, 2], [4, 5]])
+    want_target = np.array([3, 6])
+    np.testing.assert_array_equal(features, want_features)
+    np.testing.assert_array_equal(target, want_target)
+def test_autompg_data_with_nans(monkeypatch):
+    """np.nan entries in the loaded table must survive the split into X and y."""
+    # Two rows, each carrying one missing value among the feature columns.
+    nan = np.nan
+    table = np.array([[1.0, nan, 3.0, 4.0], [5.0, 6.0, nan, 8.0]])
+
+    # Serve the table above in place of the real file contents.
+    monkeypatch.setattr(np, "genfromtxt", lambda fname, delimiter: table)
+
+    features, target = autompg_data()
+    want_features = table[:, :-1]
+    want_target = table[:, -1]
+    # assert_array_equal treats NaNs in matching positions as equal.
+    np.testing.assert_array_equal(features, want_features)
+    np.testing.assert_array_equal(target, want_target)
+
+def test_autompg_data_list_input(monkeypatch):
+    """autompg_data must raise a TypeError if np.genfromtxt yields a list rather than an ndarray."""
+    # Nested built-in lists, deliberately not wrapped in a NumPy array.
+    nested_lists = [[1, 2, 3], [4, 5, 6]]
+    monkeypatch.setattr(np, "genfromtxt", lambda fname, delimiter: nested_lists)
+
+    with pytest.raises(TypeError):
+        autompg_data()
+
+def test_autompg_data_non_numeric(monkeypatch):
+    """String-valued tables must be split into X and y just like numeric ones."""
+    # A 2 x 3 array of one-character strings takes the place of numeric data.
+    letters = np.array([["a", "b", "c"], ["d", "e", "f"]])
+
+    def fake_loader(fname, delimiter):
+        return letters
+    monkeypatch.setattr(np, "genfromtxt", fake_loader)
+
+    features, target = autompg_data()
+    want_features, want_target = letters[:, :-1], letters[:, -1]
+    np.testing.assert_array_equal(features, want_features)
+    np.testing.assert_array_equal(target, want_target)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from: pytest
		# This file was generated automatically by CodeBeaver based on your repository. Learn how to customize it here: https://docs.codebeaver.ai/open-source/codebeaver-yml/