Skip to content

Commit 7ea90c7

Browse files
Remove JIT dep in memory leak check and reorganize tests (#830)
* Runs DataFrames tests that do not have jit dependencies in separate sessions on CI to avoid importing JIT before a non-jit test * Removes dependency on numba/compiler in memory_leak_check * Once scatter/gather don't use JIT, we can remove a majority of the tests from the "JIT dependency"
1 parent d9ddf48 commit 7ea90c7

31 files changed

+128
-60
lines changed

.github/workflows/_test_python_source.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ on:
2323
required: false
2424
default: 'ubuntu-latest'
2525
type: string
26-
# Options: DF_LIB, SPAWN, NORMAL
26+
# Options: DF_LIB, DF_LIB_NO_JIT, SPAWN, NORMAL
2727
test-type:
2828
type: string
2929
description: The kind of tests to run e.g. spawn tests
@@ -130,7 +130,7 @@ jobs:
130130
# Disabling the DataFrame library for spawn tests since some of the tests
131131
# create Pandas manager states for testing that are not fully functional.
132132
BODO_ENABLE_DATAFRAME_LIBRARY: ${{ inputs.test-type != 'SPAWN' && '1' || '0' }}
133-
BODO_ENABLE_TEST_DATAFRAME_LIBRARY: ${{ inputs.test-type == 'DF_LIB' && '1' || '0' }}
133+
BODO_ENABLE_TEST_DATAFRAME_LIBRARY: ${{ startsWith(inputs.test-type, 'DF_LIB') && '1' || '0' }}
134134
BODOSQL_PY4J_GATEWAY_PORT: "auto"
135135
BODO_SPAWN_MODE: "0"
136136
BODO_BUFFER_POOL_REMOTE_MODE: "1"

.github/workflows/pr_ci.yml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,19 @@ jobs:
137137
test-type: SPAWN
138138
secrets: inherit
139139

140+
df-lib-non-jit-ci:
141+
needs: [compile-bodo]
142+
name: Test DF Library Non-JIT
143+
uses: ./.github/workflows/_test_python_source.yml
144+
with:
145+
batch: 1
146+
total-batches: 1
147+
pytest-marker: "df_lib and not jit_dependency and not weekly and not iceberg"
148+
collect-coverage: true
149+
os: ${{ inputs.runner_os || 'ubuntu-latest' }}
150+
test-type: "DF_LIB_NO_JIT"
151+
secrets: inherit
152+
140153
df-lib-ci:
141154
needs: [compile-bodo]
142155
name: Test DF Library
@@ -149,7 +162,7 @@ jobs:
149162
with:
150163
batch: ${{ matrix.batch }}
151164
total-batches: 2
152-
pytest-marker: "df_lib and not weekly and not iceberg"
165+
pytest-marker: "df_lib and jit_dependency and not weekly and not iceberg"
153166
collect-coverage: true
154167
os: ${{ inputs.runner_os || 'ubuntu-latest' }}
155168
test-type: "DF_LIB"
@@ -241,6 +254,7 @@ jobs:
241254
needs:
242255
- collect-results
243256
- df-lib-ci
257+
- df-lib-non-jit-ci
244258
- validate
245259
runs-on: ubuntu-latest
246260
steps:

bodo/libs/_array.cpp

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -955,9 +955,38 @@ table_info* retrieve_table_py_entry(table_info* in_table,
955955
}
956956
}
957957

958+
// Macro to define a Python wrapper for a memory stats functions
959+
// (Avoids numba JIT dependency in testing)
960+
#define DEFINE_NOARG_STATS_WRAPPER(py_name, func_name) \
961+
static PyObject* py_name(PyObject* self, PyObject* args) { \
962+
if (PyTuple_Size(args) != 0) { \
963+
PyErr_SetString(PyExc_TypeError, \
964+
#func_name "() does not take arguments"); \
965+
return nullptr; \
966+
} \
967+
return PyLong_FromSize_t(func_name()); \
968+
}
969+
970+
DEFINE_NOARG_STATS_WRAPPER(get_stats_alloc_py_wrapper, get_stats_alloc)
971+
DEFINE_NOARG_STATS_WRAPPER(get_stats_free_py_wrapper, get_stats_free)
972+
DEFINE_NOARG_STATS_WRAPPER(get_stats_mi_alloc_py_wrapper, get_stats_mi_alloc)
973+
DEFINE_NOARG_STATS_WRAPPER(get_stats_mi_free_py_wrapper, get_stats_mi_free)
974+
975+
#undef DEFINE_NOARG_STATS_WRAPPER
976+
977+
static PyMethodDef array_ext_methods[] = {
978+
#define declmethod(func) {#func, (PyCFunction)func, METH_VARARGS, NULL}
979+
declmethod(get_stats_alloc_py_wrapper),
980+
declmethod(get_stats_free_py_wrapper),
981+
declmethod(get_stats_mi_alloc_py_wrapper),
982+
declmethod(get_stats_mi_free_py_wrapper),
983+
{nullptr},
984+
#undef declmethod
985+
};
986+
958987
PyMODINIT_FUNC PyInit_array_ext(void) {
959988
PyObject* m;
960-
MOD_DEF(m, "array_ext", "No docs", nullptr);
989+
MOD_DEF(m, "array_ext", "No docs", array_ext_methods);
961990
if (m == nullptr) {
962991
return nullptr;
963992
}
@@ -1040,10 +1069,6 @@ PyMODINIT_FUNC PyInit_array_ext(void) {
10401069
SetAttrStringFromVoidPtr(m, pd_pyarrow_array_from_bodo_array_py_entry);
10411070
SetAttrStringFromVoidPtr(m, string_array_from_sequence);
10421071
SetAttrStringFromVoidPtr(m, is_na_value);
1043-
SetAttrStringFromVoidPtr(m, get_stats_alloc);
1044-
SetAttrStringFromVoidPtr(m, get_stats_free);
1045-
SetAttrStringFromVoidPtr(m, get_stats_mi_alloc);
1046-
SetAttrStringFromVoidPtr(m, get_stats_mi_free);
10471072
SetAttrStringFromVoidPtr(m, array_info_getitem);
10481073
SetAttrStringFromVoidPtr(m, array_info_getdata1);
10491074
SetAttrStringFromVoidPtr(m, array_info_unpin);

bodo/pytest.ini

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ markers =
2727
tz_aware: timezone-aware datetime tests
2828
flaky: flaky tests
2929
df_lib: DataFrame library tests
30+
jit_dependency: marks Bodo DataFrames tests that require jit dependencies
31+
3032

3133
# Used for Azure NP=3
3234
bodosql_1of7: part 1/7 of the test suite

bodo/tests/conftest.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import os
99
import shutil
1010
import subprocess
11+
import sys
1112
import time
1213
import traceback
1314
from collections.abc import Callable, Generator
@@ -94,7 +95,6 @@ def memory_leak_check():
9495
Equivalent to Numba's MemoryLeakMixin:
9596
https://github.com/numba/numba/blob/13ece9b97e6f01f750e870347f231282325f60c3/numba/tests/support.py#L688
9697
"""
97-
import bodo.decorators # isort:skip # noqa
9898
import bodo.tests.utils
9999
import bodo.utils.allocation_tracking
100100

@@ -120,6 +120,16 @@ def memory_leak_check():
120120
assert total_mi_alloc == total_mi_free
121121

122122

123+
@pytest.fixture(scope="function")
124+
def jit_import_check():
125+
"""Fixture used to assert that a test should not import JIT."""
126+
jit_already_imported = "bodo.decorators" in sys.modules
127+
yield
128+
assert jit_already_imported or "bodo.decorators" not in sys.modules, (
129+
"Test was not explicitly marked as a JIT dependency, but imported JIT."
130+
)
131+
132+
123133
def item_file_name(item):
124134
"""Get the name of a pytest item. Uses the default pytest implementation, except for C++ tests, where we return the cached name"""
125135
if isinstance(item, (CppTestFile, CppTestItem)):
@@ -259,6 +269,24 @@ def pytest_collection_modifyitems(items):
259269
for marker in markers:
260270
item.add_marker(marker)
261271

272+
# If running tests in DataFrames mode, add a check that JIT is not imported to all tests
273+
# that don't explicitly depend on JIT via the jit_dependency marker.
274+
if bodo.test_dataframe_library_enabled:
275+
assert "bodo.decorators" not in sys.modules, (
276+
"JIT was imported before pytest_collection_modifyitems finished."
277+
)
278+
279+
no_jit_dep = [
280+
item for item in items if not item.get_closest_marker("jit_dependency")
281+
]
282+
283+
for item in no_jit_dep:
284+
if (
285+
hasattr(item, "fixturenames")
286+
and "jit_import_check" not in item.fixturenames
287+
):
288+
item.fixturenames = ["jit_import_check"] + item.fixturenames
289+
262290

263291
def group_from_hash(testname, num_groups):
264292
"""

bodo/tests/test_dataframe_part2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
pytest_pandas,
2626
)
2727

28-
pytestmark = pytest_pandas
28+
pytestmark = pytest_pandas + [pytest.mark.jit_dependency]
2929

3030

3131
def test_pd_isna_getitem(memory_leak_check):

bodo/tests/test_dataframe_part3.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
pytest_pandas,
3232
)
3333

34-
pytestmark = pytest_pandas
34+
pytestmark = pytest_pandas + [pytest.mark.jit_dependency]
3535

3636

3737
def test_dataframe_apply_method_str(memory_leak_check):

bodo/tests/test_df_lib/test_ai_funcs.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from bodo.spawn.spawner import spawn_process_on_nodes
1818
from bodo.tests.utils import _test_equal
1919

20+
pytestmark = pytest.mark.jit_dependency
21+
2022

2123
def test_write_s3_vectors(datapath):
2224
"""Test writing to S3 Vectors using Bodo DataFrame API."""

bodo/tests/test_df_lib/test_direct_series_methods.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from bodo.pandas.plan import assert_executed_plan_count
88
from bodo.tests.utils import _test_equal
99

10+
pytestmark = pytest.mark.jit_dependency
11+
1012

1113
@pytest.mark.parametrize("use_index1", [True, False])
1214
@pytest.mark.parametrize("use_index2", [True, False])

bodo/tests/test_df_lib/test_end_to_end.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
)
2525
from bodo.tests.utils import _test_equal, pytest_mark_spawn_mode, temp_config_override
2626

27+
pytestmark = pytest.mark.jit_dependency
28+
2729
# Various Index kinds to use in test data (assuming maximum size of 100 in input)
2830
MAX_DATA_SIZE = 100
2931

0 commit comments

Comments
 (0)