Default prefetch configs for evaluation (#376)
`num_prefetched_partitions` and `parallel_prefetch_requests` are two optional parameters in the pipeline config, but they are required when `--evaluation-matrix` is specified. This PR defaults them to 1 (the default value according to the schema) when they are not specified but needed.

Additionally, this PR adds a small test to increase test coverage.
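For illustration, the resulting behavior is equivalent to a dict lookup with a default of 1. A minimal sketch using a hypothetical pipeline config, not the commit's exact code:

    # Hypothetical pipeline config whose training section omits both optional keys.
    pipeline_config = {"training": {}}

    # Fall back to the schema default of 1 when a key is absent.
    num_prefetched_partitions = pipeline_config["training"].get("num_prefetched_partitions", 1)
    parallel_prefetch_requests = pipeline_config["training"].get("parallel_prefetch_requests", 1)

    assert num_prefetched_partitions == 1
    assert parallel_prefetch_requests == 1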
XianzheMa authored Mar 28, 2024
1 parent ab9834e commit 61d39e5
Showing 4 changed files with 44 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/workflow.yaml
@@ -105,7 +105,7 @@ jobs:

       - name: Pytest
         run: |
-          micromamba run -n modyn pytest modyn --cov-reset --cache-clear --cov-fail-under=90
+          micromamba run -n modyn pytest modyn --cache-clear --cov-report term --cov-fail-under=90
           micromamba run -n modyn pytest > pytest-coverage.txt
       - name: Comment coverage
12 changes: 10 additions & 2 deletions modyn/supervisor/internal/grpc_handler.py
@@ -549,8 +549,16 @@ def start_evaluation(

         # In case we want to evaluate on trigger training set
         dataset_id = pipeline_config["data"]["dataset_id"]
-        num_prefetched_partitions = pipeline_config["training"]["num_prefetched_partitions"]
-        parallel_prefetch_requests = pipeline_config["training"]["parallel_prefetch_requests"]
+        num_prefetched_partitions = (
+            pipeline_config["training"]["num_prefetched_partitions"]
+            if "num_prefetched_partitions" in pipeline_config["training"]
+            else 1
+        )
+        parallel_prefetch_requests = (
+            pipeline_config["training"]["parallel_prefetch_requests"]
+            if "parallel_prefetch_requests" in pipeline_config["training"]
+            else 1
+        )

         train_eval_dataset = None
         for dataset in pipeline_config["evaluation"]["datasets"]:
11 changes: 9 additions & 2 deletions modyn/tests/supervisor/internal/grpc/test_supervisor_grpc_server.py
@@ -1,6 +1,7 @@
 # pylint: disable=unused-argument,redefined-outer-name
-from unittest.mock import patch
+from unittest.mock import Mock, patch

+from modyn.common.grpc import GenericGRPCServer
 from modyn.supervisor.internal.grpc.supervisor_grpc_server import SupervisorGRPCServer
 from modyn.supervisor.internal.grpc_handler import GRPCHandler
 from modyn.supervisor.internal.supervisor import Supervisor
@@ -25,8 +26,14 @@ def noop_constructor_mock(self, modyn_config: dict) -> None:
 @patch.object(GRPCHandler, "__init__", noop_constructor_mock)
 @patch.object(Supervisor, "init_cluster_connection", noop)
 @patch.object(Supervisor, "init_metadata_db", noop_init_metadata_db)
-def test_init():
+@patch.object(GenericGRPCServer, "__init__")
+def test_init(generic_grpc_server_init_mock: Mock):
     modyn_config = get_minimal_modyn_config()
     grpc_server = SupervisorGRPCServer(modyn_config)
     assert grpc_server.modyn_config == modyn_config
     assert isinstance(grpc_server.supervisor, Supervisor)
+
+    expected_callback_kwargs = {"supervisor": grpc_server.supervisor}
+    generic_grpc_server_init_mock.assert_called_once_with(
+        modyn_config, modyn_config["supervisor"]["port"], SupervisorGRPCServer.callback, expected_callback_kwargs
+    )
25 changes: 24 additions & 1 deletion modyn/tests/supervisor/internal/test_grpc_handler.py
@@ -444,7 +444,30 @@ def test_start_evaluation(test_connection_established):
     avail_method.assert_called_once()


-def test__prepare_evaluation_request():
+@patch("modyn.supervisor.internal.grpc_handler.grpc_connection_established", return_value=True)
+@patch.object(GRPCHandler, "_prepare_evaluation_request")
+def test_start_evaluation_default_prefetch_configs(prepare_evaluation_request_mock, grpc_connection_established_mock):
+    handler = GRPCHandler(get_simple_config(), mp.Queue(), mp.Queue(), mp.Queue())
+    handler.init_cluster_connection()
+
+    model_id = 10
+    pipeline_id = 11
+    trigger_id = 12
+    pipeline_config = get_minimal_pipeline_config()
+    eval_dataset = pipeline_config["evaluation"]["datasets"][0]
+    eval_dataset["dataset_id"] = pipeline_config["data"]["dataset_id"]
+    with patch.object(
+        handler.evaluator,
+        "evaluate_model",
+        return_value=EvaluateModelResponse(evaluation_started=True, evaluation_id=12, dataset_size=1000),
+    ):
+        handler.start_evaluation(model_id, pipeline_config, pipeline_id, trigger_id)
+    prepare_evaluation_request_mock.assert_called_once_with(
+        eval_dataset, model_id, "cpu", pipeline_id, trigger_id, 1, 1
+    )
+
+
+def test_prepare_evaluation_request():
     pipeline_config = get_minimal_pipeline_config()
     request = GRPCHandler._prepare_evaluation_request(pipeline_config["evaluation"]["datasets"][0], 23, "cpu")
