Default prefetch configs for evaluation (#376)
`num_prefetched_partitions` and `parallel_prefetch_requests` are two optional parameters in the pipeline config, but they are required when `--evaluation-matrix` is specified. This PR defaults them to 1 (the default value according to the schema) when they are not specified but needed.

Additionally, this PR adds a small test to increase test coverage.
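For illustration, the resulting behavior is equivalent to a dict lookup with a default of 1. A minimal sketch using a hypothetical pipeline config, not the commit's exact code:

    # Hypothetical pipeline config whose training section omits both optional keys.
    pipeline_config = {"training": {}}

    # Fall back to the schema default of 1 when a key is absent.
    num_prefetched_partitions = pipeline_config["training"].get("num_prefetched_partitions", 1)
    parallel_prefetch_requests = pipeline_config["training"].get("parallel_prefetch_requests", 1)

    assert num_prefetched_partitions == 1
    assert parallel_prefetch_requests == 1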
XianzheMa authored Mar 28, 2024
1 parent ab9834e commit 61d39e5
Showing 4 changed files with 44 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/workflow.yaml
@@ -105,7 +105,7 @@ jobs:

       - name: Pytest
         run: |
-          micromamba run -n modyn pytest modyn --cov-reset --cache-clear --cov-fail-under=90
+          micromamba run -n modyn pytest modyn --cache-clear --cov-report term --cov-fail-under=90
           micromamba run -n modyn pytest > pytest-coverage.txt
       - name: Comment coverage
12 changes: 10 additions & 2 deletions modyn/supervisor/internal/grpc_handler.py
@@ -549,8 +549,16 @@ def start_evaluation(

         # In case we want to evaluate on trigger training set
         dataset_id = pipeline_config["data"]["dataset_id"]
-        num_prefetched_partitions = pipeline_config["training"]["num_prefetched_partitions"]
-        parallel_prefetch_requests = pipeline_config["training"]["parallel_prefetch_requests"]
+        num_prefetched_partitions = (
+            pipeline_config["training"]["num_prefetched_partitions"]
+            if "num_prefetched_partitions" in pipeline_config["training"]
+            else 1
+        )
+        parallel_prefetch_requests = (
+            pipeline_config["training"]["parallel_prefetch_requests"]
+            if "parallel_prefetch_requests" in pipeline_config["training"]
+            else 1
+        )

         train_eval_dataset = None
         for dataset in pipeline_config["evaluation"]["datasets"]:
11 changes: 9 additions & 2 deletions modyn/tests/supervisor/internal/grpc/test_supervisor_grpc_server.py
@@ -1,6 +1,7 @@
 # pylint: disable=unused-argument,redefined-outer-name
-from unittest.mock import patch
+from unittest.mock import Mock, patch

+from modyn.common.grpc import GenericGRPCServer
 from modyn.supervisor.internal.grpc.supervisor_grpc_server import SupervisorGRPCServer
 from modyn.supervisor.internal.grpc_handler import GRPCHandler
 from modyn.supervisor.internal.supervisor import Supervisor
@@ -25,8 +26,14 @@ def noop_constructor_mock(self, modyn_config: dict) -> None:
 @patch.object(GRPCHandler, "__init__", noop_constructor_mock)
 @patch.object(Supervisor, "init_cluster_connection", noop)
 @patch.object(Supervisor, "init_metadata_db", noop_init_metadata_db)
-def test_init():
+@patch.object(GenericGRPCServer, "__init__")
+def test_init(generic_grpc_server_init_mock: Mock):
     modyn_config = get_minimal_modyn_config()
     grpc_server = SupervisorGRPCServer(modyn_config)
     assert grpc_server.modyn_config == modyn_config
     assert isinstance(grpc_server.supervisor, Supervisor)
+
+    expected_callback_kwargs = {"supervisor": grpc_server.supervisor}
+    generic_grpc_server_init_mock.assert_called_once_with(
+        modyn_config, modyn_config["supervisor"]["port"], SupervisorGRPCServer.callback, expected_callback_kwargs
+    )
25 changes: 24 additions & 1 deletion modyn/tests/supervisor/internal/test_grpc_handler.py
@@ -444,7 +444,30 @@ def test_start_evaluation(test_connection_established):
     avail_method.assert_called_once()


-def test__prepare_evaluation_request():
+@patch("modyn.supervisor.internal.grpc_handler.grpc_connection_established", return_value=True)
+@patch.object(GRPCHandler, "_prepare_evaluation_request")
+def test_start_evaluation_default_prefetch_configs(prepare_evaluation_request_mock, grpc_connection_established_mock):
+    handler = GRPCHandler(get_simple_config(), mp.Queue(), mp.Queue(), mp.Queue())
+    handler.init_cluster_connection()
+
+    model_id = 10
+    pipeline_id = 11
+    trigger_id = 12
+    pipeline_config = get_minimal_pipeline_config()
+    eval_dataset = pipeline_config["evaluation"]["datasets"][0]
+    eval_dataset["dataset_id"] = pipeline_config["data"]["dataset_id"]
+    with patch.object(
+        handler.evaluator,
+        "evaluate_model",
+        return_value=EvaluateModelResponse(evaluation_started=True, evaluation_id=12, dataset_size=1000),
+    ):
+        handler.start_evaluation(model_id, pipeline_config, pipeline_id, trigger_id)
+    prepare_evaluation_request_mock.assert_called_once_with(
+        eval_dataset, model_id, "cpu", pipeline_id, trigger_id, 1, 1
+    )
+
+
+def test_prepare_evaluation_request():
     pipeline_config = get_minimal_pipeline_config()
     request = GRPCHandler._prepare_evaluation_request(pipeline_config["evaluation"]["datasets"][0], 23, "cpu")
