-
Notifications
You must be signed in to change notification settings - Fork 7k
[Train] Per dataset execution_option for DataConfig #58717
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
justinvyu
merged 17 commits into
ray-project:master
from
xinyuangui2:allow-config-for-each-ds
Nov 24, 2025
Merged
Changes from all commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
68de886
per dataset config
xinyuangui2 afc4de6
fix unittest
xinyuangui2 290099d
fix uniitest
xinyuangui2 0969f6a
doc fix
xinyuangui2 57e400b
Merge branch 'master' into allow-config-for-each-ds
xinyuangui2 68132c0
Update python/ray/train/_internal/data_config.py
xinyuangui2 26ddb12
Update python/ray/train/_internal/data_config.py
xinyuangui2 3193638
Merge branch 'master' into allow-config-for-each-ds
xinyuangui2 315a5f9
resolve comments
xinyuangui2 8c9afa6
add exclude resource test
xinyuangui2 56f0966
Merge branch 'master' into allow-config-for-each-ds
xinyuangui2 39d82fd
move tests to v2
xinyuangui2 e415a83
Merge branch 'master' into allow-config-for-each-ds
xinyuangui2 35a9639
fix unittest
xinyuangui2 b72f08e
default dict use deepcopy
xinyuangui2 949b3be
Merge branch 'master' into allow-config-for-each-ds
xinyuangui2 ffacf63
resolve comments
xinyuangui2 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| from ray.data._internal.execution.interfaces.execution_options import ( | ||
| ExecutionOptions, | ||
| ) | ||
| from ray.train import DataConfig | ||
|
|
||
|
|
||
def test_per_dataset_execution_options_single(ray_start_4_cpus):
    """A lone ExecutionOptions instance is used for every dataset name."""
    # Build one options object with both flags explicitly set to True.
    shared_options = ExecutionOptions()
    shared_options.preserve_order = True
    shared_options.verbose_progress = True

    config = DataConfig(execution_options=shared_options)

    # Whichever dataset name is looked up, the same settings must come back.
    for dataset_name in ("train", "test", "val"):
        resolved = config._get_execution_options(dataset_name)
        assert resolved.preserve_order is True
        assert resolved.verbose_progress is True
|
|
||
|
|
||
def test_per_dataset_execution_options_dict(ray_start_4_cpus):
    """Dict-valued execution_options are applied per dataset name.

    Datasets listed in the dict must get back their own options, including
    the resource limits. A dataset missing from the dict is compared against
    ``DataConfig.default_ingest_options()``.
    """
    # Options for the "train" dataset.
    train_opts = ExecutionOptions()
    train_opts.preserve_order = True
    train_opts.verbose_progress = True
    train_opts.resource_limits = train_opts.resource_limits.copy(cpu=4, gpu=2)

    # Options for the "test" dataset, deliberately different from "train".
    test_opts = ExecutionOptions()
    test_opts.preserve_order = False
    test_opts.verbose_progress = False
    test_opts.resource_limits = test_opts.resource_limits.copy(cpu=2, gpu=1)

    config = DataConfig(
        execution_options={
            "train": train_opts,
            "test": test_opts,
        }
    )

    # Expected (preserve_order, verbose_progress, cpu, gpu) per explicit entry.
    expectations = {
        "train": (True, True, 4, 2),
        "test": (False, False, 2, 1),
    }
    for dataset_name, (order, verbose, cpu, gpu) in expectations.items():
        resolved = config._get_execution_options(dataset_name)
        assert resolved.preserve_order is order
        assert resolved.verbose_progress is verbose
        assert resolved.resource_limits.cpu == cpu
        assert resolved.resource_limits.gpu == gpu

    # "val" has no entry in the dict, so it is checked against the defaults.
    baseline = DataConfig.default_ingest_options()
    val_opts = config._get_execution_options("val")
    assert val_opts.preserve_order == baseline.preserve_order
    assert val_opts.verbose_progress == baseline.verbose_progress
    val_limits = val_opts.resource_limits
    baseline_limits = baseline.resource_limits
    assert val_limits.cpu == baseline_limits.cpu
    assert val_limits.gpu == baseline_limits.gpu
|
|
||
|
|
||
def test_per_dataset_execution_options_default(ray_start_4_cpus):
    """Test that ``None`` or an empty dict makes every dataset use the defaults.

    For both inputs, the options retrieved for each dataset name are compared
    with ``DataConfig.default_ingest_options()`` on ``preserve_order``,
    ``verbose_progress`` and the cpu/gpu resource limits, so a regression in
    any of those fields is caught rather than only ``preserve_order``.
    """
    default_options = DataConfig.default_ingest_options()
    default_limits = default_options.resource_limits

    # Both "no options given" spellings must behave the same way.
    for execution_options in (None, {}):
        data_config = DataConfig(execution_options=execution_options)

        for dataset_name in ("train", "test"):
            retrieved = data_config._get_execution_options(dataset_name)

            assert retrieved.preserve_order == default_options.preserve_order
            assert retrieved.verbose_progress == default_options.verbose_progress
            assert retrieved.resource_limits.cpu == default_limits.cpu
            assert retrieved.resource_limits.gpu == default_limits.gpu
|
|
||
|
|
||
if __name__ == "__main__":
    # Allow running this file directly: hand it to pytest with the -v and -x
    # flags and use pytest's return code as the process exit status.
    import sys

    import pytest

    exit_code = pytest.main(["-v", "-x", __file__])
    sys.exit(exit_code)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.