-
Notifications
You must be signed in to change notification settings - Fork 7.1k
[Data] Fix SF 100 release test #58816
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 18 commits
0abbd50
0340d58
28ebd4b
9b935b4
3d79ecc
f430e5e
d9ba322
bc4ce3d
7f223e8
4af7aea
bbb0586
b4c0472
b23c251
f995052
5bc7889
74c60a3
8b1ecc7
35bcf3e
398d038
514baa2
4c531c6
11f563e
d45fc6d
d74dad9
d2aa269
3f4a317
ceeb3a7
6951c51
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -27,7 +27,10 @@ | |||||
|
|
||||||
| import ray | ||||||
| from ray import ObjectRef | ||||||
| from ray._private.ray_constants import env_integer | ||||||
| from ray._private.ray_constants import ( | ||||||
| DEFAULT_OBJECT_STORE_MEMORY_PROPORTION, | ||||||
| env_integer, | ||||||
| ) | ||||||
| from ray.actor import ActorHandle | ||||||
| from ray.data._internal.arrow_block import ArrowBlockBuilder | ||||||
| from ray.data._internal.arrow_ops.transform_pyarrow import ( | ||||||
|
|
@@ -1179,7 +1182,12 @@ def _get_aggregator_num_cpus( | |||||
| # | ||||||
| cap = min(4.0, total_available_cluster_resources.cpu * 0.25 / num_aggregators) | ||||||
|
|
||||||
| target_num_cpus = min(cap, estimated_aggregator_memory_required / (4 * GiB)) | ||||||
| worker_heap_memory_proportion = 1 - DEFAULT_OBJECT_STORE_MEMORY_PROPORTION | ||||||
| target_num_cpus = min( | ||||||
| cap, | ||||||
| estimated_aggregator_memory_required | ||||||
| / (4 * GiB * worker_heap_memory_proportion), | ||||||
| ) | ||||||
|
|
||||||
| # Round resource to the 2nd decimal place (for readability) | ||||||
| return round(target_num_cpus, 2) | ||||||
|
|
@@ -1241,18 +1249,25 @@ def _estimate_aggregator_memory_allocation( | |||||
| num_partitions: int, | ||||||
| estimated_dataset_bytes: int, | ||||||
| ) -> int: | ||||||
| max_partitions_for_aggregator = math.ceil( | ||||||
| num_partitions / num_aggregators | ||||||
| ) # Max number of partitions that a single aggregator might handle | ||||||
| partition_byte_size_estimate = math.ceil( | ||||||
| estimated_dataset_bytes / num_partitions | ||||||
| ) # Estimated byte size of a single partition | ||||||
|
|
||||||
| # Add 30% buffer to account for data skew | ||||||
| SKEW_FACTOR = 1.3 | ||||||
|
||||||
| SKEW_FACTOR = 1.3 | |
| SHUFFLE_AGGREGATOR_MEMORY_ESTIMATE_SKEW_FACTOR = 1.3 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's preserve the structure of estimating individual components (shuffle, output, working memory)
Uh oh!
There was an error while loading. Please reload this page.