Skip to content

Commit e595e02

Browse files
committed
[Data] Make test_dataset_throughput deterministic by increasing workload and applying tolerance
1 parent 5d24749 commit e595e02

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

python/ray/data/tests/test_stats.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1752,18 +1752,20 @@ def test_dataset_throughput(shutdown_only):
17521752
ray.shutdown()
17531753
ray.init(num_cpus=2)
17541754

1755-
f = dummy_map_batches_sleep(0.01)
1755+
f = dummy_map_batches_sleep(0.03)
17561756
ds = ray.data.range(100).map(f).materialize().map(f).materialize()
17571757

17581758
operator_pattern = re.compile(
17591759
r"Operator (\d+).*?\* Operator throughput:\s*.*?\* Ray Data throughput: (\d+\.\d+) rows/s.*?\* Estimated single node throughput: (\d+\.\d+) rows/s",
17601760
re.DOTALL,
17611761
)
17621762

1763+
Tolerance = 0.9
1764+
17631765
# Ray Data throughput should be at least the single node throughput, scaled by
17641766
# Tolerance to absorb timing noise, for the multi-cpu case.
17651767
for match in operator_pattern.findall(ds.stats()):
1766-
assert float(match[1]) >= float(match[2])
1768+
assert float(match[1]) >= float(match[2]) * Tolerance
17671769

17681770
# Pattern to match dataset throughput
17691771
dataset_pattern = re.compile(
@@ -1772,7 +1774,7 @@ def test_dataset_throughput(shutdown_only):
17721774
)
17731775

17741776
dataset_match = dataset_pattern.search(ds.stats())
1775-
assert float(dataset_match[1]) >= float(dataset_match[2])
1777+
assert float(dataset_match[1]) >= float(dataset_match[2]) * Tolerance
17761778

17771779

17781780
def test_individual_operator_num_rows(shutdown_only):

0 commit comments

Comments
 (0)