Commit da9e36b

1. Remove the "per node throughput" line from the "Dataset throughput" section.
2. Rename "per node throughput" to "per task throughput" in the "Operator throughput" sections.

Signed-off-by: dancingactor <[email protected]>
1 parent 1b087b3 commit da9e36b
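
For context, the text being edited here is the summary returned by Dataset.stats(). A minimal sketch of how to surface it, assuming any small pipeline (the range size and identity map function below are placeholders):

import ray

# Build a small pipeline and materialize it so stats are populated.
ds = ray.data.range(1000).map_batches(lambda batch: batch)
ds.take_all()

# After this commit, each operator section reports "Estimated single task
# throughput" and the "Dataset throughput" section reports only the Ray Data
# throughput.
print(ds.stats())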

File tree: 2 files changed (+23, -41 lines)


python/ray/data/_internal/stats.py (2 additions, 8 deletions)

@@ -1211,20 +1211,14 @@ def to_string(
         output_num_rows = self.operators_stats[-1].output_num_rows
         total_num_out_rows = output_num_rows["sum"] if output_num_rows else 0
         wall_time = self.get_total_wall_time()
-        total_time_all_blocks = self.get_total_time_all_blocks()
-        if total_num_out_rows and wall_time and total_time_all_blocks:
+        if total_num_out_rows and wall_time:
             out += "\n"
             out += "Dataset throughput:\n"
             out += (
                 "\t* Ray Data throughput:"
                 f" {total_num_out_rows / wall_time} "
                 "rows/s\n"
             )
-            out += (
-                "\t* Estimated single node throughput:"
-                f" {total_num_out_rows / total_time_all_blocks} "
-                "rows/s\n"
-            )
         if verbose_stats_logs and add_global_stats:
             out += "\n" + self.runtime_metrics()

@@ -1678,7 +1672,7 @@ def __str__(self) -> str:
                 "rows/s\n"
             )
             out += (
-                indent + "\t* Estimated single node throughput:"
+                indent + "\t* Estimated single task throughput:"
                 f" {total_num_out_rows / wall_time_stats['sum']} "
                 "rows/s\n"
             )
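
The two throughput figures kept after this change are computed as shown above: the dataset-level number divides total output rows by end-to-end wall time, and the operator-level number divides output rows by the summed remote wall time of the operator's tasks. A worked sketch with made-up values (the real inputs come from the collected block stats):

# Hypothetical stand-ins for the collected statistics.
total_num_out_rows = 1_000_000   # rows emitted
wall_time = 20.0                 # end-to-end wall-clock seconds
task_wall_time_sum = 80.0        # summed remote wall time across all tasks

# Dataset-level: overall pipeline throughput (the only dataset metric kept).
ray_data_throughput = total_num_out_rows / wall_time               # 50,000 rows/s

# Operator-level: what one task would achieve processing every block serially,
# now labeled "Estimated single task throughput".
single_task_throughput = total_num_out_rows / task_wall_time_sum   # 12,500 rows/s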

python/ray/data/tests/test_stats.py (21 additions, 33 deletions)

@@ -424,7 +424,7 @@ def test_streaming_split_stats(ray_start_regular_shared, restore_data_context):
 * Total input num rows: N rows
 * Total output num rows: N rows
 * Ray Data throughput: N rows/s
-* Estimated single node throughput: N rows/s
+* Estimated single task throughput: N rows/s
 * Extra metrics: {extra_metrics_1}
 
 Operator N split(N, equal=False): \n"""

@@ -491,7 +491,7 @@ def test_large_args_scheduling_strategy(
 f" * Total input num rows: N rows\n"
 f" * Total output num rows: N rows\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
+f" * Estimated single task throughput: N rows/s\n"
 f"{read_extra_metrics}\n"
 f"Operator N MapBatches(dummy_map_batches): {EXECUTION_STRING}\n"
 f"* Remote wall time: T min, T max, T mean, T total\n"

@@ -506,12 +506,11 @@ def test_large_args_scheduling_strategy(
 f" * Total input num rows: N rows\n"
 f" * Total output num rows: N rows\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
+f" * Estimated single task throughput: N rows/s\n"
 f"{map_extra_metrics}"
 f"\n"
 f"Dataset throughput:\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
 f"{gen_runtime_metrics_str(['ReadRange','MapBatches(dummy_map_batches)'], verbose_stats_logs)}" # noqa: E501
 )
 print(canonicalize(stats))

@@ -552,12 +551,11 @@ def test_dataset_stats_basic(
 f" * Total input num rows: N rows\n"
 f" * Total output num rows: N rows\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
+f" * Estimated single task throughput: N rows/s\n"
 f"{gen_extra_metrics_str(STANDARD_EXTRA_METRICS_TASK_BACKPRESSURE, verbose_stats_logs)}" # noqa: E501
 f"\n"
 f"Dataset throughput:\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
 f"{gen_runtime_metrics_str(['ReadRange->MapBatches(dummy_map_batches)'], verbose_stats_logs)}" # noqa: E501
 )
 
@@ -579,12 +577,11 @@ def test_dataset_stats_basic(
 f" * Total input num rows: N rows\n"
 f" * Total output num rows: N rows\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
+f" * Estimated single task throughput: N rows/s\n"
 f"{gen_extra_metrics_str(STANDARD_EXTRA_METRICS_TASK_BACKPRESSURE, verbose_stats_logs)}" # noqa: E501
 f"\n"
 f"Dataset throughput:\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
 f"{gen_runtime_metrics_str(['ReadRange->MapBatches(dummy_map_batches)','Map(dummy_map_batches)'], verbose_stats_logs)}" # noqa: E501
 )
 
@@ -611,7 +608,7 @@ def test_dataset_stats_basic(
 f" * Total input num rows: N rows\n"
 f" * Total output num rows: N rows\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
+f" * Estimated single task throughput: N rows/s\n"
 f"{extra_metrics}\n"
 f"Operator N Map(dummy_map_batches): {EXECUTION_STRING}\n"
 f"* Remote wall time: T min, T max, T mean, T total\n"

@@ -626,7 +623,7 @@ def test_dataset_stats_basic(
 f" * Total input num rows: N rows\n"
 f" * Total output num rows: N rows\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
+f" * Estimated single task throughput: N rows/s\n"
 f"{extra_metrics}\n"
 f"Dataset iterator time breakdown:\n"
 f"* Total time overall: T\n"

@@ -642,7 +639,6 @@ def test_dataset_stats_basic(
 f"\n"
 f"Dataset throughput:\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
 f"{gen_runtime_metrics_str(['ReadRange->MapBatches(dummy_map_batches)','Map(dummy_map_batches)'], verbose_stats_logs)}" # noqa: E501
 )
 
@@ -671,7 +667,7 @@ def test_block_location_nums(ray_start_regular_shared, restore_data_context):
 f" * Total input num rows: N rows\n"
 f" * Total output num rows: N rows\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
+f" * Estimated single task throughput: N rows/s\n"
 f"\n"
 f"Dataset iterator time breakdown:\n"
 f"* Total time overall: T\n"

@@ -691,7 +687,6 @@ def test_block_location_nums(ray_start_regular_shared, restore_data_context):
 f"\n"
 f"Dataset throughput:\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
 )
 
 
@@ -1100,7 +1095,7 @@ def test_dataset_stats_shuffle(ray_start_regular_shared):
 * Total input num rows: N rows
 * Total output num rows: N rows
 * Ray Data throughput: N rows/s
-* Estimated single node throughput: N rows/s
+* Estimated single task throughput: N rows/s
 
 Suboperator N RandomShuffleReduce: N tasks executed, N blocks produced
 * Remote wall time: T min, T max, T mean, T total

@@ -1115,7 +1110,7 @@ def test_dataset_stats_shuffle(ray_start_regular_shared):
 * Total input num rows: N rows
 * Total output num rows: N rows
 * Ray Data throughput: N rows/s
-* Estimated single node throughput: N rows/s
+* Estimated single task throughput: N rows/s
 
 Operator N Repartition: executed in T
 
@@ -1132,7 +1127,7 @@ def test_dataset_stats_shuffle(ray_start_regular_shared):
 * Total input num rows: N rows
 * Total output num rows: N rows
 * Ray Data throughput: N rows/s
-* Estimated single node throughput: N rows/s
+* Estimated single task throughput: N rows/s
 
 Suboperator N RepartitionReduce: N tasks executed, N blocks produced
 * Remote wall time: T min, T max, T mean, T total

@@ -1147,11 +1142,10 @@ def test_dataset_stats_shuffle(ray_start_regular_shared):
 * Total input num rows: N rows
 * Total output num rows: N rows
 * Ray Data throughput: N rows/s
-* Estimated single node throughput: N rows/s
+* Estimated single task throughput: N rows/s
 
 Dataset throughput:
 * Ray Data throughput: N rows/s
-* Estimated single node throughput: N rows/s
 """
 )
 
@@ -1208,11 +1202,10 @@ def test_dataset_stats_range(ray_start_regular_shared, tmp_path):
 f" * Total input num rows: N rows\n"
 f" * Total output num rows: N rows\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
+f" * Estimated single task throughput: N rows/s\n"
 f"\n"
 f"Dataset throughput:\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
 )
 
 
@@ -1242,7 +1235,7 @@ def test_dataset_split_stats(ray_start_regular_shared, tmp_path, restore_data_co
 f" * Total input num rows: N rows\n"
 f" * Total output num rows: N rows\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
+f" * Estimated single task throughput: N rows/s\n"
 f"\n"
 f"Operator N Split: {EXECUTION_STRING}\n"
 f"* Remote wall time: T min, T max, T mean, T total\n"

@@ -1257,7 +1250,7 @@ def test_dataset_split_stats(ray_start_regular_shared, tmp_path, restore_data_co
 f" * Total input num rows: N rows\n"
 f" * Total output num rows: N rows\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
+f" * Estimated single task throughput: N rows/s\n"
 f"\n"
 f"Operator N Map(<lambda>): {EXECUTION_STRING}\n"
 f"* Remote wall time: T min, T max, T mean, T total\n"

@@ -1272,11 +1265,10 @@ def test_dataset_split_stats(ray_start_regular_shared, tmp_path, restore_data_co
 f" * Total input num rows: N rows\n"
 f" * Total output num rows: N rows\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
+f" * Estimated single task throughput: N rows/s\n"
 f"\n"
 f"Dataset throughput:\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
 )
 
 
@@ -1474,7 +1466,7 @@ def test_streaming_stats_full(ray_start_regular_shared, restore_data_context):
 * Total input num rows: N rows
 * Total output num rows: N rows
 * Ray Data throughput: N rows/s
-* Estimated single node throughput: N rows/s
+* Estimated single task throughput: N rows/s
 
 Dataset iterator time breakdown:
 * Total time overall: T

@@ -1490,7 +1482,6 @@ def test_streaming_stats_full(ray_start_regular_shared, restore_data_context):
 
 Dataset throughput:
 * Ray Data throughput: N rows/s
-* Estimated single node throughput: N rows/s
 """
 )
 
@@ -1515,11 +1506,10 @@ def test_write_ds_stats(ray_start_regular_shared, tmp_path):
 * Total input num rows: N rows
 * Total output num rows: N rows
 * Ray Data throughput: N rows/s
-* Estimated single node throughput: N rows/s
+* Estimated single task throughput: N rows/s
 
 Dataset throughput:
 * Ray Data throughput: N rows/s
-* Estimated single node throughput: N rows/s
 """
 )
 
@@ -1548,7 +1538,7 @@ def test_write_ds_stats(ray_start_regular_shared, tmp_path):
 * Total input num rows: N rows
 * Total output num rows: N rows
 * Ray Data throughput: N rows/s
-* Estimated single node throughput: N rows/s
+* Estimated single task throughput: N rows/s
 
 Operator N Write: {EXECUTION_STRING}
 * Remote wall time: T min, T max, T mean, T total

@@ -1563,11 +1553,10 @@ def test_write_ds_stats(ray_start_regular_shared, tmp_path):
 * Total input num rows: N rows
 * Total output num rows: N rows
 * Ray Data throughput: N rows/s
-* Estimated single node throughput: N rows/s
+* Estimated single task throughput: N rows/s
 
 Dataset throughput:
 * Ray Data throughput: N rows/s
-* Estimated single node throughput: N rows/s
 """
 )
 
@@ -1908,15 +1897,14 @@ def test_spilled_stats(shutdown_only, verbose_stats_logs, restore_data_context):
 f" * Total input num rows: N rows\n"
 f" * Total output num rows: N rows\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
+f" * Estimated single task throughput: N rows/s\n"
 f"{extra_metrics}\n"
 f"Cluster memory:\n"
 f"* Spilled to disk: M\n"
 f"* Restored from disk: M\n"
 f"\n"
 f"Dataset throughput:\n"
 f" * Ray Data throughput: N rows/s\n"
-f" * Estimated single node throughput: N rows/s\n"
 f"{gen_runtime_metrics_str(['ReadRange->MapBatches(<lambda>)'], verbose_stats_logs)}" # noqa: E501
 )
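
These expected strings are compared against a canonicalized form of the real stats output, with concrete values collapsed to placeholders such as N, T, and M. A minimal sketch of that kind of canonicalization, assuming a simple regex pass (the actual canonicalize helper in test_stats.py may differ):

import re

def canonicalize_sketch(stats: str) -> str:
    # Collapse memory sizes to "M", durations to "T", and any remaining bare
    # numbers to "N" so the templates stay stable across runs.
    s = re.sub(r"[0-9\.]+[a-zA-Z]*B", "M", stats)    # e.g. "3.2MB" -> "M"
    s = re.sub(r"[0-9\.]+(ms|us|s|min)\b", "T", s)   # e.g. "1.5s"  -> "T"
    s = re.sub(r"[0-9\.]+", "N", s)                  # e.g. "52345.6" -> "N"
    return s

print(canonicalize_sketch("* Ray Data throughput: 52345.6 rows/s"))
# -> "* Ray Data throughput: N rows/s"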
