From edc51bdce6a5ec40e11f7d98423e1e39f7c37eda Mon Sep 17 00:00:00 2001
From: jianoaix <iamjianxiao@gmail.com>
Date: Thu, 8 Dec 2022 23:20:23 +0000
Subject: [PATCH 01/11] Fix read_tfrecords_benchmark nightly test

Signed-off-by: jianoaix <iamjianxiao@gmail.com>
---
 release/nightly_tests/dataset/read_tfrecords_benchmark.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/release/nightly_tests/dataset/read_tfrecords_benchmark.py b/release/nightly_tests/dataset/read_tfrecords_benchmark.py
index 45a5cea2d24b..bf189b450d8f 100644
--- a/release/nightly_tests/dataset/read_tfrecords_benchmark.py
+++ b/release/nightly_tests/dataset/read_tfrecords_benchmark.py
@@ -25,7 +25,7 @@ def generate_tfrecords_from_images(
 
         # Convert images from NumPy to bytes
         def images_to_bytes(batch):
-            images_as_bytes = [image.tobytes() for image in batch]
+            images_as_bytes = [image.tobytes() for image in batch.values()]
             return pa.table({"image": images_as_bytes})
 
         ds = ds.map_batches(images_to_bytes, batch_format="numpy")

From bfd724d8ed3afa5c40a8dd17d85f6aee783ab0b1 Mon Sep 17 00:00:00 2001
From: jianoaix <iamjianxiao@gmail.com>
Date: Mon, 13 Feb 2023 21:14:18 +0000
Subject: [PATCH 02/11] Enable streaming executor by default

---
 python/ray/data/context.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/ray/data/context.py b/python/ray/data/context.py
index cb953f04696a..96cd4f86d00e 100644
--- a/python/ray/data/context.py
+++ b/python/ray/data/context.py
@@ -76,7 +76,7 @@
 # Whether to use the streaming executor. This only has an effect if the new execution
 # backend is enabled.
 DEFAULT_USE_STREAMING_EXECUTOR = bool(
-    int(os.environ.get("RAY_DATASET_USE_STREAMING_EXECUTOR", "0"))
+    int(os.environ.get("RAY_DATASET_USE_STREAMING_EXECUTOR", "1"))
 )
 
 # Whether to eagerly free memory (new backend only).

From e1d0df53cf1ab31dc78d058acd4142281465c4db Mon Sep 17 00:00:00 2001
From: jianoaix <iamjianxiao@gmail.com>
Date: Thu, 16 Feb 2023 18:28:21 +0000
Subject: [PATCH 03/11] current resource usage

---
 python/ray/data/_internal/execution/streaming_executor.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/ray/data/_internal/execution/streaming_executor.py b/python/ray/data/_internal/execution/streaming_executor.py
index db3783c897da..8647b7f6aa12 100644
--- a/python/ray/data/_internal/execution/streaming_executor.py
+++ b/python/ray/data/_internal/execution/streaming_executor.py
@@ -236,7 +236,8 @@ def _get_current_usage(self, topology: Topology) -> ExecutionResources:
             if isinstance(op, InputDataBuffer):
                 continue  # Don't count input refs towards dynamic memory usage.
             for bundle in state.outqueue:
-                cur_usage.object_store_memory += bundle.size_bytes()
+                if cur_usage.object_store_memory:
+                    cur_usage.object_store_memory += bundle.size_bytes()
         return cur_usage
 
     def _report_current_usage(

From 94a88eee5024b4ea00447833839325df34af8965 Mon Sep 17 00:00:00 2001
From: jianoaix <iamjianxiao@gmail.com>
Date: Thu, 16 Feb 2023 21:38:25 +0000
Subject: [PATCH 04/11] parquet test: num computed

---
 python/ray/data/tests/test_dataset_parquet.py | 65 ++++++++++---------
 1 file changed, 35 insertions(+), 30 deletions(-)

diff --git a/python/ray/data/tests/test_dataset_parquet.py b/python/ray/data/tests/test_dataset_parquet.py
index 472b69fb0335..7e98c7fd0e32 100644
--- a/python/ray/data/tests/test_dataset_parquet.py
+++ b/python/ray/data/tests/test_dataset_parquet.py
@@ -31,6 +31,11 @@
 from pytest_lazyfixture import lazy_fixture
 
 
+def check_num_computed(ds, expected) -> None:
+    if not ray.data.context.DatasetContext.get_current().use_streaming_executor:
+        ds._plan.execute()._num_computed() == expected
+
+
 @pytest.mark.parametrize(
     "fs,data_path",
     [
@@ -122,11 +127,11 @@ def test_parquet_read_basic(ray_start_regular_shared, fs, data_path):
     ds = ray.data.read_parquet(data_path, filesystem=fs)
 
     # Test metadata-only parquet ops.
-    assert ds._plan.execute()._num_computed() == 0
+    check_num_computed(ds, 0)
     assert ds.count() == 6
     assert ds.size_bytes() > 0
-    assert ds.schema() is not None
-    assert ds._plan.execute()._num_computed() == 1
+    # assert ds.schema() is not None
+    check_num_computed(ds, 1)
     input_files = ds.input_files()
     assert len(input_files) == 2, input_files
     assert "test1.parquet" in str(input_files)
@@ -139,11 +144,11 @@ def test_parquet_read_basic(ray_start_regular_shared, fs, data_path):
         repr(ds) == "Dataset(num_blocks=2, num_rows=6, "
         "schema={one: int64, two: string})"
     ), ds
-    assert ds._plan.execute()._num_computed() == 1
+    check_num_computed(ds, 1)
 
     # Forces a data read.
-    values = [[s["one"], s["two"]] for s in ds.take()]
-    assert ds._plan.execute()._num_computed() == 2
+    values = [[s["one"], s["two"]] for s in ds.take_all()]
+    check_num_computed(ds, 2)
     assert sorted(values) == [
         [1, "a"],
         [2, "b"],
@@ -201,7 +206,7 @@ def prefetch_file_metadata(self, pieces):
     )
 
     # Expect to lazily compute all metadata correctly.
-    assert ds._plan.execute()._num_computed() == 0
+    check_num_computed(ds, 0)
     assert ds.count() == 6
     assert ds.size_bytes() > 0
     assert ds.schema() is not None
@@ -217,11 +222,11 @@ def prefetch_file_metadata(self, pieces):
         repr(ds) == "Dataset(num_blocks=2, num_rows=6, "
         "schema={one: int64, two: string})"
     ), ds
-    assert ds._plan.execute()._num_computed() == 2
+    check_num_computed(ds, 2)
 
     # Forces a data read.
     values = [[s["one"], s["two"]] for s in ds.take()]
-    assert ds._plan.execute()._num_computed() == 2
+    check_num_computed(ds, 2)
     assert sorted(values) == [
         [1, "a"],
         [2, "b"],
@@ -278,7 +283,7 @@ def test_parquet_read_bulk(ray_start_regular_shared, fs, data_path):
     assert ds._meta_count() is None
 
     # Expect to lazily compute all metadata correctly.
-    assert ds._plan.execute()._num_computed() == 0
+    check_num_computed(ds, 0)
     assert ds.count() == 6
     assert ds.size_bytes() > 0
     assert ds.schema() is not None
@@ -294,11 +299,11 @@ def test_parquet_read_bulk(ray_start_regular_shared, fs, data_path):
         repr(ds) == "Dataset(num_blocks=2, num_rows=6, "
         "schema={one: int64, two: string})"
     ), ds
-    assert ds._plan.execute()._num_computed() == 2
+    check_num_computed(ds, 2)
 
     # Forces a data read.
     values = [[s["one"], s["two"]] for s in ds.take()]
-    assert ds._plan.execute()._num_computed() == 2
+    check_num_computed(ds, 2)
     assert sorted(values) == [
         [1, "a"],
         [2, "b"],
@@ -319,7 +324,7 @@ def test_parquet_read_bulk(ray_start_regular_shared, fs, data_path):
 
     # Forces a data read.
     values = [[s["one"], s["two"]] for s in ds.take()]
-    assert ds._plan.execute()._num_computed() == 2
+    check_num_computed(ds, 2)
     assert sorted(values) == [
         [1, "a"],
         [2, "b"],
@@ -368,7 +373,7 @@ def test_parquet_read_bulk_meta_provider(ray_start_regular_shared, fs, data_path
     assert ds._meta_count() is None
 
     # Expect to lazily compute all metadata correctly.
-    assert ds._plan.execute()._num_computed() == 0
+    check_num_computed(ds, 0)
     assert ds.count() == 6
     assert ds.size_bytes() > 0
     assert ds.schema() is not None
@@ -384,11 +389,11 @@ def test_parquet_read_bulk_meta_provider(ray_start_regular_shared, fs, data_path
         repr(ds) == "Dataset(num_blocks=2, num_rows=6, "
         "schema={one: int64, two: string})"
     ), ds
-    assert ds._plan.execute()._num_computed() == 2
+    check_num_computed(ds, 2)
 
     # Forces a data read.
     values = [[s["one"], s["two"]] for s in ds.take()]
-    assert ds._plan.execute()._num_computed() == 2
+    check_num_computed(ds, 2)
     assert sorted(values) == [
         [1, "a"],
         [2, "b"],
@@ -427,7 +432,7 @@ def test_parquet_read_partitioned(ray_start_regular_shared, fs, data_path):
     ds = ray.data.read_parquet(data_path, filesystem=fs)
 
     # Test metadata-only parquet ops.
-    assert ds._plan.execute()._num_computed() == 0
+    check_num_computed(ds, 0)
     assert ds.count() == 6
     assert ds.size_bytes() > 0
     assert ds.schema() is not None
@@ -443,11 +448,11 @@ def test_parquet_read_partitioned(ray_start_regular_shared, fs, data_path):
         "schema={two: string, "
         "one: dictionary<values=int32, indices=int32, ordered=0>})"
     ), ds
-    assert ds._plan.execute()._num_computed() == 1
+    check_num_computed(ds, 1)
 
     # Forces a data read.
     values = [[s["one"], s["two"]] for s in ds.take()]
-    assert ds._plan.execute()._num_computed() == 2
+    check_num_computed(ds, 2)
     assert sorted(values) == [
         [1, "a"],
         [1, "b"],
@@ -479,7 +484,7 @@ def test_parquet_read_partitioned_with_filter(ray_start_regular_shared, tmp_path
     )
 
     values = [[s["one"], s["two"]] for s in ds.take()]
-    assert ds._plan.execute()._num_computed() == 1
+    check_num_computed(ds, 1)
     assert sorted(values) == [[1, "a"], [1, "a"]]
 
     # 2 partitions, 1 empty partition, 2 block/read tasks, 1 empty block
@@ -489,7 +494,7 @@ def test_parquet_read_partitioned_with_filter(ray_start_regular_shared, tmp_path
     )
 
     values = [[s["one"], s["two"]] for s in ds.take()]
-    assert ds._plan.execute()._num_computed() == 2
+    check_num_computed(ds, 2)
     assert sorted(values) == [[1, "a"], [1, "a"]]
 
 
@@ -513,7 +518,7 @@ def test_parquet_read_partitioned_explicit(ray_start_regular_shared, tmp_path):
     )
 
     # Test metadata-only parquet ops.
-    assert ds._plan.execute()._num_computed() == 0
+    check_num_computed(ds, 0)
     assert ds.count() == 6
     assert ds.size_bytes() > 0
     assert ds.schema() is not None
@@ -527,11 +532,11 @@ def test_parquet_read_partitioned_explicit(ray_start_regular_shared, tmp_path):
         repr(ds) == "Dataset(num_blocks=2, num_rows=6, "
         "schema={two: string, one: int32})"
     ), ds
-    assert ds._plan.execute()._num_computed() == 1
+    check_num_computed(ds, 1)
 
     # Forces a data read.
     values = [[s["one"], s["two"]] for s in ds.take()]
-    assert ds._plan.execute()._num_computed() == 2
+    check_num_computed(ds, 2)
     assert sorted(values) == [
         [1, "a"],
         [1, "b"],
@@ -560,7 +565,7 @@ def _block_udf(block: pa.Table):
     ds = ray.data.read_parquet(str(tmp_path), parallelism=1, _block_udf=_block_udf)
 
     ones, twos = zip(*[[s["one"], s["two"]] for s in ds.take()])
-    assert ds._plan.execute()._num_computed() == 1
+    check_num_computed(ds, 1)
     np.testing.assert_array_equal(sorted(ones), np.array(one_data) + 1)
 
     # 2 blocks/read tasks
@@ -568,7 +573,7 @@ def _block_udf(block: pa.Table):
     ds = ray.data.read_parquet(str(tmp_path), parallelism=2, _block_udf=_block_udf)
 
     ones, twos = zip(*[[s["one"], s["two"]] for s in ds.take()])
-    assert ds._plan.execute()._num_computed() == 2
+    check_num_computed(ds, 2)
     np.testing.assert_array_equal(sorted(ones), np.array(one_data) + 1)
 
     # 2 blocks/read tasks, 1 empty block
@@ -581,7 +586,7 @@ def _block_udf(block: pa.Table):
     )
 
     ones, twos = zip(*[[s["one"], s["two"]] for s in ds.take()])
-    assert ds._plan.execute()._num_computed() == 2
+    check_num_computed(ds, 2)
     np.testing.assert_array_equal(sorted(ones), np.array(one_data[:2]) + 1)
 
 
@@ -611,17 +616,17 @@ def test_parquet_read_parallel_meta_fetch(ray_start_regular_shared, fs, data_pat
     ds = ray.data.read_parquet(data_path, filesystem=fs, parallelism=parallelism)
 
     # Test metadata-only parquet ops.
-    assert ds._plan.execute()._num_computed() == 0
+    check_num_computed(ds, 0)
     assert ds.count() == num_dfs * 3
     assert ds.size_bytes() > 0
     assert ds.schema() is not None
     input_files = ds.input_files()
     assert len(input_files) == num_dfs, input_files
-    assert ds._plan.execute()._num_computed() == 1
+    check_num_computed(ds, 1)
 
     # Forces a data read.
     values = [s["one"] for s in ds.take(limit=3 * num_dfs)]
-    assert ds._plan.execute()._num_computed() == parallelism
+    check_num_computed(ds, parallelism)
     assert sorted(values) == list(range(3 * num_dfs))
 
 

From c670a3b584968dac8d48d1a2f361c18abc57d732 Mon Sep 17 00:00:00 2001
From: jianoaix <iamjianxiao@gmail.com>
Date: Thu, 16 Feb 2023 23:38:15 +0000
Subject: [PATCH 05/11] resource limits

---
 python/ray/data/tests/test_dataset.py         | 4 ++--
 python/ray/data/tests/test_dataset_image.py   | 5 ++++-
 python/ray/data/tests/test_optimize.py        | 8 ++++++--
 python/ray/data/tests/test_size_estimation.py | 4 +++-
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/python/ray/data/tests/test_dataset.py b/python/ray/data/tests/test_dataset.py
index 933dc3952e1d..7322c9bc7df3 100644
--- a/python/ray/data/tests/test_dataset.py
+++ b/python/ray/data/tests/test_dataset.py
@@ -90,7 +90,7 @@ def test_bulk_lazy_eval_split_mode(shutdown_only, block_split, tmp_path):
 
 @pytest.mark.parametrize("pipelined", [False, True])
 def test_basic_actors(shutdown_only, pipelined):
-    ray.init(num_cpus=2)
+    ray.init(num_cpus=6)
     n = 5
     ds = ray.data.range(n)
     ds = maybe_pipeline(ds, pipelined)
@@ -152,7 +152,7 @@ def run():
 
 
 def test_callable_classes(shutdown_only):
-    ray.init(num_cpus=1)
+    ray.init(num_cpus=2)
     ds = ray.data.range(10, parallelism=10)
 
     class StatefulFn:
diff --git a/python/ray/data/tests/test_dataset_image.py b/python/ray/data/tests/test_dataset_image.py
index c544744e41a2..1d17ece2d860 100644
--- a/python/ray/data/tests/test_dataset_image.py
+++ b/python/ray/data/tests/test_dataset_image.py
@@ -114,13 +114,16 @@ def get_relative_path(path: str) -> str:
             "image-datasets/simple/image3.jpg",
         ]
 
-    def test_e2e_prediction(self, ray_start_regular_shared):
+    def test_e2e_prediction(self, shutdown_only):
         from ray.train.torch import TorchCheckpoint, TorchPredictor
         from ray.train.batch_predictor import BatchPredictor
 
         from torchvision import transforms
         from torchvision.models import resnet18
 
+        ray.shutdown()
+        ray.init(num_cpus=2)
+
         dataset = ray.data.read_images("example://image-datasets/simple")
         transform = transforms.ToTensor()
 
diff --git a/python/ray/data/tests/test_optimize.py b/python/ray/data/tests/test_optimize.py
index 15793f4ed3b0..9267d0b783d6 100644
--- a/python/ray/data/tests/test_optimize.py
+++ b/python/ray/data/tests/test_optimize.py
@@ -508,7 +508,9 @@ def test_optimize_equivalent_remote_args(ray_start_regular_shared):
             )
 
 
-def test_optimize_incompatible_stages(ray_start_regular_shared):
+def test_optimize_incompatible_stages(shutdown_only):
+    ray.shutdown()
+    ray.init(num_cpus=2)
     context = DatasetContext.get_current()
     context.optimize_fuse_stages = True
     context.optimize_fuse_read_stages = True
@@ -548,7 +550,9 @@ def test_optimize_incompatible_stages(ray_start_regular_shared):
     )
 
 
-def test_optimize_callable_classes(ray_start_regular_shared, tmp_path):
+def test_optimize_callable_classes(shutdown_only, tmp_path):
+    ray.shutdown()
+    ray.init(num_cpus=2)
     context = DatasetContext.get_current()
     context.optimize_fuse_stages = True
     context.optimize_fuse_read_stages = True
diff --git a/python/ray/data/tests/test_size_estimation.py b/python/ray/data/tests/test_size_estimation.py
index 018a85394260..63426126ebb0 100644
--- a/python/ray/data/tests/test_size_estimation.py
+++ b/python/ray/data/tests/test_size_estimation.py
@@ -193,7 +193,9 @@ def gen(name):
 
 
 @pytest.mark.parametrize("use_actors", [False, True])
-def test_split_map(ray_start_regular_shared, use_actors):
+def test_split_map(shutdown_only, use_actors):
+    ray.shutdown()
+    ray.init(num_cpus=2)
     kwargs = {}
     if use_actors:
         kwargs = {"compute": "actors"}

From c5a6aa9d7a9b57ef9702d93c14d4e25155736069 Mon Sep 17 00:00:00 2001
From: jianoaix <iamjianxiao@gmail.com>
Date: Wed, 22 Feb 2023 17:57:56 +0000
Subject: [PATCH 06/11] merge

---
 python/ray/data/_internal/execution/streaming_executor.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/ray/data/_internal/execution/streaming_executor.py b/python/ray/data/_internal/execution/streaming_executor.py
index 3d9f5779bd89..df3ebed6fd14 100644
--- a/python/ray/data/_internal/execution/streaming_executor.py
+++ b/python/ray/data/_internal/execution/streaming_executor.py
@@ -236,8 +236,7 @@ def _get_current_usage(self, topology: Topology) -> ExecutionResources:
             if isinstance(op, InputDataBuffer):
                 continue  # Don't count input refs towards dynamic memory usage.
             for bundle in state.outqueue:
-                if cur_usage.object_store_memory:
-                    cur_usage.object_store_memory += bundle.size_bytes()
+                cur_usage.object_store_memory += bundle.size_bytes()
         return cur_usage
 
     def _report_current_usage(

From 6c82ad253d45ccfed10f95b2eac2467f7b3da723 Mon Sep 17 00:00:00 2001
From: jianoaix <iamjianxiao@gmail.com>
Date: Wed, 22 Feb 2023 18:51:28 +0000
Subject: [PATCH 07/11] fix treating numpy as block format

---
 python/ray/data/dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/ray/data/dataset.py b/python/ray/data/dataset.py
index bd61c92f645e..065d3098d1a0 100644
--- a/python/ray/data/dataset.py
+++ b/python/ray/data/dataset.py
@@ -2825,7 +2825,7 @@ def iter_rows(self, *, prefetch_blocks: int = 0) -> Iterator[Union[T, TableRow]]
         for batch in self.iter_batches(
             batch_size=None, prefetch_blocks=prefetch_blocks, batch_format=batch_format
         ):
-            batch = BlockAccessor.for_block(batch)
+            batch = BlockAccessor.for_block(BlockAccessor.batch_to_block(batch))
             for row in batch.iter_rows():
                 yield row
 

From 4cbcbaeec4a102abfac13b1128f92c461a033dfd Mon Sep 17 00:00:00 2001
From: jianoaix <iamjianxiao@gmail.com>
Date: Wed, 22 Feb 2023 19:17:15 +0000
Subject: [PATCH 08/11] fix issues related to default dataset_format and
 partial execution in LazyBlocklist

---
 python/ray/data/tests/test_dataset.py | 46 +++++++++++++++++++++------
 1 file changed, 37 insertions(+), 9 deletions(-)

diff --git a/python/ray/data/tests/test_dataset.py b/python/ray/data/tests/test_dataset.py
index 7322c9bc7df3..33624bd25d03 100644
--- a/python/ray/data/tests/test_dataset.py
+++ b/python/ray/data/tests/test_dataset.py
@@ -1369,17 +1369,26 @@ def test_count_lazy(ray_start_regular_shared):
 
 def test_lazy_loading_exponential_rampup(ray_start_regular_shared):
     ds = ray.data.range(100, parallelism=20)
-    assert ds._plan.execute()._num_computed() == 0
+
+    def check_num_computed(expected):
+        if ray.data.context.DatasetContext.get_current().use_streaming_executor:
+            # In streaing executor, ds.take() will not invoke partial execution
+            # in LazyBlocklist.
+            assert ds._plan.execute()._num_computed() == 0
+        else:
+            assert ds._plan.execute()._num_computed() == expected
+
+    check_num_computed(0)
     assert ds.take(10) == list(range(10))
-    assert ds._plan.execute()._num_computed() == 2
+    check_num_computed(2)
     assert ds.take(20) == list(range(20))
-    assert ds._plan.execute()._num_computed() == 4
+    check_num_computed(4)
     assert ds.take(30) == list(range(30))
-    assert ds._plan.execute()._num_computed() == 8
+    check_num_computed(8)
     assert ds.take(50) == list(range(50))
-    assert ds._plan.execute()._num_computed() == 16
+    check_num_computed(16)
     assert ds.take(100) == list(range(100))
-    assert ds._plan.execute()._num_computed() == 20
+    check_num_computed(20)
 
 
 def test_dataset_repr(ray_start_regular_shared):
@@ -1696,7 +1705,14 @@ def to_pylist(table):
     # Default ArrowRows.
     for row, t_row in zip(ds.iter_rows(), to_pylist(t)):
         assert isinstance(row, TableRow)
-        assert isinstance(row, ArrowRow)
+        # In streaming, we set batch_format to "default" because calling
+        # ds.dataset_format() will still invoke bulk execution and we want
+        # to avoid that. As a result, it's receiving PandasRow (the defaut
+        # batch format).
+        if ray.data.context.DatasetContext.get_current().use_streaming_executor:
+            assert isinstance(row, PandasRow)
+        else:
+            assert isinstance(row, ArrowRow)
         assert row == t_row
 
     # PandasRows after conversion.
@@ -1710,7 +1726,14 @@ def to_pylist(table):
     # Prefetch.
     for row, t_row in zip(ds.iter_rows(prefetch_blocks=1), to_pylist(t)):
         assert isinstance(row, TableRow)
-        assert isinstance(row, ArrowRow)
+        # In streaming, we set batch_format to "default" because calling
+        # ds.dataset_format() will still invoke bulk execution and we want
+        # to avoid that. As a result, it's receiving PandasRow (the defaut
+        # batch format).
+        if ray.data.context.DatasetContext.get_current().use_streaming_executor:
+            assert isinstance(row, PandasRow)
+        else:
+            assert isinstance(row, ArrowRow)
         assert row == t_row
 
 
@@ -2181,7 +2204,12 @@ def test_lazy_loading_iter_batches_exponential_rampup(ray_start_regular_shared):
     ds = ray.data.range(32, parallelism=8)
     expected_num_blocks = [1, 2, 4, 4, 8, 8, 8, 8]
     for _, expected in zip(ds.iter_batches(batch_size=None), expected_num_blocks):
-        assert ds._plan.execute()._num_computed() == expected
+        if ray.data.context.DatasetContext.get_current().use_streaming_executor:
+            # In streaming execution of ds.iter_batches(), there is no partial
+            # execution so _num_computed() in LazyBlocklist is 0.
+            assert ds._plan.execute()._num_computed() == 0
+        else:
+            assert ds._plan.execute()._num_computed() == expected
 
 
 def test_add_column(ray_start_regular_shared):

From 82bc7a7feeeecd2fe3811dc891aeb9f2b8f7d594 Mon Sep 17 00:00:00 2001
From: jianoaix <iamjianxiao@gmail.com>
Date: Wed, 22 Feb 2023 23:39:45 +0000
Subject: [PATCH 09/11] fix incorrect ownership resolution from bundle iter to
 blocklist

---
 python/ray/data/_internal/execution/legacy_compat.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/ray/data/_internal/execution/legacy_compat.py b/python/ray/data/_internal/execution/legacy_compat.py
index e5d59d0f3106..0579f38c2cea 100644
--- a/python/ray/data/_internal/execution/legacy_compat.py
+++ b/python/ray/data/_internal/execution/legacy_compat.py
@@ -266,11 +266,13 @@ def bulk_fn(
 
 def _bundles_to_block_list(bundles: Iterator[RefBundle]) -> BlockList:
     blocks, metadata = [], []
+    owns_blocks = True
     for ref_bundle in bundles:
+        if not ref_bundle.owns_blocks:
+            owns_blocks = False
         for block, meta in ref_bundle.blocks:
             blocks.append(block)
             metadata.append(meta)
-    owns_blocks = all(b.owns_blocks for b in bundles)
     return BlockList(blocks, metadata, owned_by_consumer=owns_blocks)
 
 

From 7e31d5215f668d9a6e9deee900d5724a8aacb6d2 Mon Sep 17 00:00:00 2001
From: jianoaix <iamjianxiao@gmail.com>
Date: Wed, 22 Feb 2023 23:43:17 +0000
Subject: [PATCH 10/11] unset the streaming flag

---
 python/ray/data/context.py                    |  2 +-
 .../ray/data/tests/test_dataset_tfrecords.py  | 31 ++++++++++++-------
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/python/ray/data/context.py b/python/ray/data/context.py
index 96cd4f86d00e..cb953f04696a 100644
--- a/python/ray/data/context.py
+++ b/python/ray/data/context.py
@@ -76,7 +76,7 @@
 # Whether to use the streaming executor. This only has an effect if the new execution
 # backend is enabled.
 DEFAULT_USE_STREAMING_EXECUTOR = bool(
-    int(os.environ.get("RAY_DATASET_USE_STREAMING_EXECUTOR", "1"))
+    int(os.environ.get("RAY_DATASET_USE_STREAMING_EXECUTOR", "0"))
 )
 
 # Whether to eagerly free memory (new backend only).
diff --git a/python/ray/data/tests/test_dataset_tfrecords.py b/python/ray/data/tests/test_dataset_tfrecords.py
index e61493947b6e..0286bf5d75a7 100644
--- a/python/ray/data/tests/test_dataset_tfrecords.py
+++ b/python/ray/data/tests/test_dataset_tfrecords.py
@@ -227,17 +227,17 @@ def test_readback_tfrecords(ray_start_regular_shared, tmp_path):
                 "bytes_empty": None,
             },
             # Row two.
-            {
-                "int_item": 2,
-                "int_list": [3, 3, 4],
-                "int_empty": [9, 2],
-                "float_item": 2.0,
-                "float_list": [5.0, 6.0, 7.0],
-                "float_empty": None,
-                "bytes_item": b"ghi",
-                "bytes_list": [b"jkl", b"5678"],
-                "bytes_empty": b"hello",
-            },
+            # {
+            #     "int_item": 2,
+            #     "int_list": [3, 3, 4],
+            #     "int_empty": [9, 2],
+            #     "float_item": 2.0,
+            #     "float_list": [5.0, 6.0, 7.0],
+            #     "float_empty": None,
+            #     "bytes_item": b"ghi",
+            #     "bytes_list": [b"jkl", b"5678"],
+            #     "bytes_empty": b"hello",
+            # },
         ],
         # Here and in the read_tfrecords call below, we specify `parallelism=1`
         # to ensure that all rows end up in the same block, which is required
@@ -245,11 +245,20 @@ def test_readback_tfrecords(ray_start_regular_shared, tmp_path):
         parallelism=1,
     )
 
+    print("XXX tmppath:", tmp_path)
+
+    print("XXXX old: datasetformat: ", ds.dataset_format(), " schema:", ds.schema())
+    ds.show()
+
     # Write the TFRecords.
     ds.write_tfrecords(tmp_path)
 
     # Read the TFRecords.
     readback_ds = ray.data.read_tfrecords(tmp_path)
+
+    readback_ds.show()
+    print("XXXX new : format:", readback_ds.dataset_format(), " schema:", ds.schema())
+
     assert ds.take() == readback_ds.take()
 
 

From c4b7f42261273b70fd52eaff41a2b2c217ed1c54 Mon Sep 17 00:00:00 2001
From: jianoaix <iamjianxiao@gmail.com>
Date: Wed, 22 Feb 2023 23:51:04 +0000
Subject: [PATCH 11/11] cleanup

---
 .../ray/data/tests/test_dataset_tfrecords.py  | 31 +++++++------------
 1 file changed, 11 insertions(+), 20 deletions(-)

diff --git a/python/ray/data/tests/test_dataset_tfrecords.py b/python/ray/data/tests/test_dataset_tfrecords.py
index 0286bf5d75a7..e61493947b6e 100644
--- a/python/ray/data/tests/test_dataset_tfrecords.py
+++ b/python/ray/data/tests/test_dataset_tfrecords.py
@@ -227,17 +227,17 @@ def test_readback_tfrecords(ray_start_regular_shared, tmp_path):
                 "bytes_empty": None,
             },
             # Row two.
-            # {
-            #     "int_item": 2,
-            #     "int_list": [3, 3, 4],
-            #     "int_empty": [9, 2],
-            #     "float_item": 2.0,
-            #     "float_list": [5.0, 6.0, 7.0],
-            #     "float_empty": None,
-            #     "bytes_item": b"ghi",
-            #     "bytes_list": [b"jkl", b"5678"],
-            #     "bytes_empty": b"hello",
-            # },
+            {
+                "int_item": 2,
+                "int_list": [3, 3, 4],
+                "int_empty": [9, 2],
+                "float_item": 2.0,
+                "float_list": [5.0, 6.0, 7.0],
+                "float_empty": None,
+                "bytes_item": b"ghi",
+                "bytes_list": [b"jkl", b"5678"],
+                "bytes_empty": b"hello",
+            },
         ],
         # Here and in the read_tfrecords call below, we specify `parallelism=1`
         # to ensure that all rows end up in the same block, which is required
@@ -245,20 +245,11 @@ def test_readback_tfrecords(ray_start_regular_shared, tmp_path):
         parallelism=1,
     )
 
-    print("XXX tmppath:", tmp_path)
-
-    print("XXXX old: datasetformat: ", ds.dataset_format(), " schema:", ds.schema())
-    ds.show()
-
     # Write the TFRecords.
     ds.write_tfrecords(tmp_path)
 
     # Read the TFRecords.
     readback_ds = ray.data.read_tfrecords(tmp_path)
-
-    readback_ds.show()
-    print("XXXX new : format:", readback_ds.dataset_format(), " schema:", ds.schema())
-
     assert ds.take() == readback_ds.take()