From 5728f5311d9cd05c42bdcb4ef835b553be3dbbe2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Feb 2026 04:20:49 +0000
Subject: [PATCH 1/2] Initial plan


From aac261c9f178d1fb284f714a69a963c190949f58 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Feb 2026 04:35:33 +0000
Subject: [PATCH 2/2] fix: handle zero-size tensors in IsContiguousTensor for
 DLPack conversion

NumPy 2.x sets all strides to 0 for zero-size tensors. As a result, the
right-to-left stride check failed on the first dimension it visited with a
size greater than 1 (e.g., dimension 128 with stride 0 != running_size 1)
before it ever reached the zero-size dimension. Fix this by pre-scanning all
dimensions for a zero size before performing per-dimension stride validation.

Add regression tests in test_ort_value_dlpack_zero_size covering the
KV-cache use case shape (1, 8, 0, 128) and other zero-size shapes.

Co-authored-by: justinchuby <11205048+justinchuby@users.noreply.github.com>
---
 onnxruntime/core/dlpack/dlpack_converter.cc   | 10 ++++++--
 .../test/python/onnxruntime_test_python.py    | 23 +++++++++++++++++++
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/core/dlpack/dlpack_converter.cc b/onnxruntime/core/dlpack/dlpack_converter.cc
index cb7fe9af5d1ae..22d5bdf6f23d8 100644
--- a/onnxruntime/core/dlpack/dlpack_converter.cc
+++ b/onnxruntime/core/dlpack/dlpack_converter.cc
@@ -162,12 +162,18 @@ bool IsContiguousTensor(const DLTensor& tensor) {
     return true;
   }
 
-  int64_t running_size = 1;
-  for (int i = tensor.ndim - 1; i >= 0; i--) {
+  // Zero-size tensors (any dimension equals 0) have no elements, so any stride
+  // layout is vacuously contiguous. Check upfront before validating strides,
+  // because some frameworks (e.g. NumPy 2.x) set all strides to 0 for zero-size
+  // tensors, which would otherwise fail the per-dimension stride check below.
+  for (int i = 0; i < tensor.ndim; i++) {
     if (tensor.shape[i] == 0) {
       return true;
     }
+  }
 
+  int64_t running_size = 1;
+  for (int i = tensor.ndim - 1; i >= 0; i--) {
     if (tensor.shape[i] != 1 && tensor.strides[i] != running_size) {
       return false;
     }
diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py
index fe98cc2ad561a..81a7c00354fcc 100644
--- a/onnxruntime/test/python/onnxruntime_test_python.py
+++ b/onnxruntime/test/python/onnxruntime_test_python.py
@@ -1444,6 +1444,29 @@ def test_ort_value_dlpack(self):
         device = ortvalue._ortvalue.__dlpack_device__()
         self.assertEqual((1, 0), device)
 
+    @unittest.skipIf(not hasattr(C.OrtValue, "from_dlpack"), "dlpack not enabled in this build")
+    def test_ort_value_dlpack_zero_size(self):
+        # Zero-size tensors hold no elements and are vacuously contiguous, so from_dlpack must accept them.
+        # Regression test: from_dlpack used to reject such tensors when the exporter reported all-zero strides.
+        zero_size_shapes = [
+            (1, 8, 0, 128),  # zero in the middle (KV-cache use case)
+            (0,),  # 1-D zero-size
+            (0, 4),  # zero leading dimension
+            (4, 0),  # zero trailing dimension
+        ]
+        for shape in zero_size_shapes:
+            with self.subTest(shape=shape):
+                arr = np.zeros(shape, dtype=np.float32)
+                # Import path: the object returned by NumPy's __dlpack__() must be accepted with its shape intact.
+                dlp = arr.__dlpack__()
+                ortvalue = C.OrtValue.from_dlpack(dlp, False)
+                self.assertEqual(list(shape), list(ortvalue.shape()))
+                # Round trip: OrtValue -> to_dlpack() -> from_dlpack() must also preserve the shape.
+                ort_input = onnxrt.OrtValue.ortvalue_from_numpy(arr)
+                dlp2 = ort_input._ortvalue.to_dlpack()
+                ortvalue2 = C.OrtValue.from_dlpack(dlp2, False)
+                self.assertEqual(list(shape), list(ortvalue2.shape()))
+
     def test_sparse_tensor_coo_format(self):
         cpu_device = onnxrt.OrtDevice.make("cpu", 0)
         shape = [9, 9]