2 changes: 2 additions & 0 deletions doc/whats-new.rst
@@ -29,6 +29,8 @@ Bug Fixes
- Ensure that ``keep_attrs='drop'`` and ``keep_attrs=False`` remove attrs from result, even when there is
only one xarray object given to ``apply_ufunc`` (:issue:`10982` :pull:`10997`).
By `Julia Signell <https://github.com/jsignell>`_.
- Improve the robustness of ``open_zarr`` by attempting to load datasets with default dimension names if dimension name metadata is missing (:issue:`8749`).
By `Ewan Short <https://github.com/eshort0401>`_.

Documentation
~~~~~~~~~~~~~
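As a quick illustration of this entry, the following hedged sketch writes a small Zarr v2 store with xarray, strips the `_ARRAY_DIMENSIONS` attribute to mimic a store produced by another tool, and reopens it. The path and variable names are illustrative; `chunks=None` is passed only so the example does not require dask, and the `attrs.put` pattern mirrors the existing tests.

```python
import numpy as np
import xarray as xr
import zarr

# Illustrative path and variable names; this is a sketch, not part of the PR.
path = "missing_dims_example.zarr"
ds = xr.Dataset({"a": (("label", "z"), np.arange(6).reshape(2, 3))})
ds.to_zarr(path, zarr_format=2, consolidated=False, mode="w")

# Strip the xarray dimension-name attribute to simulate a store written by another tool.
group = zarr.open_group(path, mode="a")
attrs = dict(group["a"].attrs)
del attrs["_ARRAY_DIMENSIONS"]
group["a"].attrs.put(attrs)

# Previously this raised KeyError; with this change it warns and falls back to defaults.
reopened = xr.open_zarr(path, consolidated=False, chunks=None)
print(dict(reopened.sizes))  # expected: {'dim_0': 2, 'dim_1': 3}
```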
119 changes: 81 additions & 38 deletions xarray/backends/zarr.py
@@ -4,6 +4,7 @@
import json
import os
import struct
import warnings
from collections.abc import Hashable, Iterable, Mapping
from typing import TYPE_CHECKING, Any, Literal, Self, cast

@@ -355,56 +356,96 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name):


def _get_zarr_dims_and_attrs(zarr_obj, dimension_key, try_nczarr):
# Zarr V3 explicitly stores the dimension names in the metadata
def get_default_dims(zarr_obj):
# Helper function to create default dimension names when these are missing
# from the metadata. Note that the dimension_names field is optional in
# Zarr v3:
# https://zarr-specs.readthedocs.io/en/latest/v3/core/index.html#dimension-names
# Dimension name metadata is also optional in Zarr v2:
# https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#attributes

return tuple(f"dim_{n}" for n in range(len(zarr_obj.shape)))

def get_nczarr_dims(zarr_obj):
# Helper function to extract dimension names from NCZarr metadata in .zarray
# https://docs.unidata.ucar.edu/netcdf/NUG/nczarr_head.html
zarray_path = os.path.join(zarr_obj.path, ".zarray")
# Check the installed zarr-python version (not the zarr format of zarr_obj)
if _zarr_v3():
import asyncio

zarray_str = asyncio.run(zarr_obj.store.get(zarray_path)).to_bytes()
else:
zarray_str = zarr_obj.store.get(zarray_path)
zarray = json.loads(zarray_str)
try:
# NCZarr uses Fully Qualified Names
dimensions = [
os.path.basename(dim) for dim in zarray["_NCZARR_ARRAY"]["dimrefs"]
]
except KeyError:
dimensions = get_default_dims(zarr_obj)
return dimensions

# Zarr V3 specifies an optional dimension_names array metadata parameter, so
# check if this exists
try:
# if this exists, we are looking at a Zarr V3 array
# convert None to empty tuple
# If dimension_names exists, we are looking at a Zarr V3 array
# Convert None to empty tuple
dimensions = zarr_obj.metadata.dimension_names or ()
except AttributeError:
# continue to old code path
# Continue to old code path
pass
else:
# Check that the number of dimensions in the metadata matches the shape of
# the array. If not, use defaults.
attributes = dict(zarr_obj.attrs)
if len(zarr_obj.shape) != len(dimensions):
raise KeyError(
"Zarr object is missing the `dimension_names` metadata which is "
"required for xarray to determine variable dimensions."
)
if not dimensions:
message = "Missing metadata dimension names."
else:
message = (
f"Metadata dimension names {dimensions} inconsistent with array "
f"shape {zarr_obj.shape}."
)
message += " Attempting with defaults."
warnings.warn(message, UserWarning, stacklevel=2)
dimensions = get_default_dims(zarr_obj)
return dimensions, attributes

# Zarr arrays do not have dimensions. To get around this problem, we add
# an attribute that specifies the dimension. We have to hide this attribute
# when we send the attributes to the user.
# zarr_obj can be either a zarr group or zarr array
# Zarr 2 arrays do not necessarily have dimension names.
# https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#attributes
# To get around this problem, we add an attribute that specifies the
# dimension. We have to hide this attribute when we send the
# attributes to the user. Note zarr_obj can be either a zarr group
# or zarr array

# First try to read dimension names using the old xarray-zarr convention;
# dimension_key is typically _ARRAY_DIMENSIONS.
try:
# Xarray-Zarr
dimensions = zarr_obj.attrs[dimension_key]
except KeyError as e:
except KeyError:
warnings.warn(
"Failed to read dimension names from xarray zarr metadata.",
UserWarning,
stacklevel=2,
)
if not try_nczarr:
raise KeyError(
f"Zarr object is missing the attribute `{dimension_key}`, which is "
"required for xarray to determine variable dimensions."
) from e

# NCZarr defines dimensions through metadata in .zarray
zarray_path = os.path.join(zarr_obj.path, ".zarray")
if _zarr_v3():
import asyncio

zarray_str = asyncio.run(zarr_obj.store.get(zarray_path)).to_bytes()
# Skip straight to using default dimensions
dimensions = get_default_dims(zarr_obj)
else:
zarray_str = zarr_obj.store.get(zarray_path)
zarray = json.loads(zarray_str)
try:
# NCZarr uses Fully Qualified Names
dimensions = [
os.path.basename(dim) for dim in zarray["_NCZARR_ARRAY"]["dimrefs"]
]
except KeyError as e:
raise KeyError(
f"Zarr object is missing the attribute `{dimension_key}` and the NCZarr metadata, "
"which are required for xarray to determine variable dimensions."
) from e
# Try to read dimension names using NCZarr convention
try:
dimensions = get_nczarr_dims(zarr_obj)
except Exception:
# Fallback to default dimension names
warnings.warn(
"Failed to read dimension names from netcdf zarr metadata.",
UserWarning,
stacklevel=2,
)
dimensions = get_default_dims(zarr_obj)

nc_attrs = [attr for attr in zarr_obj.attrs if attr.lower().startswith("_nc")]
attributes = HiddenKeyDict(zarr_obj.attrs, [dimension_key] + nc_attrs)
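For reference, the sketch below summarises the three metadata conventions the reader above distinguishes, plus the default fallback. All literal names and shapes are illustrative and not taken from a real store.

```python
import os

# Zarr v2 + xarray convention: dimension names live in the array's .zattrs.
zattrs = {"_ARRAY_DIMENSIONS": ["label", "z"]}

# Zarr v3: dimension_names is an optional field of the array metadata (zarr.json).
zarr_json = {"shape": [3, 18], "dimension_names": ["label", "z"]}

# NCZarr: dimension references are fully qualified names stored in .zarray.
nczarr_zarray = {"shape": [3, 18], "_NCZARR_ARRAY": {"dimrefs": ["/label", "/z"]}}
assert [os.path.basename(d) for d in nczarr_zarray["_NCZARR_ARRAY"]["dimrefs"]] == ["label", "z"]

# When none of the above are present, defaults are generated from the array rank,
# matching get_default_dims above.
shape = (3, 18)
assert tuple(f"dim_{n}" for n in range(len(shape))) == ("dim_0", "dim_1")
```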
@@ -1424,8 +1465,10 @@ def open_zarr(
"""Load and decode a dataset from a Zarr store.

The `store` object should be a valid store for a Zarr group. `store`
variables must contain dimension metadata encoded in the
`_ARRAY_DIMENSIONS` attribute or must have NCZarr format.
variables should contain dimension metadata encoded in the
`_ARRAY_DIMENSIONS` attribute for Zarr v2, the `dimension_names` metadata field for
Zarr v3, or the `dimrefs` entry in the NCZarr metadata. If dimension name metadata is
missing, `open_zarr` will attempt to build the dataset using default dimension names.

Parameters
----------
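To complement the updated docstring, here is a hedged sketch of the normal round-trip: when xarray writes the store itself it records the dimension names, so the new fallback and warnings never trigger. The path and variable names are illustrative; `zarr_format=2` is chosen only so the v2 attribute is easy to inspect, and `chunks=None` avoids requiring dask.

```python
import numpy as np
import xarray as xr
import zarr

path = "roundtrip_example.zarr"  # illustrative path
ds = xr.Dataset({"a": (("label", "z"), np.zeros((3, 18)))})
ds.to_zarr(path, zarr_format=2, consolidated=False, mode="w")

# xarray recorded the dimension names itself, so no fallback is needed on read.
arr = zarr.open_group(path, mode="r")["a"]
print(dict(arr.attrs))  # expected to include {'_ARRAY_DIMENSIONS': ['label', 'z']}
print(dict(xr.open_zarr(path, consolidated=False, chunks=None).sizes))  # {'label': 3, 'z': 18}
```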
89 changes: 86 additions & 3 deletions xarray/tests/test_backends.py
@@ -2,8 +2,10 @@

import asyncio
import contextlib
import glob
import gzip
import itertools
import json
import math
import os.path
import pickle
@@ -3022,7 +3024,7 @@ def test_hidden_zarr_keys(self) -> None:
del attrs[self.DIMENSION_KEY]
zarr_group["var2"].attrs.put(attrs)

with pytest.raises(KeyError):
with pytest.warns(UserWarning, match="Failed to read dimension names"):
with xr.decode_cf(store):
pass

@@ -4371,6 +4373,87 @@ def create_zarr_target(self):
with create_tmp_file(suffix=".zarr") as tmp:
yield tmp

# Helper functions for stripping dimension metadata from zarr stores
def _strip_zarr_3(self, ds_path, stripped_ds_path):
"""Create a copy of a zarr 3 with dimension_names metadata removed."""
shutil.copytree(ds_path, stripped_ds_path, dirs_exist_ok=True)
# Get all the zarr.json metadata files.
metadata_files = glob.glob(f"{stripped_ds_path}/**/zarr.json", recursive=True)
# Iterate through and remove all "dimension_names" entries
for file in metadata_files:
with open(file) as f:
metadata = json.load(f)
metadata.pop("dimension_names", None)
con_metadata = metadata.get("consolidated_metadata", None)
if con_metadata:
for k in con_metadata["metadata"].keys():
con_metadata["metadata"][k].pop("dimension_names", None)

with open(file, "w") as f:
json.dump(metadata, f, indent=2)

def _strip_zarr_2(self, ds_path, stripped_ds_path):
"""Create a copy of a zarr 2 with _ARRAY_DIMENSIONS metadata removed."""
# Get all the .zattrs files. Note .zattrs are optional in zarr 2, but xarray uses
# them to store dimension name metadata.
# https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#attributes
shutil.copytree(ds_path, stripped_ds_path, dirs_exist_ok=True)
zattrs_files = glob.glob(f"{stripped_ds_path}/**/.zattrs", recursive=True)
# Iterate through and remove all "_ARRAY_DIMENSIONS" entries
for file in zattrs_files:
with open(file) as f:
metadata = json.load(f)
metadata.pop("_ARRAY_DIMENSIONS", None)
with open(file, "w") as f:
json.dump(metadata, f, indent=2)
zmetadata_file = Path(stripped_ds_path) / ".zmetadata"
if zmetadata_file.exists():
with open(zmetadata_file) as f:
metadata = json.load(f)
for k in metadata["metadata"].keys():
metadata["metadata"][k].pop("_ARRAY_DIMENSIONS", None)
with open(zmetadata_file, "w") as f:
json.dump(metadata, f, indent=2)

@pytest.mark.parametrize("consolidated", [True, False])
def test_default_dims(self, consolidated):
zarr_format = zarr.config.get("default_zarr_format") if has_zarr_v3 else 2
# Create example data that can be read without dimension name metadata
da_a = xr.DataArray(np.arange(3 * 18).reshape(3, 18), dims=["label", "z"])
da_b = xr.DataArray(np.arange(3), dims="label")
ds_1 = xr.Dataset({"a": da_a, "b": da_b})

# Specify what we expect to get when dimension name metadata is missing
expected = ds_1.rename_dims({"label": "dim_0", "z": "dim_1"})

def get_stripped_ds(ds, consolidated, zarr_format):
with self.create_zarr_target() as ds_target:
kwargs = {"consolidated": consolidated, "zarr_format": zarr_format}
ds.to_zarr(ds_target, **kwargs)
with self.create_zarr_target() as stripped_ds_target:
if zarr_format == 3:
self._strip_zarr_3(ds_target, stripped_ds_target)
else:
self._strip_zarr_2(ds_target, stripped_ds_target)
with pytest.warns(UserWarning, match="dimension names"):
return xr.open_zarr(stripped_ds_target, **kwargs).compute()

stripped_ds_1 = get_stripped_ds(ds_1, consolidated, zarr_format)
assert_equal(stripped_ds_1, expected)

# Create example data that cannot be read without dimension name metadata
da_c = xr.DataArray(np.arange(18), dims="z")
ds_2 = xr.Dataset({"a": da_a, "c": da_c})

with pytest.raises(ValueError, match="conflicting sizes for dimension"):
get_stripped_ds(ds_2, consolidated, zarr_format)

# The failure of open_zarr on ds_2 mirrors the failure to construct an xarray Dataset
# without proper dimension labels: with default names, variables must have consistent
# dimension sizes when shapes are read from left to right.
with pytest.raises(AlignmentError, match="cannot reindex or align along"):
xr.Dataset({"a": xr.DataArray(da_a.values), "c": xr.DataArray(da_c.values)})


@requires_zarr
class TestZarrWriteEmpty(TestZarrDirectoryStore):
@@ -7587,13 +7670,13 @@ def test_zarr_create_default_indexes(tmp_path, create_default_indexes) -> None:

@requires_zarr
@pytest.mark.usefixtures("default_zarr_format")
def test_raises_key_error_on_invalid_zarr_store(tmp_path):
def test_user_warning_on_invalid_zarr_store(tmp_path):
root = zarr.open_group(tmp_path / "tmp.zarr")
if Version(zarr.__version__) < Version("3.0.0"):
root.create_dataset("bar", shape=(3, 5), dtype=np.float32)
else:
root.create_array("bar", shape=(3, 5), dtype=np.float32)
with pytest.raises(KeyError, match=r"xarray to determine variable dimensions"):
with pytest.warns(UserWarning, match=r"dimension names"):
xr.open_zarr(tmp_path / "tmp.zarr", consolidated=False)


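A self-contained sketch of the constraint exercised at the end of `test_default_dims`: under default naming every array's leading axis becomes `dim_0`, so arrays whose leading axes differ in length cannot share a dataset. The shapes mirror the test; everything else is illustrative.

```python
import numpy as np
import xarray as xr

a = xr.DataArray(np.arange(3 * 18).reshape(3, 18))  # default dims ('dim_0', 'dim_1')
c = xr.DataArray(np.arange(18))                      # default dims ('dim_0',)

try:
    xr.Dataset({"a": a, "c": c})
except Exception as err:
    # Expect an alignment/size-conflict error: dim_0 would need to be both 3 and 18.
    print(type(err).__name__, err)
```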