Fix case where we still need to set a no data value as nan

mosegontar · mosegontar · commit 9cf8313310e9 · 2025-10-27T09:32:54.000-04:00
diff --git a/src/reformatters/contrib/noaa/ndvi_cdr/analysis/region_job.py b/src/reformatters/contrib/noaa/ndvi_cdr/analysis/region_job.py
@@ -165,7 +165,6 @@ def _read_netcdf_data(
     ) -> ArrayFloat32 | ArrayInt16:
         """Read data from NetCDF file."""
         out_dtype = data_var.encoding.dtype
-        encoding_fill_value = data_var.encoding.fill_value
 
         var_name = data_var.internal_attrs.netcdf_var_name
         netcdf_fill_value = data_var.internal_attrs.fill_value
@@ -183,7 +182,19 @@ def _read_netcdf_data(
 
             # Set invalid values to NaN before scaling (for float data)
             if var_name != QA_NETCDF_VAR_NAME:
-                result[result == netcdf_fill_value] = encoding_fill_value
+                # We are using a different fill value here than the data var encoding fill value
+                # This is because encoding fill value was previously NaN, and so when we matched
+                # our no data value, we set values to NaN. We have now changed the encoding
+                # fill value to 0. This is to accommodate the fact that due to an Xarray bug,
+                # the encoding fill value was not round tripped (it was persisted as 0 despite the
+                # definition in our encoding). We have updated the encoding fill value to 0 to match
+                # what was written at the time of our backfill. That change ensures that empty chunks
+                # continue to be interpreted as 0. But consequently, we need to ensure that when we
+                # are setting the no data value when reading the netcdf data, we continue to use NaN.
+                if data_var.internal_attrs.read_data_fill_value is not None:
+                    result[result == netcdf_fill_value] = (
+                        data_var.internal_attrs.read_data_fill_value
+                    )
 
                 assert scale_factor is not None
                 assert add_offset is not None
diff --git a/src/reformatters/contrib/noaa/ndvi_cdr/analysis/template_config.py b/src/reformatters/contrib/noaa/ndvi_cdr/analysis/template_config.py
@@ -44,6 +44,7 @@ class NoaaNdviCdrInternalAttrs(BaseInternalAttrs):
     scale_factor: float | None = None
     add_offset: float | None = None
     valid_range: tuple[float, float] | None = None
+    read_data_fill_value: float | None = None
 
 
 class NoaaNdviCdrDataVar(DataVar[NoaaNdviCdrInternalAttrs]):
@@ -243,6 +244,7 @@ def data_vars(self) -> Sequence[NoaaNdviCdrDataVar]:
                     scale_factor=0.0001,
                     add_offset=0.0,
                     valid_range=(-1000, 10000),
+                    read_data_fill_value=np.nan,
                 ),
             ),
             NoaaNdviCdrDataVar(
@@ -262,6 +264,7 @@ def data_vars(self) -> Sequence[NoaaNdviCdrDataVar]:
                     scale_factor=0.0001,
                     add_offset=0.0,
                     valid_range=(-1000, 10000),
+                    read_data_fill_value=np.nan,
                 ),
             ),
             NoaaNdviCdrDataVar(
diff --git a/src/reformatters/contrib/uarizona/swann/analysis/region_job.py b/src/reformatters/contrib/uarizona/swann/analysis/region_job.py
@@ -140,7 +140,19 @@ def read_data(
         no_data_value = -999
         with rasterio.open(netcdf_path) as reader:
             result: Array2D[np.float32] = reader.read(band, out_dtype=np.float32)
-            result[result == no_data_value] = np.nan
+            # We are using a different fill value here than the data var encoding fill value
+            # This is because encoding fill value was previously NaN, and so when we matched
+            # our no data value, we set values to NaN. We have now changed the encoding
+            # fill value to 0. This is to accommodate the fact that due to an Xarray bug,
+            # the encoding fill value was not round tripped (it was persisted as 0 despite the
+            # definition in our encoding). We have updated the encoding fill value to 0 to match
+            # what was written at the time of our backfill. That change ensures that empty chunks
+            # continue to be interpreted as 0. But consequently, we need to ensure that when we
+            # are setting the no data value when reading the netcdf data, we continue to use NaN.
+            if data_var.internal_attrs.read_data_fill_value is not None:
+                result[result == no_data_value] = (
+                    data_var.internal_attrs.read_data_fill_value
+                )
             assert result.shape == (621, 1405)
             return result
 
diff --git a/src/reformatters/contrib/uarizona/swann/analysis/template_config.py b/src/reformatters/contrib/uarizona/swann/analysis/template_config.py
@@ -27,6 +27,7 @@
 
 class UarizonaSwannInternalAttrs(BaseInternalAttrs):
     netcdf_var_name: str
+    read_data_fill_value: float | None
 
 
 class UarizonaSwannDataVar(DataVar[UarizonaSwannInternalAttrs]):
@@ -219,6 +220,7 @@ def data_vars(self) -> Sequence[UarizonaSwannDataVar]:
                 internal_attrs=UarizonaSwannInternalAttrs(
                     keep_mantissa_bits=default_keep_mantissa_bits,
                     netcdf_var_name="SWE",
+                    read_data_fill_value=np.nan,
                 ),
             ),
             UarizonaSwannDataVar(
@@ -234,6 +236,7 @@ def data_vars(self) -> Sequence[UarizonaSwannDataVar]:
                 internal_attrs=UarizonaSwannInternalAttrs(
                     keep_mantissa_bits=default_keep_mantissa_bits,
                     netcdf_var_name="DEPTH",
+                    read_data_fill_value=np.nan,
                 ),
             ),
         ]