Skip to content

Commit 9cf8313

Browse files
committed
Fix case where we still need to set a no data value as nan
1 parent 5441454 commit 9cf8313

File tree

4 files changed

+32
-3
lines changed

4 files changed

+32
-3
lines changed

src/reformatters/contrib/noaa/ndvi_cdr/analysis/region_job.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,6 @@ def _read_netcdf_data(
165165
) -> ArrayFloat32 | ArrayInt16:
166166
"""Read data from NetCDF file."""
167167
out_dtype = data_var.encoding.dtype
168-
encoding_fill_value = data_var.encoding.fill_value
169168

170169
var_name = data_var.internal_attrs.netcdf_var_name
171170
netcdf_fill_value = data_var.internal_attrs.fill_value
@@ -183,7 +182,19 @@ def _read_netcdf_data(
183182

184183
# Set invalid values to NaN before scaling (for float data)
185184
if var_name != QA_NETCDF_VAR_NAME:
186-
result[result == netcdf_fill_value] = encoding_fill_value
185+
# We are using a different fill value here than the data var encoding fill value.
186+
# This is because encoding fill value was previously NaN, and so when we matched
187+
# our no data value, we set values to NaN. We have now changed the
188+
# encoding fill value to 0. This is to accommodate the fact that due to an Xarray bug,
189+
# the encoding fill value was not round tripped (it was persisted as 0 despite the
190+
# definition in our encoding). We have updated the encoding fill value to 0 to match
191+
# what was written at the time of our backfill. That change ensures that empty chunks
192+
# continue to be interpreted as 0. But consequently, we need to ensure that when we
193+
# are setting the no data value when reading the netcdf data, we continue to use NaN.
194+
if data_var.internal_attrs.read_data_fill_value is not None:
195+
result[result == netcdf_fill_value] = (
196+
data_var.internal_attrs.read_data_fill_value
197+
)
187198

188199
assert scale_factor is not None
189200
assert add_offset is not None

src/reformatters/contrib/noaa/ndvi_cdr/analysis/template_config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ class NoaaNdviCdrInternalAttrs(BaseInternalAttrs):
4444
scale_factor: float | None = None
4545
add_offset: float | None = None
4646
valid_range: tuple[float, float] | None = None
47+
read_data_fill_value: float | None = None
4748

4849

4950
class NoaaNdviCdrDataVar(DataVar[NoaaNdviCdrInternalAttrs]):
@@ -243,6 +244,7 @@ def data_vars(self) -> Sequence[NoaaNdviCdrDataVar]:
243244
scale_factor=0.0001,
244245
add_offset=0.0,
245246
valid_range=(-1000, 10000),
247+
read_data_fill_value=np.nan,
246248
),
247249
),
248250
NoaaNdviCdrDataVar(
@@ -262,6 +264,7 @@ def data_vars(self) -> Sequence[NoaaNdviCdrDataVar]:
262264
scale_factor=0.0001,
263265
add_offset=0.0,
264266
valid_range=(-1000, 10000),
267+
read_data_fill_value=np.nan,
265268
),
266269
),
267270
NoaaNdviCdrDataVar(

src/reformatters/contrib/uarizona/swann/analysis/region_job.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,19 @@ def read_data(
140140
no_data_value = -999
141141
with rasterio.open(netcdf_path) as reader:
142142
result: Array2D[np.float32] = reader.read(band, out_dtype=np.float32)
143-
result[result == no_data_value] = np.nan
143+
# We are using a different fill value here than the data var encoding fill value.
144+
# This is because encoding fill value was previously NaN, and so when we matched
145+
# our no data value, we set values to NaN. We have now changed the
146+
# encoding fill value to 0. This is to accommodate the fact that due to an Xarray bug,
147+
# the encoding fill value was not round tripped (it was persisted as 0 despite the
148+
# definition in our encoding). We have updated the encoding fill value to 0 to match
149+
# what was written at the time of our backfill. That change ensures that empty chunks
150+
# continue to be interpreted as 0. But consequently, we need to ensure that when we
151+
# are setting the no data value when reading the netcdf data, we continue to use NaN.
152+
if data_var.internal_attrs.read_data_fill_value is not None:
153+
result[result == no_data_value] = (
154+
data_var.internal_attrs.read_data_fill_value
155+
)
144156
assert result.shape == (621, 1405)
145157
return result
146158

src/reformatters/contrib/uarizona/swann/analysis/template_config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
class UarizonaSwannInternalAttrs(BaseInternalAttrs):
2929
netcdf_var_name: str
30+
read_data_fill_value: float | None
3031

3132

3233
class UarizonaSwannDataVar(DataVar[UarizonaSwannInternalAttrs]):
@@ -219,6 +220,7 @@ def data_vars(self) -> Sequence[UarizonaSwannDataVar]:
219220
internal_attrs=UarizonaSwannInternalAttrs(
220221
keep_mantissa_bits=default_keep_mantissa_bits,
221222
netcdf_var_name="SWE",
223+
read_data_fill_value=np.nan,
222224
),
223225
),
224226
UarizonaSwannDataVar(
@@ -234,6 +236,7 @@ def data_vars(self) -> Sequence[UarizonaSwannDataVar]:
234236
internal_attrs=UarizonaSwannInternalAttrs(
235237
keep_mantissa_bits=default_keep_mantissa_bits,
236238
netcdf_var_name="DEPTH",
239+
read_data_fill_value=np.nan,
237240
),
238241
),
239242
]

0 commit comments

Comments
 (0)