combined fill extend method and interior smoothing

bnb32 · bnb32 · commit 945ca3ac810a · 2023-05-29T11:23:24.000-06:00
diff --git a/sup3r/bias/bias_calc.py b/sup3r/bias/bias_calc.py
@@ -553,9 +553,15 @@ def _run_single(cls, bias_data, base_fps, bias_feature, base_dset,
                                         bias_feature, base_dset)
         return out
 
-    def fill_extend(self, out, smooth_extend):
+    def fill_smooth_extend(self, out, fill_extend=True, smooth_extend=0,
+                           smooth_interior=0):
         """Fill data extending beyond the base meta data extent by doing a
-        nearest neighbor gap fill.
+        nearest neighbor gap fill. Smooth interior and extended region with
+        given smoothing values.
+        Interior smoothing can reduce the effect of extreme values
+        within aggregations over a large number of pixels.
+        The interior is assumed to be defined by the region without nan values.
+        The extended region is assumed to be the region with nan values.
 
         Parameters
         ----------
@@ -564,11 +570,20 @@ def fill_extend(self, out, smooth_extend):
             data and the scalar + adder factors to correct the biased data
             like: bias_data * scalar + adder. Each value is of shape
             (lat, lon, time).
+        fill_extend : bool
+            Whether to fill data extending beyond the base meta data with
+            nearest neighbor values.
         smooth_extend : float
             Option to smooth the scalar/adder data outside of the spatial
             domain set by the threshold input. This alleviates the weird seams
             far from the domain of interest. This value is the standard
             deviation for the gaussian_filter kernel
+        smooth_interior : float
+            Value to use to smooth the scalar/adder data inside of the spatial
+            domain set by the threshold input. This can reduce the effect of
+            extreme values within aggregations over a large number of pixels.
+            This value is the standard deviation for the gaussian_filter
+            kernel.
 
         Returns
         -------
@@ -581,12 +596,30 @@ def fill_extend(self, out, smooth_extend):
         for key, arr in out.items():
             nan_mask = np.isnan(arr[..., 0])
             for idt in range(self.NT):
-                arr[..., idt] = nn_fill_array(arr[..., idt])
+
+                arr_smooth = arr[..., idt]
+
+                needs_fill = (fill_extend or smooth_extend > 0
+                              or smooth_interior > 0)
+
+                if needs_fill:
+                    arr_smooth = nn_fill_array(arr_smooth)
+
+                arr_smooth_int = arr_smooth_ext = arr_smooth
+
                 if smooth_extend > 0:
-                    arr_smooth = gaussian_filter(arr[..., idt],
-                                                 smooth_extend,
-                                                 mode='nearest')
-                    out[key][nan_mask, idt] = arr_smooth[nan_mask]
+                    arr_smooth_ext = gaussian_filter(arr_smooth_ext,
+                                                     smooth_extend,
+                                                     mode='nearest')
+
+                if smooth_interior > 0:
+                    arr_smooth_int = gaussian_filter(arr_smooth_int,
+                                                     smooth_interior,
+                                                     mode='nearest')
+
+                out[key][nan_mask, idt] = arr_smooth_ext[nan_mask]
+                out[key][~nan_mask, idt] = arr_smooth_int[~nan_mask]
+
         return out
 
     def write_outputs(self, fp_out, out):
@@ -623,7 +656,8 @@ def write_outputs(self, fp_out, out):
                             .format(fp_out))
 
     def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
-            daily_reduction='avg', fill_extend=True, smooth_extend=0):
+            daily_reduction='avg', fill_extend=True, smooth_extend=0,
+            smooth_interior=0):
         """Run linear correction factor calculations for every site in the bias
         dataset
 
@@ -654,6 +688,10 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
             domain set by the threshold input. This alleviates the weird seams
             far from the domain of interest. This value is the standard
             deviation for the gaussian_filter kernel
+        smooth_interior : float
+            Option to smooth the scalar/adder data within the valid spatial
+            domain. This can reduce the effect of extreme values within
+            aggregations over a large number of pixels.
 
         Returns
         -------
@@ -732,8 +770,8 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
 
         logger.info('Finished calculating bias correction factors.')
 
-        if fill_extend:
-            out = self.fill_extend(out, smooth_extend)
+        out = self.fill_smooth_extend(out, fill_extend, smooth_extend,
+                                      smooth_interior)
 
         self.write_outputs(fp_out, out)
 
diff --git a/sup3r/utilities/utilities.py b/sup3r/utilities/utilities.py
@@ -4,22 +4,22 @@
 @author: bbenton
 """
 
-import numpy as np
-import logging
 import glob
-from scipy import ndimage as nd
-from scipy.interpolate import RegularGridInterpolator
-from scipy.interpolate import interp1d
-from scipy.ndimage import zoom
-from scipy.ndimage.filters import gaussian_filter
-from fnmatch import fnmatch
+import logging
 import os
 import re
+from fnmatch import fnmatch
 from warnings import warn
-import psutil
+
+import numpy as np
 import pandas as pd
-from packaging import version
+import psutil
 import xarray as xr
+from packaging import version
+from scipy import ndimage as nd
+from scipy.interpolate import RegularGridInterpolator, interp1d
+from scipy.ndimage import zoom
+from scipy.ndimage.filters import gaussian_filter
 
 np.random.seed(42)
 
@@ -140,7 +140,7 @@ def get_chunk_slices(arr_size, chunk_size, index_slice=slice(None)):
 
 
 def get_raster_shape(raster_index):
-    """method to get shape of raster_index"""
+    """Method to get shape of raster_index"""
 
     if any(isinstance(r, slice) for r in raster_index):
         shape = (raster_index[0].stop - raster_index[0].start,
@@ -182,7 +182,7 @@ def get_wrf_date_range(files):
 
 
 def uniform_box_sampler(data, shape):
-    '''Extracts a sample cut from data.
+    """Extracts a sample cut from data.
 
     Parameters
     ----------
@@ -197,7 +197,7 @@ def uniform_box_sampler(data, shape):
     -------
     slices : list
         List of slices corresponding to row and col extent of arr sample
-    '''
+    """
 
     shape_1 = data.shape[0] if data.shape[0] < shape[0] else shape[0]
     shape_2 = data.shape[1] if data.shape[1] < shape[1] else shape[1]
@@ -299,7 +299,7 @@ def weighted_time_sampler(data, shape, weights):
 
 
 def uniform_time_sampler(data, shape):
-    '''Extracts a temporal slice from data.
+    """Extracts a temporal slice from data.
 
     Parameters
     ----------
@@ -314,7 +314,7 @@ def uniform_time_sampler(data, shape):
     -------
     slice : slice
         time slice with size shape
-    '''
+    """
     shape = data.shape[2] if data.shape[2] < shape else shape
     start = np.random.randint(0, data.shape[2] - shape + 1)
     stop = start + shape
@@ -996,7 +996,7 @@ def potential_temperature_difference(T_top, P_top, T_bottom, P_bottom):
     """Potential temp difference calculation
 
     Parameters
-    ---------
+    ----------
     T_top : ndarray
         Temperature at higher height. Used in the approximation of potential
         temperature derivative
@@ -1023,7 +1023,7 @@ def potential_temperature_average(T_top, P_top, T_bottom, P_bottom):
     """Potential temp average calculation
 
     Parameters
-    ---------
+    ----------
     T_top : ndarray
         Temperature at higher height. Used in the approximation of potential
         temperature derivative
diff --git a/tests/training/test_bias_correction.py b/tests/training/test_bias_correction.py
@@ -1,19 +1,19 @@
 # -*- coding: utf-8 -*-
 """pytests bias correction calculations"""
-import h5py
 import os
-import pytest
 import tempfile
+
+import h5py
 import numpy as np
+import pytest
 import xarray as xr
 
-from sup3r import TEST_DATA_DIR, CONFIG_DIR
-from sup3r.models import Sup3rGan
-from sup3r.qa.qa import Sup3rQa
-from sup3r.pipeline.forward_pass import ForwardPass, ForwardPassStrategy
+from sup3r import CONFIG_DIR, TEST_DATA_DIR
 from sup3r.bias.bias_calc import LinearCorrection, MonthlyLinearCorrection
 from sup3r.bias.bias_transforms import local_linear_bc, monthly_local_linear_bc
-
+from sup3r.models import Sup3rGan
+from sup3r.pipeline.forward_pass import ForwardPass, ForwardPassStrategy
+from sup3r.qa.qa import Sup3rQa
 
 FP_NSRDB = os.path.join(TEST_DATA_DIR, 'test_nsrdb_co_2018.h5')
 FP_CC = os.path.join(TEST_DATA_DIR, 'rsds_test.nc')
@@ -25,6 +25,39 @@
     SHAPE = (len(fh.lat.values), len(fh.lon.values))
 
 
+def test_smooth_interior_bc():
+    """Test linear bias correction with interior smoothing"""
+
+    calc = LinearCorrection(FP_NSRDB, FP_CC, 'ghi', 'rsds',
+                            TARGET, SHAPE, bias_handler='DataHandlerNCforCC')
+
+    out = calc.run(knn=1, threshold=0.6, fill_extend=False, max_workers=1)
+    og_scalar = out['rsds_scalar']
+    og_adder = out['rsds_adder']
+    nan_mask = np.isnan(og_scalar)
+    assert np.isnan(og_adder[nan_mask]).all()
+
+    out = calc.run(knn=1, threshold=0.6, fill_extend=True, smooth_interior=0,
+                   max_workers=1)
+    scalar = out['rsds_scalar']
+    adder = out['rsds_adder']
+    # Make sure smooth_interior=0 does not change interior pixels
+    assert np.allclose(og_scalar[~nan_mask], scalar[~nan_mask])
+    assert np.allclose(og_adder[~nan_mask], adder[~nan_mask])
+    assert not np.isnan(adder[nan_mask]).any()
+    assert not np.isnan(scalar[nan_mask]).any()
+
+    # make sure smoothing affects the interior pixels but not the exterior
+    out = calc.run(knn=1, threshold=0.6, fill_extend=True, smooth_interior=1,
+                   max_workers=1)
+    smooth_scalar = out['rsds_scalar']
+    smooth_adder = out['rsds_adder']
+    assert not np.allclose(smooth_scalar[~nan_mask], scalar[~nan_mask])
+    assert not np.allclose(smooth_adder[~nan_mask], adder[~nan_mask])
+    assert np.allclose(smooth_scalar[nan_mask], scalar[nan_mask])
+    assert np.allclose(smooth_adder[nan_mask], adder[nan_mask])
+
+
 def test_linear_bc():
     """Test linear bias correction"""