14
14
15
15
import dask .array as da
16
16
import numpy as np
17
- import pandas as pd
18
17
from rex .utilities .bc_utils import QuantileDeltaMapping
19
18
from scipy .ndimage import gaussian_filter
20
19
21
20
from sup3r .preprocessing import Rasterizer
21
+ from sup3r .preprocessing .utilities import make_time_index_from_kws
22
22
23
23
logger = logging .getLogger (__name__ )
24
24
@@ -27,7 +27,7 @@ def _get_factors(target, shape, var_names, bias_fp, threshold=0.1):
27
27
"""Get bias correction factors from sup3r's standard resource
28
28
29
29
This was stripped without any change from original
30
- `get_spatial_bc_factors ` to allow re-use in other `*_bc_factors`
30
+ `_get_spatial_bc_factors ` to allow re-use in other `*_bc_factors`
31
31
functions.
32
32
33
33
Parameters
@@ -76,7 +76,7 @@ def _get_factors(target, shape, var_names, bias_fp, threshold=0.1):
76
76
return out
77
77
78
78
79
- def get_spatial_bc_factors (lat_lon , feature_name , bias_fp , threshold = 0.1 ):
79
+ def _get_spatial_bc_factors (lat_lon , feature_name , bias_fp , threshold = 0.1 ):
80
80
"""Get bc factors (scalar/adder) for the given feature for the given
81
81
domain (specified by lat_lon).
82
82
@@ -114,7 +114,7 @@ def get_spatial_bc_factors(lat_lon, feature_name, bias_fp, threshold=0.1):
114
114
)
115
115
116
116
117
- def get_spatial_bc_quantiles (
117
+ def _get_spatial_bc_quantiles (
118
118
lat_lon : Union [np .ndarray , da .core .Array ],
119
119
base_dset : str ,
120
120
feature_name : str ,
@@ -200,7 +200,7 @@ def get_spatial_bc_quantiles(
200
200
>>> lat_lon = np.array([
201
201
... [39.649033, -105.46875 ],
202
202
... [39.649033, -104.765625]])
203
- >>> params = get_spatial_bc_quantiles (
203
+ >>> params = _get_spatial_bc_quantiles (
204
204
... lat_lon, "ghi", "rsds", "./dist_params.hdf")
205
205
206
206
"""
@@ -297,7 +297,7 @@ def local_linear_bc(
297
297
out = data * scalar + adder
298
298
"""
299
299
300
- out = get_spatial_bc_factors (lat_lon , feature_name , bias_fp )
300
+ out = _get_spatial_bc_factors (lat_lon , feature_name , bias_fp )
301
301
scalar , adder = out ['scalar' ], out ['adder' ]
302
302
# 3D bias correction factors have seasonal/monthly correction in last axis
303
303
if len (scalar .shape ) == 3 and len (adder .shape ) == 3 :
@@ -402,8 +402,8 @@ def monthly_local_linear_bc(
402
402
out : np.ndarray
403
403
out = data * scalar + adder
404
404
"""
405
- time_index = pd . date_range ( ** date_range_kwargs )
406
- out = get_spatial_bc_factors (lat_lon , feature_name , bias_fp )
405
+ time_index = make_time_index_from_kws ( date_range_kwargs )
406
+ out = _get_spatial_bc_factors (lat_lon , feature_name , bias_fp )
407
407
scalar , adder = out ['scalar' ], out ['adder' ]
408
408
409
409
assert len (scalar .shape ) == 3 , 'Monthly bias correct needs 3D scalars'
@@ -471,6 +471,7 @@ def local_qdm_bc(
471
471
no_trend = False ,
472
472
delta_denom_min = None ,
473
473
delta_denom_zero = None ,
474
+ delta_range = None ,
474
475
out_range = None ,
475
476
max_workers = 1 ,
476
477
):
@@ -536,6 +537,11 @@ def local_qdm_bc(
536
537
division by a very small number making delta blow up and resulting
537
538
in very large output bias corrected values. See equation 4 of
538
539
Cannon et al., 2015 for the delta term.
540
+ delta_range : tuple | None
541
+ Option to set a (min, max) on the delta term in QDM. This can help
542
+ prevent QDM from making non-realistic increases/decreases in
543
+ otherwise physical values. See equation 4 of Cannon et al., 2015 for
544
+ the delta term.
539
545
out_range : None | tuple
540
546
Option to set floor/ceiling values on the output data.
541
547
max_workers: int | None
@@ -583,12 +589,15 @@ def local_qdm_bc(
583
589
584
590
"""
585
591
# Confirm that the given time matches the expected data size
586
- time_index = pd .date_range (** date_range_kwargs )
587
- assert (
588
- data .shape [2 ] == time_index .size
589
- ), 'Time should align with data 3rd dimension'
590
-
591
- params = get_spatial_bc_quantiles (
592
+ msg = f'data was expected to be a 3D array but got shape { data .shape } '
593
+ assert data .ndim == 3 , msg
594
+ time_index = make_time_index_from_kws (date_range_kwargs )
595
+ msg = (f'Time should align with data 3rd dimension but got data '
596
+ f'{ data .shape } and time_index length '
597
+ f'{ time_index .size } : { time_index } ' )
598
+ assert data .shape [- 1 ] == time_index .size , msg
599
+
600
+ params = _get_spatial_bc_quantiles (
592
601
lat_lon = lat_lon ,
593
602
base_dset = base_dset ,
594
603
feature_name = feature_name ,
@@ -635,6 +644,7 @@ def local_qdm_bc(
635
644
log_base = cfg ['log_base' ],
636
645
delta_denom_min = delta_denom_min ,
637
646
delta_denom_zero = delta_denom_zero ,
647
+ delta_range = delta_range ,
638
648
)
639
649
640
650
subset_idx = nearest_window_idx == window_idx
@@ -654,10 +664,17 @@ def local_qdm_bc(
654
664
output = np .maximum (output , np .min (out_range ))
655
665
output = np .minimum (output , np .max (out_range ))
656
666
667
+ if np .isnan (output ).any ():
668
+ msg = ('Presrat bias correction resulted in NaN values! If this is a '
669
+ 'relative QDM, you may try setting ``delta_denom_min`` or '
670
+ '``delta_denom_zero``' )
671
+ logger .error (msg )
672
+ raise RuntimeError (msg )
673
+
657
674
return output
658
675
659
676
660
- def get_spatial_bc_presrat (
677
+ def _get_spatial_bc_presrat (
661
678
lat_lon : np .array ,
662
679
base_dset : str ,
663
680
feature_name : str ,
@@ -766,7 +783,7 @@ def get_spatial_bc_presrat(
766
783
>>> lat_lon = np.array([
767
784
... [39.649033, -105.46875 ],
768
785
... [39.649033, -104.765625]])
769
- >>> params = get_spatial_bc_quantiles (
786
+ >>> params = _get_spatial_bc_quantiles (
770
787
... lat_lon, "ghi", "rsds", "./dist_params.hdf")
771
788
772
789
"""
@@ -788,12 +805,12 @@ def get_spatial_bc_presrat(
788
805
)
789
806
790
807
791
- def apply_presrat_bc (data , time_index , base_params , bias_params ,
792
- bias_fut_params , bias_tau_fut , k_factor ,
793
- time_window_center , dist = 'empirical' , sampling = 'invlog' ,
794
- log_base = 10 , relative = True , no_trend = False ,
795
- zero_rate_threshold = 1.157e-7 , out_range = None ,
796
- max_workers = 1 ):
808
+ def _apply_presrat_bc (data , time_index , base_params , bias_params ,
809
+ bias_fut_params , bias_tau_fut , k_factor ,
810
+ time_window_center , dist = 'empirical' , sampling = 'invlog' ,
811
+ log_base = 10 , relative = True , no_trend = False ,
812
+ delta_denom_min = None , delta_range = None , out_range = None ,
813
+ max_workers = 1 ):
797
814
"""Run PresRat to bias correct data from input parameters and not from bias
798
815
correction file on disk.
799
816
@@ -868,13 +885,18 @@ def apply_presrat_bc(data, time_index, base_params, bias_params,
868
885
:class:`rex.utilities.bc_utils.QuantileDeltaMapping`. Note that this
869
886
assumes that params_mh is the data distribution representative for the
870
887
target data.
871
- zero_rate_threshold : float, default=1.157e-7
872
- Threshold value used to determine the zero rate in the observed
873
- historical dataset and the minimum value in the denominator in relative
874
- QDM. For instance, 0.01 means that anything less than that will be
875
- considered negligible, hence equal to zero. Dai 2006 defined this as
876
- 1mm/day. Pierce 2015 used 0.01mm/day. We recommend 0.01mm/day
877
- (1.157e-7 kg/m2/s).
888
+ delta_denom_min : float | None
889
+ Option to specify a minimum value for the denominator term in the
890
+ calculation of a relative delta value. This prevents division by a
891
+ very small number making delta blow up and resulting in very large
892
+ output bias corrected values. See equation 4 of Cannon et al., 2015
893
+ for the delta term. If this is not set, the ``zero_rate_threshold``
894
+ calculated as part of the presrat bias calculation will be used
895
+ delta_range : tuple | None
896
+ Option to set a (min, max) on the delta term in QDM. This can help
897
+ prevent QDM from making non-realistic increases/decreases in
898
+ otherwise physical values. See equation 4 of Cannon et al., 2015 for
899
+ the delta term.
878
900
out_range : None | tuple
879
901
Option to set floor/ceiling values on the output data.
880
902
max_workers : int | None
@@ -904,7 +926,8 @@ def apply_presrat_bc(data, time_index, base_params, bias_params,
904
926
relative = relative ,
905
927
sampling = sampling ,
906
928
log_base = log_base ,
907
- delta_denom_min = zero_rate_threshold ,
929
+ delta_denom_min = delta_denom_min ,
930
+ delta_range = delta_range ,
908
931
)
909
932
910
933
# input 3D shape (spatial, spatial, temporal)
@@ -928,6 +951,13 @@ def apply_presrat_bc(data, time_index, base_params, bias_params,
928
951
data_unbiased = np .maximum (data_unbiased , np .min (out_range ))
929
952
data_unbiased = np .minimum (data_unbiased , np .max (out_range ))
930
953
954
+ if np .isnan (data_unbiased ).any ():
955
+ msg = ('Presrat bias correction resulted in NaN values! If this is a '
956
+ 'relative QDM, you may try setting ``delta_denom_min`` or '
957
+ '``delta_denom_zero``' )
958
+ logger .error (msg )
959
+ raise RuntimeError (msg )
960
+
931
961
return data_unbiased
932
962
933
963
@@ -941,6 +971,9 @@ def local_presrat_bc(data: np.ndarray,
941
971
threshold = 0.1 ,
942
972
relative = True ,
943
973
no_trend = False ,
974
+ delta_denom_min = None ,
975
+ delta_range = None ,
976
+ k_range = None ,
944
977
out_range = None ,
945
978
max_workers = 1 ,
946
979
):
@@ -996,18 +1029,34 @@ def local_presrat_bc(data: np.ndarray,
996
1029
:class:`rex.utilities.bc_utils.QuantileDeltaMapping`. Note that this
997
1030
assumes that params_mh is the data distribution representative for the
998
1031
target data.
1032
+ delta_denom_min : float | None
1033
+ Option to specify a minimum value for the denominator term in the
1034
+ calculation of a relative delta value. This prevents division by a
1035
+ very small number making delta blow up and resulting in very large
1036
+ output bias corrected values. See equation 4 of Cannon et al., 2015
1037
+ for the delta term. If this is not set, the ``zero_rate_threshold``
1038
+ calculated as part of the presrat bias calculation will be used
1039
+ delta_range : tuple | None
1040
+ Option to set a (min, max) on the delta term in QDM. This can help
1041
+ prevent QDM from making non-realistic increases/decreases in
1042
+ otherwise physical values. See equation 4 of Cannon et al., 2015 for
1043
+ the delta term.
1044
+ k_range : tuple | None
1045
+ Option to set a (min, max) value for the k-factor multiplier
999
1046
out_range : None | tuple
1000
1047
Option to set floor/ceiling values on the output data.
1001
1048
max_workers : int | None
1002
1049
Max number of workers to use for QDM process pool
1003
1050
"""
1004
- time_index = pd .date_range (** date_range_kwargs )
1005
- assert data .ndim == 3 , 'data was expected to be a 3D array'
1006
- assert (
1007
- data .shape [- 1 ] == time_index .size
1008
- ), 'The last dimension of data should be time'
1009
-
1010
- params = get_spatial_bc_presrat (
1051
+ time_index = make_time_index_from_kws (date_range_kwargs )
1052
+ msg = f'data was expected to be a 3D array but got shape { data .shape } '
1053
+ assert data .ndim == 3 , msg
1054
+ msg = (f'Time should align with data 3rd dimension but got data '
1055
+ f'{ data .shape } and time_index length '
1056
+ f'{ time_index .size } : { time_index } ' )
1057
+ assert data .shape [- 1 ] == time_index .size , msg
1058
+
1059
+ params = _get_spatial_bc_presrat (
1011
1060
lat_lon , base_dset , feature_name , bias_fp , threshold
1012
1061
)
1013
1062
cfg = params ['cfg' ]
@@ -1022,21 +1071,30 @@ def local_presrat_bc(data: np.ndarray,
1022
1071
sampling = cfg ['sampling' ]
1023
1072
log_base = cfg ['log_base' ]
1024
1073
zero_rate_threshold = cfg ['zero_rate_threshold' ]
1074
+ delta_denom_min = delta_denom_min or zero_rate_threshold
1075
+
1076
+ if k_range is not None :
1077
+ k_factor = np .maximum (k_factor , np .min (k_range ))
1078
+ k_factor = np .minimum (k_factor , np .max (k_range ))
1079
+
1080
+ logger .debug (f'Presrat K Factor has shape { k_factor .shape } and ranges '
1081
+ f'from { k_factor .min ()} to { k_factor .max ()} ' )
1025
1082
1026
1083
if lr_padded_slice is not None :
1027
1084
spatial_slice = (lr_padded_slice [0 ], lr_padded_slice [1 ])
1028
1085
base_params = base_params [spatial_slice ]
1029
1086
bias_params = bias_params [spatial_slice ]
1030
1087
bias_fut_params = bias_fut_params [spatial_slice ]
1031
1088
1032
- data_unbiased = apply_presrat_bc (data , time_index , base_params ,
1033
- bias_params , bias_fut_params ,
1034
- bias_tau_fut , k_factor ,
1035
- time_window_center , dist = dist ,
1036
- sampling = sampling , log_base = log_base ,
1037
- relative = relative , no_trend = no_trend ,
1038
- zero_rate_threshold = zero_rate_threshold ,
1039
- out_range = out_range ,
1040
- max_workers = max_workers )
1089
+ data_unbiased = _apply_presrat_bc (data , time_index , base_params ,
1090
+ bias_params , bias_fut_params ,
1091
+ bias_tau_fut , k_factor ,
1092
+ time_window_center , dist = dist ,
1093
+ sampling = sampling , log_base = log_base ,
1094
+ relative = relative , no_trend = no_trend ,
1095
+ delta_denom_min = delta_denom_min ,
1096
+ delta_range = delta_range ,
1097
+ out_range = out_range ,
1098
+ max_workers = max_workers )
1041
1099
1042
1100
return data_unbiased
0 commit comments