@@ -31,6 +31,7 @@ class variables in Ancillary() below.
31
31
'wind_speed',
32
32
'dew_point')
33
33
"""
34
+ import copy
34
35
from concurrent .futures import as_completed
35
36
import logging
36
37
import numpy as np
@@ -267,6 +268,16 @@ def nsrdb_data_shape(self):
267
268
268
269
return self ._nsrdb_data_shape
269
270
271
@property
def var_meta(self):
    """Meta data table describing the nsrdb variables.

    This is a read-only accessor: it hands back the object stored on the
    private ``_var_meta`` attribute without copying or modifying it.

    Returns
    -------
    pd.DataFrame
    """
    meta_table = self._var_meta
    return meta_table
280
+
270
281
@property
271
282
def processed_data (self ):
272
283
"""Get the processed data dictionary.
@@ -609,6 +620,128 @@ def convert_units(var, data):
609
620
610
621
return data
611
622
623
@classmethod
def check_merra_cloud_source(cls, var_list, cloud_vars, date, var_meta,
                             factory_kwargs):
    """Check if the cloud data source is a merra file and adjust variable
    lists and factory kwargs accordingly.

    Parameters
    ----------
    var_list : list
        List of variables being processed without the GOES cloud data
        handler
    cloud_vars : list
        List of cloud data variables from GOES being processed with the
        cloud data handler
    date : datetime.date
        Date of target processing
    var_meta : pd.DataFrame | None | str
        CSV file or dataframe containing meta data for all NSRDB variables.
    factory_kwargs : dict | None
        Optional namespace of kwargs to use to initialize variable data
        handlers from the data model's variable factory. Keyed by
        variable name. Values can be "source_dir", "handler", etc...
        source_dir for cloud variables can be a normal directory
        path or /directory/prefix*suffix where /directory/ can have
        more sub dirs. None is treated as an empty dict.

    Returns
    -------
    var_list : list
        List of variables being processed without the GOES cloud data
        handler - cloud variables have been added to this list if merra is
        source. When merra is the source this is a new list; the input
        list is not modified.
    cloud_vars : list
        List of variables being processed with the GOES cloud data handler.
        This is empty if the data source is merra.
    factory_kwargs : dict
        Optional namespace of kwargs to initialize variable data. If cloud
        variables are being sourced from merra, appropriate kwargs are
        added to this dict (in place when a dict was passed in).
    """
    merra_c_vars = ('cld_opd_dcomp', 'cld_reff_dcomp', 'cloud_type',
                    'cld_press_acha')

    # The caller may pass None (it only normalizes to {} after this call),
    # so normalize here before any dict access.
    if factory_kwargs is None:
        factory_kwargs = {}

    # Nothing to check unless one of the merra-capable cloud variables
    # was requested.
    if not any(cv in cloud_vars for cv in merra_c_vars):
        return var_list, cloud_vars, factory_kwargs

    # The cloud_type handler is used as the probe for the source files.
    var_kwargs = factory_kwargs.get('cloud_type', {})
    handler = VarFactory.get_base_handler('cloud_type',
                                          var_meta=var_meta,
                                          date=date, **var_kwargs)
    is_merra, new_kwargs = cls.is_merra_cloud(handler)

    if not is_merra:
        return var_list, cloud_vars, factory_kwargs

    for var in merra_c_vars:
        # setdefault: a variable may have no user-supplied kwargs yet.
        # deepcopy: each variable gets its own independent kwargs dict.
        factory_kwargs.setdefault(var, {}).update(
            copy.deepcopy(new_kwargs))

    # Optical depth uses a different merra field and interpolation than
    # the defaults returned by is_merra_cloud for cloud_type.
    factory_kwargs['cld_opd_dcomp']['merra_name'] = 'TAUTOT'
    factory_kwargs['cld_opd_dcomp']['spatial_interp'] = 'IDW2'
    factory_kwargs['cld_opd_dcomp']['temporal_interp'] = 'linear'

    # NOTE(review): requested cloud variables that are not in merra_c_vars
    # are dropped from both lists here - confirm this is intended.
    keep_cloud_vars = [v for v in cloud_vars if v in merra_c_vars]

    # Build a new list rather than extending the caller's list in place.
    var_list = list(var_list) + keep_cloud_vars
    cloud_vars = []

    logger.info('Updated factory kwargs for cloud data from '
                'MERRA: {}'.format(factory_kwargs))

    return var_list, cloud_vars, factory_kwargs
689
+
690
@staticmethod
def is_merra_cloud(handler):
    """Check to see if cloud variables have merra2 source files for the
    current day

    The directory part of the handler's file pattern is inspected (after
    filling in ``{doy}`` if present). The source counts as merra only when
    that directory exists and holds exactly one file whose name starts
    with "merra" (case-insensitive).

    Parameters
    ----------
    handler : AncillaryVarHandler
        Base data model variable handler

    Returns
    -------
    check : bool
        True if the source is merra, False if not
    out : dict
        New factory kwargs for the variable if source is merra
    """
    file_pattern = handler.pattern
    if file_pattern is None:
        return False, {}

    if '{doy}' in file_pattern:
        file_pattern = file_pattern.format(doy=handler.doy)

    source_dir = os.path.dirname(file_pattern)
    if not os.path.exists(source_dir):
        return False, {}

    # An empty directory yields an empty candidate list, which fails the
    # "exactly one" requirement below.
    merra_files = [name for name in os.listdir(source_dir)
                   if name.lower().startswith('merra')]
    if len(merra_files) != 1:
        return False, {}

    # Optical depth gets its own interpolation settings plus a merra name.
    is_opd = handler.name == 'cld_opd_dcomp'

    out = dict(handler='MerraVar',
               pattern=os.path.join(source_dir, merra_files[0]),
               merra_dset='tavg1_2d_rad_Nx_clouds',
               data_source='MERRA2',
               elevation_correct=False,
               spatial_interp='IDW2' if is_opd else 'NN',
               temporal_interp='linear' if is_opd else 'nearest',
               source_directory=source_dir)
    if is_opd:
        out['merra_name'] = 'TAUTOT'

    return True, out
744
+
612
745
def is_cloud_var (self , var ):
613
746
"""Determine whether or not the variable is a cloud variable from the
614
747
CLAVR-x / GOES data
@@ -1218,9 +1351,6 @@ def _process_multiple(cls, var_list, date, nsrdb_grid,
1218
1351
var_meta = var_meta , factory_kwargs = factory_kwargs ,
1219
1352
scale = scale , max_workers = max_workers )
1220
1353
1221
- # run pre-flight checks
1222
- data_model .run_pre_flight (var_list )
1223
-
1224
1354
# default multiple compute
1225
1355
if var_list is None :
1226
1356
var_list = cls .ALL_VARS
@@ -1243,6 +1373,17 @@ def _process_multiple(cls, var_list, date, nsrdb_grid,
1243
1373
derived_vars = [v for v in var_list if data_model .is_derived_var (v )]
1244
1374
var_list = [v for v in var_list if v not in derived_vars ]
1245
1375
1376
+ temp = cls .check_merra_cloud_source (var_list , cloud_vars , date ,
1377
+ var_meta , factory_kwargs )
1378
+ var_list , cloud_vars , factory_kwargs = temp
1379
+ factory_kwargs = {} if factory_kwargs is None else factory_kwargs
1380
+ data_model ._factory_kwargs = factory_kwargs
1381
+
1382
+ # run pre-flight checks
1383
+ data_model .run_pre_flight (var_list )
1384
+ data_model .run_pre_flight (cloud_vars )
1385
+ data_model .run_pre_flight (derived_vars )
1386
+
1246
1387
logger .info ('First processing data for variable list: {}'
1247
1388
.format (var_list ))
1248
1389
logger .info ('Then processing cloud data for variable list: {}'
@@ -1424,8 +1565,11 @@ def dump(self, var, fpath_out, data, purge=False, mode='w'):
1424
1565
meta_gids = self .nsrdb_grid [['gid' ]]
1425
1566
fout .meta = meta_gids
1426
1567
1427
- var_obj = VarFactory .get_base_handler (
1428
- var , var_meta = self ._var_meta , date = self .date )
1568
+ var_kwargs = self ._factory_kwargs .get (var , {})
1569
+ var_obj = VarFactory .get_base_handler (var ,
1570
+ var_meta = self ._var_meta ,
1571
+ date = self .date ,
1572
+ ** var_kwargs )
1429
1573
attrs = var_obj .attrs
1430
1574
1431
1575
fout ._add_dset (dset_name = var , data = data ,
0 commit comments