Skip to content

Commit 7be530c

Browse files
authored
Merge pull request #49 from NREL/gb/merra_clouds
Gb/merra clouds
2 parents 6bb4ab8 + 187e900 commit 7be530c

File tree

5 files changed

+257
-42
lines changed

5 files changed

+257
-42
lines changed

nsrdb/config/nsrdb_vars.csv

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ cloud_type,"'N/A': -15, 'Clear': 0, 'Probably Clear': 1, 'Fog': 2, 'Water': 3, '
1414
dew_point,Celsius,int16,10,-100,70,,,,,,derived,,,2000,500
1515
dhi,W/m2,uint16,1,0,800,,,,,,output,,,2000,500
1616
dni,W/m2,uint16,1,0,1350,,,,,,output,,,2000,500
17-
fill_flag,"0: no fill, 1: missing cloud type, 2: large timeseries missing cloud type, 3: missing cloud prop, 4: large timeseries missing cloud prop, 5: ghi exceeds clearsky, 6: neg or missing irrad, 7: mlclouds gap fill",uint8,1,0,100,,,,,,output,,,2000,1000
18-
cloud_fill_flag,"0: no fill, 1: missing cloud type, 2: large timeseries missing cloud type, 3: missing cloud prop, 4: large timeseries missing cloud prop, 5: ghi exceeds clearsky, 6: neg or missing irrad, 7: mlclouds gap fill",uint8,1,0,100,,,,,,output,,,2000,1000
17+
fill_flag,"0: no fill, 1: missing cloud type, 2: large timeseries missing cloud type, 3: missing cloud prop, 4: large timeseries missing cloud prop, 5: ghi exceeds clearsky, 6: neg or missing irrad, 7: mlclouds gap fill, 8: cloud data from merra",uint8,1,0,100,,,,,,output,,,2000,1000
18+
cloud_fill_flag,"0: no fill, 1: missing cloud type, 2: large timeseries missing cloud type, 3: missing cloud prop, 4: large timeseries missing cloud prop, 5: ghi exceeds clearsky, 6: neg or missing irrad, 7: mlclouds gap fill, 8: cloud data from merra",uint8,1,0,100,,,,,,output,,,2000,1000
1919
ghi,W/m2,uint16,1,0,1350,,,,,,output,,,2000,500
2020
ozone,atm-cm,uint16,1000,0.2,0.5,IDW2,FALSE,linear,tavg1_2d_slv_Nx,TO3,MERRA2,/projects/pxs/ancillary/merra,,2000,500
2121
relative_humidity,%,uint16,100,0,100,,,,,,derived,,,2000,500

nsrdb/data_model/base_handler.py

+20
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,26 @@ def scale_factor(self):
197197
"""
198198
return float(self.var_meta.loc[self.mask, 'scale_factor'].values[0])
199199

200+
@property
def date(self):
    """Date associated with this handler.

    Returns
    -------
    datetime.date
    """
    # Read-only view of the privately stored date attribute.
    handler_date = self._date
    return handler_date
209+
210+
@property
def doy(self):
    """Day of year of this handler's date.

    Returns
    -------
    int
    """
    # tm_yday is the day-of-year field of the date's time tuple.
    time_struct = self.date.timetuple()
    return time_struct.tm_yday
219+
200220
@property
201221
def dtype(self):
202222
"""Get the data type attribute.

nsrdb/data_model/data_model.py

+149-5
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class variables in Ancillary() below.
3131
'wind_speed',
3232
'dew_point')
3333
"""
34+
import copy
3435
from concurrent.futures import as_completed
3536
import logging
3637
import numpy as np
@@ -267,6 +268,16 @@ def nsrdb_data_shape(self):
267268

268269
return self._nsrdb_data_shape
269270

271+
@property
def var_meta(self):
    """Meta data table describing the nsrdb variables.

    Returns
    -------
    pd.DataFrame
    """
    # Read-only view of the privately stored variable meta data.
    meta_table = self._var_meta
    return meta_table
280+
270281
@property
271282
def processed_data(self):
272283
"""Get the processed data dictionary.
@@ -609,6 +620,128 @@ def convert_units(var, data):
609620

610621
return data
611622

623+
@classmethod
def check_merra_cloud_source(cls, var_list, cloud_vars, date, var_meta,
                             factory_kwargs):
    """Check if the cloud data source is a merra file and adjust variable
    lists and factory kwargs accordingly.

    Parameters
    ----------
    var_list : list
        List of variables being processed without the GOES cloud data
        handler
    cloud_vars : list
        List of cloud data variables from GOES being processed with the
        cloud data handler
    date : datetime.date
        Date of target processing
    var_meta : pd.DataFrame | None | str
        CSV file or dataframe containing meta data for all NSRDB variables.
    factory_kwargs : dict | None
        Optional namespace of kwargs to use to initialize variable data
        handlers from the data model's variable factory. Keyed by
        variable name. Values can be "source_dir", "handler", etc...
        source_dir for cloud variables can be a normal directory
        path or /directory/prefix*suffix where /directory/ can have
        more sub dirs. None is treated as an empty dict.

    Returns
    -------
    var_list : list
        List of variables being processed without the GOES cloud data
        handler - cloud variables have been added to this list if merra is
        source
    cloud_vars : list
        List of variables being processed with the GOES cloud data handler.
        This is empty if the data source is merra.
    factory_kwargs : dict
        Namespace of kwargs to initialize variable data (always a dict,
        even if None was input). If cloud variables are being sourced from
        merra, appropriate kwargs are added to this dict in place.
    """
    merra_c_vars = ('cld_opd_dcomp', 'cld_reff_dcomp', 'cloud_type',
                    'cld_press_acha')

    # factory_kwargs is optional; normalize up front so that the lookups
    # below cannot fail with an AttributeError on None.
    if factory_kwargs is None:
        factory_kwargs = {}

    # Nothing to do if none of the merra-capable cloud variables were
    # requested from the cloud data handler.
    if not any(cv in cloud_vars for cv in merra_c_vars):
        return var_list, cloud_vars, factory_kwargs

    # The cloud_type handler is used as the probe for the cloud source.
    var_kwargs = factory_kwargs.get('cloud_type', {})
    handler = VarFactory.get_base_handler('cloud_type',
                                          var_meta=var_meta,
                                          date=date, **var_kwargs)
    is_merra, new_kwargs = cls.is_merra_cloud(handler)

    if not is_merra:
        return var_list, cloud_vars, factory_kwargs

    for var in merra_c_vars:
        # setdefault so that variables without pre-existing kwargs get an
        # entry instead of raising a KeyError. Deep copy so that the
        # per-variable kwargs never share mutable state.
        factory_kwargs.setdefault(var, {}).update(
            copy.deepcopy(new_kwargs))

    # Optical depth uses its own merra dataset name and interpolation
    # settings in place of the shared defaults applied above.
    factory_kwargs['cld_opd_dcomp']['merra_name'] = 'TAUTOT'
    factory_kwargs['cld_opd_dcomp']['spatial_interp'] = 'IDW2'
    factory_kwargs['cld_opd_dcomp']['temporal_interp'] = 'linear'

    # Requested cloud variables that merra provides are moved to the
    # regular variable list; the cloud handler list is emptied.
    keep_cloud_vars = [v for v in cloud_vars if v in merra_c_vars]
    var_list += keep_cloud_vars
    cloud_vars = []

    logger.info('Updated factory kwargs for cloud data from '
                'MERRA: {}'.format(factory_kwargs))

    return var_list, cloud_vars, factory_kwargs
689+
690+
@staticmethod
def is_merra_cloud(handler):
    """Determine whether the cloud source directory for the handler holds
    a single merra file and build the matching factory kwargs.

    Parameters
    ----------
    handler : AncillaryVarHandler
        Base data model variable handler. Its "pattern", "doy" and "name"
        attributes are used here.

    Returns
    -------
    check : bool
        True if the source is merra, False if not
    out : dict
        New factory kwargs for the variable if source is merra, otherwise
        an empty dict.
    """
    file_pattern = handler.pattern
    if file_pattern is None:
        return False, {}

    # Resolve the day-of-year placeholder before taking the directory.
    if '{doy}' in file_pattern:
        file_pattern = file_pattern.format(doy=handler.doy)

    source_dir = os.path.dirname(file_pattern)
    if not os.path.exists(source_dir):
        return False, {}

    # Exactly one file prefixed with "merra" (case insensitive) must be
    # present; an empty directory fails this test as well.
    merra_files = [name for name in os.listdir(source_dir)
                   if name.lower().startswith('merra')]
    if len(merra_files) != 1:
        return False, {}

    # Optical depth gets its own interpolation settings and merra name.
    is_opd = handler.name == 'cld_opd_dcomp'
    merra_kwargs = {
        'handler': 'MerraVar',
        'pattern': os.path.join(source_dir, merra_files[0]),
        'merra_dset': 'tavg1_2d_rad_Nx_clouds',
        'data_source': 'MERRA2',
        'elevation_correct': False,
        'spatial_interp': 'IDW2' if is_opd else 'NN',
        'temporal_interp': 'linear' if is_opd else 'nearest',
        'source_directory': source_dir,
    }
    if is_opd:
        merra_kwargs['merra_name'] = 'TAUTOT'

    return True, merra_kwargs
744+
612745
def is_cloud_var(self, var):
613746
"""Determine whether or not the variable is a cloud variable from the
614747
CLAVR-x / GOES data
@@ -1218,9 +1351,6 @@ def _process_multiple(cls, var_list, date, nsrdb_grid,
12181351
var_meta=var_meta, factory_kwargs=factory_kwargs,
12191352
scale=scale, max_workers=max_workers)
12201353

1221-
# run pre-flight checks
1222-
data_model.run_pre_flight(var_list)
1223-
12241354
# default multiple compute
12251355
if var_list is None:
12261356
var_list = cls.ALL_VARS
@@ -1243,6 +1373,17 @@ def _process_multiple(cls, var_list, date, nsrdb_grid,
12431373
derived_vars = [v for v in var_list if data_model.is_derived_var(v)]
12441374
var_list = [v for v in var_list if v not in derived_vars]
12451375

1376+
temp = cls.check_merra_cloud_source(var_list, cloud_vars, date,
1377+
var_meta, factory_kwargs)
1378+
var_list, cloud_vars, factory_kwargs = temp
1379+
factory_kwargs = {} if factory_kwargs is None else factory_kwargs
1380+
data_model._factory_kwargs = factory_kwargs
1381+
1382+
# run pre-flight checks
1383+
data_model.run_pre_flight(var_list)
1384+
data_model.run_pre_flight(cloud_vars)
1385+
data_model.run_pre_flight(derived_vars)
1386+
12461387
logger.info('First processing data for variable list: {}'
12471388
.format(var_list))
12481389
logger.info('Then processing cloud data for variable list: {}'
@@ -1424,8 +1565,11 @@ def dump(self, var, fpath_out, data, purge=False, mode='w'):
14241565
meta_gids = self.nsrdb_grid[['gid']]
14251566
fout.meta = meta_gids
14261567

1427-
var_obj = VarFactory.get_base_handler(
1428-
var, var_meta=self._var_meta, date=self.date)
1568+
var_kwargs = self._factory_kwargs.get(var, {})
1569+
var_obj = VarFactory.get_base_handler(var,
1570+
var_meta=self._var_meta,
1571+
date=self.date,
1572+
**var_kwargs)
14291573
attrs = var_obj.attrs
14301574

14311575
fout._add_dset(dset_name=var, data=data,

0 commit comments

Comments
 (0)