Skip to content

Commit 8b8f859

Browse files
authored
Merge pull request #77 from NREL/bnb/dev
Bnb/dev
2 parents 76a6d86 + e815c42 commit 8b8f859

16 files changed

+493
-100
lines changed

README.rst

+6
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ The NSRDB `Data Model
4949
aggregation framework that sources, processes, and prepares data for input to
5050
All-Sky.
5151

52+
The MLClouds Model
53+
==================
54+
The `MLClouds Model <https://github.com/NREL/mlclouds.git>`_ is used to predict
55+
missing cloud properties (a.k.a. Gap Fill). The NSRDB interface with MLClouds
56+
can be found `here <https://github.com/NREL/nsrdb/tree/master/nsrdb/gap_fill>`_.
57+
5258
Installation
5359
============
5460

nsrdb/cli.py

+61-22
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,16 @@ def main(ctx, config, verbose):
125125
126126
To do a standard CONUS / Full Disc run use the following commands::
127127
128-
$ config='{"year": <year>, "out_dir": <out_dir>}'
129-
$ python -m nsrdb.cli create-configs -c config
128+
$ CONFIG='{"year": <year>, "out_dir": <out_dir>}'
129+
130+
$ python -m nsrdb.cli create-configs -c "${CONFIG}"
131+
130132
$ cd <out_dir>
133+
131134
$ bash run.sh (run this until all main steps are complete)
135+
132136
$ cd post_proc
137+
133138
$ bash run.sh (run this until all post-proc steps are complete)
134139
135140
See the help pages of the module CLIs for more details on the config files
@@ -248,16 +253,17 @@ def pipeline(ctx, config, cancel, monitor, background, verbose):
248253
@click.option(
249254
'--run_type',
250255
'-r',
251-
default='full',
256+
default='surfrad',
252257
type=str,
253-
help="""Run type to create configs for. Can be "full" (generates all config
254-
and pipline files for the given year, including all domain main runs,
255-
blending, aggregation, and collection), or "main" (for standard run without
256-
post-processing, with data-model, ml-cloud-fill, all-sky, and
257-
collect-data-model), "aggregate" (for aggregating post-2018 data to
258-
pre-2018 resolution), "blend" (for blending east and west domains into a
259-
single domain), or "post" (for all blending / aggregation / collection for
260-
a given year)""",
258+
help="""Run type to create configs for. Can be "surfrad" (just writes a
259+
single template config with any provided kwargs replaced, with a surfrad
260+
meta file), "full" (generates all config and pipeline files for the given
261+
year, including all domain main runs, blending, aggregation, and
262+
collection), "main" (for standard run without post-processing, with
263+
data-model, ml-cloud-fill, all-sky, and collect-data-model), "aggregate"
264+
(for aggregating post-2018 data to pre-2018 resolution), "blend" (for
265+
blending east and west domains into a single domain), or "post" (for all
266+
blending / aggregation / collection for a given year)""",
261267
)
262268
@click.option(
263269
'--all_domains',
@@ -277,27 +283,30 @@ def pipeline(ctx, config, cancel, monitor, background, verbose):
277283
)
278284
@click.pass_context
279285
def create_configs(
280-
ctx, config, run_type='full', all_domains=False, collect=False
286+
ctx, config, run_type='surfrad', all_domains=False, collect=False
281287
):
282288
"""Create config files for standard NSRDB runs using config templates.
283289
284-
Examples
285-
--------
286-
$ python -m nsrdb.cli create-configs -c '{"year": 2020, "out_dir": "./"}'
290+
To generate all full_disc / conus run directories for east /
291+
west regions, each containing main routine config files, run the
292+
following::
293+
294+
$ CONFIG='{"year": 2020, "out_dir": "./"}'
287295
288-
The above will generate all full_disc / conus run directories for east /
289-
west regions, each with main routine config files contained. Additionally,
290-
conus / full_disc blend configs, aggregation config, collection config, and
291-
a post processing pipeline config with all these steps will be written to a
292-
"post_proc" directory so that post-processing can be run simply with::
296+
$ python -m nsrdb.cli create-configs --run_type full -c "${CONFIG}"
297+
298+
Additionally, conus / full_disc blend configs, aggregation config,
299+
collection config, and a post processing pipeline config with all these
300+
steps will be written to a "post_proc" directory so that post-processing
301+
can be run simply with::
293302
294303
$ python -m nsrdb.cli pipeline -c config_pipeline_post.json
295304
"""
296305

297306
ctx.ensure_object(dict)
298307
func_name = f'collect_{run_type}' if collect else run_type
299308
func_name = 'main_all' if run_type == 'main' and all_domains else func_name
300-
valid_types = ['full', 'main', 'aggregate', 'blend', 'post']
309+
valid_types = ['full', 'main', 'aggregate', 'blend', 'post', 'surfrad']
301310
msg = (
302311
f'Received unknown "run_type" {run_type}. Accepted values are '
303312
f'{valid_types}'
@@ -436,6 +445,7 @@ def ml_cloud_fill(ctx, config, verbose=False, pipeline_step=None):
436445
"col_chunk": 10000,
437446
"fill_all": false,
438447
"max_workers": 4,
448+
"model_path": ...
439449
}
440450
}
441451
@@ -659,7 +669,7 @@ def collect_data_model(ctx, config, verbose=False, pipeline_step=None):
659669
'-c',
660670
type=CONFIG_TYPE,
661671
required=True,
662-
help='Path to config file or dict with kwargs for NSRDB.all_sky()',
672+
help='Path to config file or dict with kwargs for NSRDB.collect_final()',
663673
)
664674
@click.option(
665675
'-v',
@@ -692,6 +702,34 @@ def collect_final(ctx, config, verbose=False, pipeline_step=None):
692702
)
693703

694704

705+
@main.command()
706+
@click.option(
707+
'--config',
708+
'-c',
709+
type=CONFIG_TYPE,
710+
required=True,
711+
help='Path to config file or dict with kwargs for NSRDB.collect_daily()',
712+
)
713+
@click.option(
714+
'-v',
715+
'--verbose',
716+
is_flag=True,
717+
help='Flag to turn on debug logging. Default is False.',
718+
)
719+
@click.pass_context
720+
def collect_daily(ctx, config, verbose=False, pipeline_step=None):
721+
"""Collect daily files into a final file."""
722+
723+
BaseCLI.kickoff_single(
724+
ctx=ctx,
725+
module_name=ModuleName.COLLECT_DAILY,
726+
func=Collector.collect_daily,
727+
config=config,
728+
verbose=verbose,
729+
pipeline_step=pipeline_step,
730+
)
731+
732+
695733
@main.command()
696734
@click.option(
697735
'--config',
@@ -1045,6 +1083,7 @@ def batch(
10451083
Pipeline.COMMANDS[ModuleName.AGGREGATE] = aggregate
10461084
Pipeline.COMMANDS[ModuleName.COLLECT_DATA_MODEL] = collect_data_model
10471085
Pipeline.COMMANDS[ModuleName.COLLECT_FINAL] = collect_final
1086+
Pipeline.COMMANDS[ModuleName.COLLECT_DAILY] = collect_daily
10481087
Pipeline.COMMANDS[ModuleName.TMY] = tmy
10491088
Pipeline.COMMANDS[ModuleName.COLLECT_BLEND] = collect_blend
10501089
Pipeline.COMMANDS[ModuleName.COLLECT_AGGREGATE] = collect_aggregate

nsrdb/config/create_configs.py

+48-38
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,12 @@
3434
'meta_dir': DEFAULT_META_DIR,
3535
}
3636

37-
MAIN_KWARGS = {
38-
**BASE_KWARGS,
39-
'extent': 'full',
40-
'satellite': 'east',
37+
MAIN_KWARGS = {**BASE_KWARGS, 'extent': 'full', 'satellite': 'east'}
38+
39+
SURFRAD_KWARGS = {
40+
**MAIN_KWARGS,
41+
'freq': '15min',
42+
'spatial': '4km',
4143
}
4244

4345
BLEND_KWARGS = {
@@ -47,10 +49,7 @@
4749
'main_dir': '../',
4850
}
4951

50-
COLLECT_BLEND_KWARGS = {
51-
**BASE_KWARGS,
52-
'extent': 'full',
53-
}
52+
COLLECT_BLEND_KWARGS = {**BASE_KWARGS, 'extent': 'full'}
5453

5554
AGG_KWARGS = {
5655
**BASE_KWARGS,
@@ -62,7 +61,7 @@
6261
'conus_freq': '5min',
6362
'final_freq': '30min',
6463
'n_chunks': 32,
65-
'source_priority': ['conus', 'full_disk'],
64+
'source_priority': ['conus', 'full_disc'],
6665
}
6766

6867
COLLECT_AGG_KWARGS = {
@@ -102,14 +101,16 @@ class CreateConfigs:
102101
standard CONUS / Full Disc runs."""
103102

104103
MAIN_RUN_NAME = '{basename}_{satellite}_{extent}_{year}_{spatial}_{freq}'
104+
SURFRAD_RUN_NAME = '{basename}_{year}_surfrad'
105105
BLEND_RUN_NAME = '{basename}_{extent}_{year}_blend'
106106
AGG_RUN_NAME = '{basename}_{year}_aggregate'
107107
COLLECT_AGG_RUN_NAME = '{basename}_{year}_collect_aggregate'
108108
COLLECT_BLEND_RUN_NAME = '{basename}_{extent}_{year}_collect_blend'
109109

110110
@classmethod
111-
def _init_kwargs(cls, kwargs, default_kwargs):
111+
def init_kwargs(cls, kwargs=None, default_kwargs=None):
112112
"""Initialize config with default kwargs."""
113+
default_kwargs = default_kwargs or {}
113114
msg = f'kwargs must have a "year" key. Received {kwargs}.'
114115
assert 'year' in kwargs, msg
115116
config = copy.deepcopy(default_kwargs)
@@ -211,6 +212,7 @@ def _get_run_name(cls, config, run_type='main'):
211212
{k: v for k, v in BASE_KWARGS.items() if k not in config}
212213
)
213214
pattern_dict = {
215+
'surfrad': cls.SURFRAD_RUN_NAME,
214216
'main': cls.MAIN_RUN_NAME,
215217
'blend': cls.BLEND_RUN_NAME,
216218
'aggregate': cls.AGG_RUN_NAME,
@@ -227,7 +229,7 @@ def _get_run_name(cls, config, run_type='main'):
227229
return pattern.format(**run_config)
228230

229231
@classmethod
230-
def _update_run_templates(cls, config):
232+
def _update_run_templates(cls, config, run_type='main'):
231233
"""Replace format keys and dictionary keys in config templates with
232234
user input values."""
233235

@@ -236,6 +238,17 @@ def _update_run_templates(cls, config):
236238
f'{pprint.pformat(config, indent=2)}'
237239
)
238240

241+
config['doy_range'] = config.get(
242+
'doy_range',
243+
([1, 367] if calendar.isleap(config['year']) else [1, 366]),
244+
)
245+
config['start_doy'], config['end_doy'] = (
246+
config['doy_range'][0],
247+
config['doy_range'][1],
248+
)
249+
config['run_name'] = cls._get_run_name(config, run_type=run_type)
250+
config['out_dir'] = os.path.join(config['out_dir'], config['run_name'])
251+
239252
template = (
240253
PRE2018_CONFIG_TEMPLATE
241254
if int(config['year']) < 2018
@@ -263,6 +276,22 @@ def _update_run_templates(cls, config):
263276
config_dict, cls._get_config_file(config, 'pipeline')
264277
)
265278

279+
run_file = os.path.join(config['out_dir'], 'run.sh')
280+
with open(run_file, 'w') as f:
281+
f.write('python -m nsrdb.cli pipeline -c config_pipeline.json')
282+
283+
logger.info(f'Saved run script: {run_file}.')
284+
285+
@classmethod
286+
def surfrad(cls, kwargs):
287+
"""Get basic config template with specified parameters replaced."""
288+
config = cls.init_kwargs(kwargs, SURFRAD_KWARGS)
289+
config['extent_tag'] = EXTENT_MAP['extent_tag'][config['extent']]
290+
config['meta_file'] = os.path.join(
291+
config['meta_dir'], 'surfrad_meta.csv'
292+
)
293+
cls._update_run_templates(config, run_type='surfrad')
294+
266295
@classmethod
267296
def main(cls, kwargs):
268297
"""Modify config files with specified parameters
@@ -273,7 +302,7 @@ def main(cls, kwargs):
273302
Dictionary of parameters including year, basename, satellite,
274303
extent, freq, spatial, meta_file, doy_range
275304
"""
276-
config = cls._init_kwargs(kwargs, MAIN_KWARGS)
305+
config = cls.init_kwargs(kwargs, MAIN_KWARGS)
277306
msg = (
278307
'"extent" key not provided. Provide "extent" so correct input '
279308
'data can be selected'
@@ -284,27 +313,8 @@ def main(cls, kwargs):
284313
config['meta_file'] = cls._get_meta(config)
285314
config['spatial'], config['freq'] = cls._get_res(config)
286315

287-
config['doy_range'] = config.get(
288-
'doy_range',
289-
([1, 367] if calendar.isleap(config['year']) else [1, 366]),
290-
)
291-
292-
config['start_doy'], config['end_doy'] = (
293-
config['doy_range'][0],
294-
config['doy_range'][1],
295-
)
296-
297-
config['run_name'] = cls._get_run_name(config)
298-
config['out_dir'] = os.path.join(config['out_dir'], config['run_name'])
299-
300316
cls._update_run_templates(config)
301317

302-
run_file = os.path.join(config['out_dir'], 'run.sh')
303-
with open(run_file, 'w') as f:
304-
f.write('python -m nsrdb.cli pipeline -c config_pipeline.json')
305-
306-
logger.info(f'Saved run script: {run_file}.')
307-
308318
@classmethod
309319
def main_all(cls, kwargs):
310320
"""Modify config files for all domains with specified parameters.
@@ -463,7 +473,7 @@ def _get_agg_entry(cls, config, extent):
463473

464474
@classmethod
465475
def _aggregate(cls, kwargs):
466-
"""Get config for conus and full disk high-resolution to low-resolution
476+
"""Get config for conus and full disc high-resolution to low-resolution
467477
aggregation. This is then used as the input to `nsrdb.cli.aggregate`
468478
469479
Parameters
@@ -472,14 +482,14 @@ def _aggregate(cls, kwargs):
472482
Dictionary with keys specifying the case for which to aggregate
473483
files
474484
"""
475-
config = cls._init_kwargs(kwargs, AGG_KWARGS)
485+
config = cls.init_kwargs(kwargs, AGG_KWARGS)
476486

477487
if config['year'] == 2018:
478488
data = NSRDB_2018
479489

480490
else:
481491
data = {
482-
'full_disk': cls._get_agg_entry(config, extent='full'),
492+
'full_disc': cls._get_agg_entry(config, extent='full'),
483493
'conus': cls._get_agg_entry(config, extent='conus'),
484494
'final': cls._get_agg_entry(config, extent='final'),
485495
}
@@ -490,7 +500,7 @@ def _aggregate(cls, kwargs):
490500

491501
@classmethod
492502
def aggregate(cls, kwargs):
493-
"""Get config for conus and full disk high-resolution to low-resolution
503+
"""Get config for conus and full disc high-resolution to low-resolution
494504
aggregation. This is then used as the input to `nsrdb.cli.aggregate`
495505
496506
Parameters
@@ -521,7 +531,7 @@ def _blend(cls, kwargs):
521531
Dictionary with keys specifying the case for which to blend data
522532
files
523533
"""
524-
config = cls._init_kwargs(kwargs, BLEND_KWARGS)
534+
config = cls.init_kwargs(kwargs, BLEND_KWARGS)
525535
config['map_col'] = EXTENT_MAP['map_col'][config['extent']]
526536
config['lon_seam'] = EXTENT_MAP['lon_seam'][config['extent']]
527537
config['meta_file'] = cls._get_meta(config, run_type='blend')
@@ -599,7 +609,7 @@ def _collect_blend(cls, kwargs):
599609
Dictionary with keys specifying the case for blend collection
600610
"""
601611

602-
config = cls._init_kwargs(kwargs, COLLECT_BLEND_KWARGS)
612+
config = cls.init_kwargs(kwargs, COLLECT_BLEND_KWARGS)
603613
config['meta_final'] = cls._get_meta(config, run_type='collect-blend')
604614
config['collect_dir'] = cls._get_run_name(config, run_type='blend')
605615
config['collect_tag'] = config['collect_dir'].replace('_blend', '')
@@ -650,7 +660,7 @@ def _collect_aggregate(cls, kwargs):
650660
kwargs : dict
651661
Dictionary with keys specifying the case for aggregation collection
652662
"""
653-
config = cls._init_kwargs(kwargs, COLLECT_AGG_KWARGS)
663+
config = cls.init_kwargs(kwargs, COLLECT_AGG_KWARGS)
654664

655665
config['meta_final'] = cls._get_meta(
656666
config, run_type='collect-aggregate'

nsrdb/config/templates/config_nsrdb_post2017.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"max_workers": 10,
66
"n_chunks": 1,
77
"memory": 178,
8-
"n_writes": 50,
8+
"n_writes": 5,
99
"walltime": 48
1010
},
1111
"daily-all-sky": {},
@@ -80,4 +80,4 @@
8080
"fill_all": false,
8181
"max_workers": 4
8282
}
83-
}
83+
}

0 commit comments

Comments
 (0)