diff --git a/configs/anvil/config.20170926.FCT2.A_WCYCL1850S.ne30_oECv3.anvil b/configs/anvil/config.20170926.FCT2.A_WCYCL1850S.ne30_oECv3.anvil index 63dd5f336..e9d798864 100644 --- a/configs/anvil/config.20170926.FCT2.A_WCYCL1850S.ne30_oECv3.anvil +++ b/configs/anvil/config.20170926.FCT2.A_WCYCL1850S.ne30_oECv3.anvil @@ -23,7 +23,7 @@ mpasMeshName = oEC60to30v3 # Directory for mapping files (if they have been generated already). If mapping # files needed by the analysis are not found here, they will be generated and # placed in the output mappingSubdirectory -mappingDirectory = /lcrc/group/acme/mapping +mappingDirectory = /lcrc/group/acme/mpas_analysis/mapping [output] ## options related to writing out plots, intermediate cached data sets, logs, diff --git a/docs/api.rst b/docs/api.rst index 0c88c9884..be61769f3 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -87,6 +87,13 @@ Shared modules Reading MPAS Datasets --------------------- +.. currentmodule:: mpas_analysis.shared.io + +.. autosummary:: + :toctree: generated/ + + open_mpas_dataset + .. currentmodule:: mpas_analysis.shared.mpas_xarray .. autosummary:: diff --git a/mpas_analysis/ocean/index_nino34.py b/mpas_analysis/ocean/index_nino34.py index c34cf55f0..41c004909 100644 --- a/mpas_analysis/ocean/index_nino34.py +++ b/mpas_analysis/ocean/index_nino34.py @@ -4,17 +4,17 @@ import pandas as pd import numpy as np from scipy import signal, stats -import os import matplotlib.pyplot as plt from ..shared.climatology import climatology from ..shared.constants import constants from ..shared.io.utility import build_config_full_path -from ..shared.generalized_reader.generalized_reader \ - import open_multifile_dataset from ..shared.timekeeping.utility import get_simulation_start_time, \ datetime_to_days, string_to_days_since_date + +from ..shared.io import open_mpas_dataset + from ..shared.plot.plotting import plot_xtick_format, plot_size_y_axis from ..shared import AnalysisTask @@ -26,12 +26,18 @@ class IndexNino34(AnalysisTask): # {{{ A task for computing and plotting time series and spectra of the El Nino 3.4 climate index + Attributes + ---------- + + mpasTimeSeriesTask : ``MpasTimeSeriesTask`` + The task that extracts the time series from MPAS monthly output + Authors ------- Luke Van Roekel, Xylar Asay-Davis ''' - def __init__(self, config): # {{{ + def __init__(self, config, mpasTimeSeriesTask): # {{{ ''' Construct the analysis task. 
@@ -40,6 +46,9 @@ def __init__(self, config): # {{{ config : instance of MpasAnalysisConfigParser Contains configuration options + mpasTimeSeriesTask : ``MpasTimeSeriesTask`` + The task that extracts the time series from MPAS monthly output + Authors ------- Xylar Asay-Davis @@ -52,6 +61,10 @@ def __init__(self, config): # {{{ componentName='ocean', tags=['index', 'nino']) + self.mpasTimeSeriesTask = mpasTimeSeriesTask + + self.run_after(mpasTimeSeriesTask) + # }}} def setup_and_check(self): # {{{ @@ -70,19 +83,14 @@ def setup_and_check(self): # {{{ # self.calendar super(IndexNino34, self).setup_and_check() - # get a list of timeSeriesStats output files from the streams file, - # reading only those that are between the start and end dates - streamName = 'timeSeriesStatsMonthlyOutput' - self.startDate = self.config.get('index', 'startDate') - self.endDate = self.config.get('index', 'endDate') - self.inputFiles = self.historyStreams.readpath( - streamName, startDate=self.startDate, endDate=self.endDate, - calendar=self.calendar) + self.startDate = self.config.get('timeSeries', 'startDate') + self.endDate = self.config.get('timeSeries', 'endDate') + + self.variableList = \ + ['timeMonthly_avg_avgValueWithinOceanRegion_avgSurfaceTemperature'] + self.mpasTimeSeriesTask.add_variables(variableList=self.variableList) - if len(self.inputFiles) == 0: - raise IOError('No files were found in stream {} between {} and ' - '{}.'.format(streamName, self.startDate, - self.endDate)) + self.inputFile = self.mpasTimeSeriesTask.outputFile mainRunName = self.config.get('runs', 'mainRunName') @@ -110,7 +118,6 @@ def run_task(self): # {{{ self.logger.info(' Load SST data...') fieldName = 'nino' - simulationStartTime = get_simulation_start_time(self.runStreams) config = self.config calendar = self.calendar @@ -130,10 +137,6 @@ def run_task(self): # {{{ obsTitle = 'ERS SSTv4' refDate = '1800-01-01' - self.logger.info('\n Reading files:\n' - ' {} through\n {}'.format( - os.path.basename(self.inputFiles[0]), - os.path.basename(self.inputFiles[-1]))) mainRunName = config.get('runs', 'mainRunName') # regionIndex should correspond to NINO34 in surface weighted Average @@ -141,18 +144,11 @@ def run_task(self): # {{{ regionIndex = config.getint('indexNino34', 'regionIndicesToPlot') # Load data: - varName = \ - 'timeMonthly_avg_avgValueWithinOceanRegion_avgSurfaceTemperature' - varList = [varName] - ds = open_multifile_dataset(fileNames=self.inputFiles, - calendar=calendar, - config=config, - simulationStartTime=simulationStartTime, - timeVariableName=['xtime_startMonthly', - 'xtime_endMonthly'], - variableList=varList, - startDate=self.startDate, - endDate=self.endDate) + ds = open_mpas_dataset(fileName=self.inputFile, + calendar=calendar, + variableList=self.variableList, + startDate=self.startDate, + endDate=self.endDate) # Observations have been processed to the nino34Index prior to reading dsObs = xr.open_dataset(dataPath, decode_cf=False, decode_times=False) @@ -163,6 +159,7 @@ def run_task(self): # {{{ nino34Obs = dsObs.sst self.logger.info(' Compute NINO3.4 index...') + varName = self.variableList[0] regionSST = ds[varName].isel(nOceanRegions=regionIndex) nino34 = self._compute_nino34_index(regionSST, calendar) diff --git a/mpas_analysis/ocean/meridional_heat_transport.py b/mpas_analysis/ocean/meridional_heat_transport.py index 475d39ef1..fbf77797f 100644 --- a/mpas_analysis/ocean/meridional_heat_transport.py +++ b/mpas_analysis/ocean/meridional_heat_transport.py @@ -100,8 +100,6 @@ def setup_and_check(self): # {{{ 
self.mhtFile = mhtFiles[0] - self.simulationStartTime = get_simulation_start_time(self.runStreams) - self.sectionName = 'meridionalHeatTransport' # Read in obs file information diff --git a/mpas_analysis/ocean/streamfunction_moc.py b/mpas_analysis/ocean/streamfunction_moc.py index 94ba37380..1d1b1187f 100644 --- a/mpas_analysis/ocean/streamfunction_moc.py +++ b/mpas_analysis/ocean/streamfunction_moc.py @@ -3,7 +3,6 @@ import numpy as np import netCDF4 import os -from functools import partial from ..shared.constants.constants import m3ps_to_Sv from ..shared.plot.plotting import plot_vertical_section,\ @@ -11,15 +10,12 @@ from ..shared.io.utility import build_config_full_path, make_directories -from ..shared.generalized_reader.generalized_reader \ - import open_multifile_dataset +from ..shared.io import open_mpas_dataset -from ..shared.timekeeping.utility import get_simulation_start_time, \ - days_to_datetime +from ..shared.timekeeping.utility import days_to_datetime from ..shared import AnalysisTask -from ..shared.time_series import cache_time_series from ..shared.html import write_image_xml @@ -104,35 +100,17 @@ def setup_and_check(self): # {{{ analysisOptionName='config_am_mocstreamfunction_enable', raiseException=False) - # Get a list of timeSeriesStats output files from the streams file, - # reading only those that are between the start and end dates - # First a list necessary for the streamfunctionMOC climatology - streamName = 'timeSeriesStatsMonthlyOutput' - - self.simulationStartTime = get_simulation_start_time(self.runStreams) - - # Then a list necessary for the streamfunctionMOC Atlantic timeseries self.startDateTseries = config.get('timeSeries', 'startDate') self.endDateTseries = config.get('timeSeries', 'endDate') - self.inputFilesTseries = \ - self.historyStreams.readpath(streamName, - startDate=self.startDateTseries, - endDate=self.endDateTseries, - calendar=self.calendar) - if len(self.inputFilesTseries) == 0: - raise IOError('No files were found in stream {} between {} and ' - '{}.'.format(streamName, self.startDateTseries, - self.endDateTseries)) - self.startYearTseries = config.getint('timeSeries', 'startYear') self.endYearTseries = config.getint('timeSeries', 'endYear') self.sectionName = 'streamfunctionMOC' - variableList = ['timeMonthly_avg_normalVelocity', - 'timeMonthly_avg_vertVelocityTop'] + self.variableList = ['timeMonthly_avg_normalVelocity', + 'timeMonthly_avg_vertVelocityTop'] - self.mpasClimatologyTask.add_variables(variableList=variableList, + self.mpasClimatologyTask.add_variables(variableList=self.variableList, seasons=['ANN']) self.xmlFileNames = [] @@ -174,11 +152,6 @@ def run_task(self): # {{{ self.logger.info("\nPlotting streamfunction of Meridional Overturning " "Circulation (MOC)...") - self.logger.info('\n List of files for time series:\n' - ' {} through\n {}'.format( - os.path.basename(self.inputFilesTseries[0]), - os.path.basename(self.inputFilesTseries[-1]))) - config = self.config # **** Compute MOC **** @@ -476,30 +449,18 @@ def _compute_moc_time_series_postprocess(self): # {{{ 'time series...') self.logger.info(' Load data...') - config = self.config + outputDirectory = build_config_full_path(self.config, 'output', + 'timeseriesSubdirectory') + try: + os.makedirs(outputDirectory) + except OSError: + pass - self.simulationStartTime = get_simulation_start_time(self.runStreams) - variableList = ['timeMonthly_avg_normalVelocity', - 'timeMonthly_avg_vertVelocityTop'] + outputFileTseries = '{}/mocTimeSeries.nc'.format(outputDirectory) dvEdge, 
areaCell, refBottomDepth, latCell, nVertLevels, \ refTopDepth, refLayerThickness = self._load_mesh() - if config.has_option(self.sectionName, 'maxChunkSize'): - chunking = config.getExpression(self.sectionName, 'maxChunkSize') - else: - chunking = None - - ds = open_multifile_dataset( - fileNames=self.inputFilesTseries, - calendar=self.calendar, - config=config, - simulationStartTime=self.simulationStartTime, - timeVariableName=['xtime_startMonthly', 'xtime_endMonthly'], - variableList=variableList, - startDate=self.startDateTseries, - endDate=self.endDateTseries, - chunking=chunking) latAtlantic = self.lat['Atlantic'] dLat = latAtlantic - 26.5 indlat26 = np.where(dLat == np.amin(np.abs(dLat))) @@ -510,49 +471,55 @@ def _compute_moc_time_series_postprocess(self): # {{{ transectEdgeMaskSigns = dictRegion['transectEdgeMaskSigns'] regionCellMask = dictRegion['cellMask'] - outputDirectory = build_config_full_path(config, 'output', - 'timeseriesSubdirectory') - try: - os.makedirs(outputDirectory) - except OSError: - pass + streamName = 'timeSeriesStatsMonthlyOutput' + inputFilesTseries = sorted(self.historyStreams.readpath( + streamName, startDate=self.startDateTseries, + endDate=self.endDateTseries, calendar=self.calendar)) - outputFileTseries = '{}/mocTimeSeries.nc'.format(outputDirectory) + dates = sorted([fileName[-13:-6] for fileName in inputFilesTseries]) + years = np.array([int(date[0:4]) for date in dates]) + months = np.array([int(date[5:7]) for date in dates]) + + mocRegion = np.zeros(len(inputFilesTseries)) + times = np.zeros(len(inputFilesTseries)) + computed = np.zeros(len(inputFilesTseries), bool) continueOutput = os.path.exists(outputFileTseries) if continueOutput: self.logger.info(' Read in previously computed MOC time series') - - # add all the other arguments to the function - comp_moc_part = partial(self._compute_moc_time_series_part, ds, - areaCell, latCell, indlat26, - maxEdgesInTransect, transectEdgeGlobalIDs, - transectEdgeMaskSigns, nVertLevels, dvEdge, - refLayerThickness, latAtlantic, regionCellMask) - - dsMOCTimeSeries = cache_time_series( - ds.Time.values, comp_moc_part, outputFileTseries, - self.calendar, yearsPerCacheUpdate=1, logger=self.logger) - - return dsMOCTimeSeries # }}} - - def _compute_moc_time_series_part(self, ds, areaCell, latCell, indlat26, - maxEdgesInTransect, - transectEdgeGlobalIDs, - transectEdgeMaskSigns, nVertLevels, - dvEdge, refLayerThickness, latAtlantic, - regionCellMask, timeIndices, firstCall): - # computes a subset of the MOC time series - - if firstCall: - self.logger.info(' Process and save time series') - - times = ds.Time[timeIndices].values - mocRegion = np.zeros(timeIndices.shape) - - for localIndex, timeIndex in enumerate(timeIndices): - time = times[localIndex] - dsLocal = ds.isel(Time=timeIndex) + dsMOCIn = xr.open_dataset(outputFileTseries, decode_times=False) + + # first, copy all computed data + for inIndex in range(dsMOCIn.dims['Time']): + mask = np.logical_and( + dsMOCIn.year[inIndex].values == years, + dsMOCIn.month[inIndex].values == months) + + outIndex = np.where(mask)[0][0] + + mocRegion[outIndex] = dsMOCIn.mocAtlantic26[inIndex] + times[outIndex] = dsMOCIn.Time[inIndex] + computed[outIndex] = True + + if np.all(computed): + # no need to waste time writing out the data set again + return dsMOCIn + + dsMOCIn.close() + + for timeIndex, fileName in enumerate(inputFilesTseries): + if computed[timeIndex]: + continue + + dsLocal = open_mpas_dataset( + fileName=fileName, + calendar=self.calendar, + 
variableList=self.variableList, + startDate=self.startDateTseries, + endDate=self.endDateTseries) + dsLocal = dsLocal.isel(Time=0) + time = dsLocal.Time.values + times[timeIndex] = time date = days_to_datetime(time, calendar=self.calendar) self.logger.info(' date: {:04d}-{:02d}'.format(date.year, @@ -569,7 +536,7 @@ def _compute_moc_time_series_part(self, ds, areaCell, latCell, indlat26, horizontalVel) mocTop = self._compute_moc(latAtlantic, nVertLevels, latCell, regionCellMask, transportZ, velArea) - mocRegion[localIndex] = np.amax(mocTop[:, indlat26]) + mocRegion[timeIndex] = np.amax(mocTop[:, indlat26]) description = 'Max MOC Atlantic streamfunction nearest to RAPID ' \ 'Array latitude (26.5N)' @@ -578,14 +545,24 @@ def _compute_moc_time_series_part(self, ds, areaCell, latCell, indlat26, 'coords': {'Time': {'dims': ('Time'), 'data': times, - 'attrs': {'units': 'days since 0001-01-01'}}}, + 'attrs': {'units': 'days since 0001-01-01'}}, + 'year': + {'dims': ('Time'), + 'data': years, + 'attrs': {'units': 'year'}}, + 'month': + {'dims': ('Time'), + 'data': months, + 'attrs': {'units': 'month'}}}, 'data_vars': {'mocAtlantic26': {'dims': ('Time'), 'data': mocRegion, 'attrs': {'units': 'Sv (10^6 m^3/s)', 'description': description}}}} - dsMOC = xr.Dataset.from_dict(dictonary) - return dsMOC + dsMOCTimeSeries = xr.Dataset.from_dict(dictonary) + dsMOCTimeSeries.to_netcdf(outputFileTseries) + + return dsMOCTimeSeries # def _compute_moc_analysismember(self): # diff --git a/mpas_analysis/ocean/time_series_ohc.py b/mpas_analysis/ocean/time_series_ohc.py index 72980cfda..4e7138b19 100644 --- a/mpas_analysis/ocean/time_series_ohc.py +++ b/mpas_analysis/ocean/time_series_ohc.py @@ -1,21 +1,18 @@ # -*- coding: utf-8 -*- import numpy as np import netCDF4 -import os from ..shared import AnalysisTask from ..shared.plot.plotting import timeseries_analysis_plot, \ plot_vertical_section, setup_colormap -from ..shared.generalized_reader.generalized_reader \ - import open_multifile_dataset +from ..shared.generalized_reader import open_multifile_dataset +from ..shared.io import open_mpas_dataset from ..shared.timekeeping.utility import get_simulation_start_time, \ date_to_days, days_to_datetime, string_to_datetime -from ..shared.time_series import time_series - from ..shared.io.utility import build_config_full_path, make_directories, \ check_path_exists from ..shared.html import write_image_xml @@ -25,12 +22,18 @@ class TimeSeriesOHC(AnalysisTask): """ Performs analysis of ocean heat content (OHC) from time-series output. + Attributes + ---------- + + mpasTimeSeriesTask : ``MpasTimeSeriesTask`` + The task that extracts the time series from MPAS monthly output + Authors ------- Xylar Asay-Davis, Milena Veneziani, Greg Streletz """ - def __init__(self, config): # {{{ + def __init__(self, config, mpasTimeSeriesTask): # {{{ """ Construct the analysis task. 
@@ -39,6 +42,9 @@ def __init__(self, config): # {{{ config : instance of MpasAnalysisConfigParser Contains configuration options + mpasTimeSeriesTask : ``MpasTimeSeriesTask`` + The task that extracts the time series from MPAS monthly output + Authors ------- Xylar Asay-Davis @@ -50,6 +56,10 @@ def __init__(self, config): # {{{ componentName='ocean', tags=['timeSeries', 'ohc']) + self.mpasTimeSeriesTask = mpasTimeSeriesTask + + self.run_after(mpasTimeSeriesTask) + # }}} def setup_and_check(self): # {{{ @@ -74,27 +84,27 @@ def setup_and_check(self): # {{{ config = self.config - self.check_analysis_enabled( - analysisOptionName='config_am_timeseriesstatsmonthly_enable', - raiseException=True) + self.startDate = self.config.get('timeSeries', 'startDate') + self.endDate = self.config.get('timeSeries', 'endDate') + + self.variables = {} + for suffix in ['avgLayerTemperature', 'avgLayerSalinity', + 'sumLayerMaskValue', 'avgLayerArea', + 'avgLayerThickness']: + self.variables[suffix] = \ + 'timeMonthly_avg_avgValueWithinOceanLayerRegion_' + suffix + + self.mpasTimeSeriesTask.add_variables( + variableList=self.variables.values()) + + self.inputFile = self.mpasTimeSeriesTask.outputFile if config.get('runs', 'preprocessedReferenceRunName') != 'None': check_path_exists(config.get('oceanPreprocessedReference', 'baseDirectory')) - # get a list of timeSeriesStats output files from the streams file, - # reading only those that are between the start and end dates - self.streamName = 'timeSeriesStatsMonthlyOutput' self.startDate = self.config.get('timeSeries', 'startDate') self.endDate = self.config.get('timeSeries', 'endDate') - self.inputFiles = self.historyStreams.readpath( - self.streamName, startDate=self.startDate, - endDate=self.endDate, calendar=self.calendar) - - if len(self.inputFiles) == 0: - raise IOError('No files were found in stream {} between {} and ' - '{}.'.format(self.streamName, self.startDate, - self.endDate)) mainRunName = config.get('runs', 'mainRunName') regions = config.getExpression('regions', 'regions') @@ -111,14 +121,16 @@ def setup_and_check(self): # {{{ 'plotOriginalFields') if plotOriginalFields: - plotTypes = ['TZ', 'TAnomalyZ', 'SZ', 'SAnomalyZ', 'OHCZ', 'OHCAnomalyZ', 'OHC', 'OHCAnomaly'] + plotTypes = ['TZ', 'TAnomalyZ', 'SZ', 'SAnomalyZ', 'OHCZ', + 'OHCAnomalyZ', 'OHC', 'OHCAnomaly'] else: plotTypes = ['TAnomalyZ', 'SAnomalyZ', 'OHCAnomalyZ', 'OHCAnomaly'] for region in regions: for plotType in plotTypes: filePrefix = '{}_{}_{}'.format(plotType, region, mainRunName) - self.xmlFileNames.append('{}/{}.xml'.format(self.plotsDirectory, filePrefix)) + self.xmlFileNames.append('{}/{}.xml'.format( + self.plotsDirectory, filePrefix)) self.filePrefixes[plotType, region] = filePrefix return # }}} @@ -138,6 +150,7 @@ def run_task(self): # {{{ simulationStartTime = get_simulation_start_time(self.runStreams) config = self.config calendar = self.calendar + variables = self.variables # read parameters from config file mainRunName = config.get('runs', 'mainRunName') @@ -154,11 +167,11 @@ def run_task(self): # {{{ compareWithObservations = config.getboolean(configSectionName, 'compareWithObservations') - movingAveragePointsTimeSeries = config.getint(configSectionName, - 'movingAveragePointsTimeSeries') + movingAveragePointsTimeSeries = config.getint( + configSectionName, 'movingAveragePointsTimeSeries') - movingAveragePointsHovmoller = config.getint(configSectionName, - 'movingAveragePointsHovmoller') + movingAveragePointsHovmoller = config.getint( + configSectionName, 
'movingAveragePointsHovmoller') regions = config.getExpression('regions', 'regions') plotTitles = config.getExpression('regions', 'plotTitles') @@ -182,11 +195,6 @@ def run_task(self): # {{{ raise IOError('No MPAS-O restart file found: need at least one ' 'restart file for OHC calculation') - self.logger.info('\n Reading files:\n' - ' {} through\n {}'.format( - os.path.basename(self.inputFiles[0]), - os.path.basename(self.inputFiles[-1]))) - # Define/read in general variables self.logger.info(' Read in depth and compute specific depth ' 'indexes...') @@ -202,27 +210,14 @@ def run_task(self): # {{{ # Load data self.logger.info(' Load ocean data...') - avgTemperatureVarName = \ - 'timeMonthly_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature' - avgSalinityVarName = \ - 'timeMonthly_avg_avgValueWithinOceanLayerRegion_avgLayerSalinity' - sumMaskVarName = \ - 'timeMonthly_avg_avgValueWithinOceanLayerRegion_sumLayerMaskValue' - avgAreaVarName = \ - 'timeMonthly_avg_avgValueWithinOceanLayerRegion_avgLayerArea' - avgThickVarName = \ - 'timeMonthly_avg_avgValueWithinOceanLayerRegion_avgLayerThickness' - variableList = [avgTemperatureVarName, avgSalinityVarName, sumMaskVarName, - avgAreaVarName, avgThickVarName] - ds = open_multifile_dataset(fileNames=self.inputFiles, - calendar=calendar, - config=config, - simulationStartTime=simulationStartTime, - timeVariableName=['xtime_startMonthly', - 'xtime_endMonthly'], - variableList=variableList, - startDate=self.startDate, - endDate=self.endDate) + ds = open_mpas_dataset(fileName=self.inputFile, + calendar=calendar, + variableList=self.variables.values(), + startDate=self.startDate, + endDate=self.endDate) + # rename the variables to shorter names for convenience + renameDict = dict((v, k) for k, v in variables.iteritems()) + ds.rename(renameDict, inplace=True) timeStart = string_to_datetime(self.startDate) timeEnd = string_to_datetime(self.endDate) @@ -233,25 +228,20 @@ def run_task(self): # {{{ startDateFirstYear = simulationStartTime firstYear = int(startDateFirstYear[0:4]) endDateFirstYear = '{:04d}-12-31_23:59:59'.format(firstYear) - filesFirstYear = \ - self.historyStreams.readpath(self.streamName, - startDate=startDateFirstYear, - endDate=endDateFirstYear, - calendar=calendar) - dsFirstYear = open_multifile_dataset( - fileNames=filesFirstYear, + dsFirstYear = open_mpas_dataset( + fileName=self.inputFile, calendar=calendar, - config=config, - simulationStartTime=simulationStartTime, - timeVariableName=['xtime_startMonthly', 'xtime_endMonthly'], - variableList=[avgTemperatureVarName, avgSalinityVarName], + variableList=self.variables.values(), startDate=startDateFirstYear, endDate=endDateFirstYear) - firstYearAvgLayerTemperature = dsFirstYear[avgTemperatureVarName] - firstYearAvgLayerSalinity = dsFirstYear[avgSalinityVarName] + + dsFirstYear.rename(renameDict, inplace=True) + + firstYearAvgLayerTemperature = dsFirstYear['avgLayerTemperature'] + firstYearAvgLayerSalinity = dsFirstYear['avgLayerSalinity'] else: - firstYearAvgLayerTemperature = ds[avgTemperatureVarName] - firstYearAvgLayerSalinity = ds[avgSalinityVarName] + firstYearAvgLayerTemperature = ds['avgLayerTemperature'] + firstYearAvgLayerSalinity = ds['avgLayerSalinity'] firstYear = timeStart.year timeStartFirstYear = date_to_days(year=firstYear, month=1, day=1, @@ -265,16 +255,18 @@ def run_task(self): # {{{ firstYearAvgLayerTemperature = \ firstYearAvgLayerTemperature.mean('Time') firstYearAvgLayerSalinity =
firstYearAvgLayerSalinity.sel( Time=slice(timeStartFirstYear, timeEndFirstYear)) firstYearAvgLayerSalinity = \ firstYearAvgLayerSalinity.mean('Time') self.logger.info(' Compute temperature and salinity anomalies...') - ds['avgLayerTemperatureAnomaly'] = (ds[avgTemperatureVarName] - firstYearAvgLayerTemperature) + ds['avgLayerTemperatureAnomaly'] = \ + (ds['avgLayerTemperature'] - firstYearAvgLayerTemperature) - ds['avgLayerSalinityAnomaly'] = (ds[avgSalinityVarName] - firstYearAvgLayerSalinity) + ds['avgLayerSalinityAnomaly'] = \ + (ds['avgLayerSalinity'] - firstYearAvgLayerSalinity) yearStart = days_to_datetime(ds.Time.min(), calendar=calendar).year yearEnd = days_to_datetime(ds.Time.max(), calendar=calendar).year @@ -292,7 +284,6 @@ def run_task(self): # {{{ fileNames=inFilesPreprocessed, calendar=calendar, config=config, - simulationStartTime=simulationStartTime, timeVariableName='xtime') yearEndPreprocessed = days_to_datetime(dsPreprocessed.Time.max(), calendar=calendar).year @@ -305,16 +296,8 @@ def run_task(self): # {{{ 'plotted.') preprocessedReferenceRunName = 'None' - cacheFileName = '{}/ohcTimeSeries.nc'.format(outputDirectory) - - # store fields needed by _compute_ohc_part - self.ds = ds - self.regionNames = regionNames - dsOHC = time_series.cache_time_series(ds.Time.values, - self._compute_ohc_part, - cacheFileName, calendar, - yearsPerCacheUpdate=10, - logger=self.logger) + # Add the OHC to the data set + ds = self._compute_ohc(ds, regionNames) unitsScalefactor = 1e-22 @@ -328,7 +311,8 @@ def run_task(self): # {{{ # First T vs depth/time x = ds.Time.values y = depth - z = ds.avgLayerTemperatureAnomaly.isel(nOceanRegionsTmp=regionIndex) + z = ds.avgLayerTemperatureAnomaly.isel( + nOceanRegionsTmp=regionIndex) z = z.transpose() colorbarLabel = '[$^\circ$C]' @@ -368,7 +352,7 @@ def run_task(self): # {{{ imageCaption=caption) if plotOriginalFields: - z = ds[avgTemperatureVarName].isel(nOceanRegionsTmp=regionIndex) + z = ds['avgLayerTemperature'].isel(nOceanRegionsTmp=regionIndex) z = z.transpose() title = 'Temperature, {} \n {}'.format(plotTitles[regionIndex], mainRunName) @@ -441,7 +425,7 @@ def run_task(self): # {{{ imageCaption=caption) if plotOriginalFields: - z = ds[avgSalinityVarName].isel(nOceanRegionsTmp=regionIndex) + z = ds['avgLayerSalinity'].isel(nOceanRegionsTmp=regionIndex) z = z.transpose() title = 'Salinity, {} \n {}'.format(plotTitles[regionIndex], mainRunName) @@ -476,7 +460,7 @@ def run_task(self): # {{{ # Third OHC vs depth/time - ohcAnomaly = dsOHC.ohcAnomaly.isel(nOceanRegionsTmp=regionIndex) + ohcAnomaly = ds.ohcAnomaly.isel(nOceanRegionsTmp=regionIndex) ohcAnomalyScaled = unitsScalefactor*ohcAnomaly z = ohcAnomalyScaled.transpose() @@ -515,7 +499,7 @@ def run_task(self): # {{{ imageCaption=caption) if plotOriginalFields: - ohc = dsOHC.ohc.isel(nOceanRegionsTmp=regionIndex) + ohc = ds.ohc.isel(nOceanRegionsTmp=regionIndex) ohcScaled = unitsScalefactor*ohc z = ohcScaled.transpose() @@ -665,13 +649,11 @@ def run_task(self): # {{{ imageDescription=caption, imageCaption=caption) - # }}} - - def _compute_ohc_part(self, timeIndices, firstCall): # {{{ + def _compute_ohc(self, ds, regionNames): # {{{ ''' - Compute part of the OHC time series, given time indices to process. + Compute the OHC time series.
''' # specific heat [J/(kg*degC)] @@ -679,34 +661,22 @@ def _compute_ohc_part(self, timeIndices, firstCall): # {{{ # [kg/m3] rho = self.namelist.getfloat('config_density0') - dsLocal = self.ds.isel(Time=timeIndices) - - - avgTemperatureVarName = \ - 'timeMonthly_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature' - sumMaskVarName = \ - 'timeMonthly_avg_avgValueWithinOceanLayerRegion_sumLayerMaskValue' - avgAreaVarName = \ - 'timeMonthly_avg_avgValueWithinOceanLayerRegion_avgLayerArea' - avgThickVarName = \ - 'timeMonthly_avg_avgValueWithinOceanLayerRegion_avgLayerThickness' - - - dsLocal['ohc'] = rho*cp*dsLocal[sumMaskVarName] * \ - dsLocal[avgAreaVarName] * dsLocal[avgThickVarName] * \ - dsLocal[avgTemperatureVarName] - dsLocal.ohc.attrs['units'] = 'J' - dsLocal.ohc.attrs['description'] = 'Ocean heat content in each region' + ds['ohc'] = rho*cp*ds['sumLayerMaskValue'] * \ + ds['avgLayerArea'] * ds['avgLayerThickness'] * \ + ds['avgLayerTemperature'] + ds.ohc.attrs['units'] = 'J' + ds.ohc.attrs['description'] = 'Ocean heat content in each region' - dsLocal['ohcAnomaly'] = rho*cp*dsLocal[sumMaskVarName] * \ - dsLocal[avgAreaVarName] * dsLocal[avgThickVarName] * \ - dsLocal.avgLayerTemperatureAnomaly - dsLocal.ohcAnomaly.attrs['units'] = 'J' - dsLocal.ohcAnomaly.attrs['description'] = 'Ocean heat content anomaly in each region' + ds['ohcAnomaly'] = rho*cp*ds['sumLayerMaskValue'] * \ + ds['avgLayerArea'] * ds['avgLayerThickness'] * \ + ds.avgLayerTemperatureAnomaly + ds.ohcAnomaly.attrs['units'] = 'J' + ds.ohcAnomaly.attrs['description'] = \ + 'Ocean heat content anomaly in each region' - dsLocal['regionNames'] = ('nOceanRegionsTmp', self.regionNames) + ds['regionNames'] = ('nOceanRegionsTmp', regionNames) - return dsLocal # }}} + return ds # }}} # }}} diff --git a/mpas_analysis/ocean/time_series_sst.py b/mpas_analysis/ocean/time_series_sst.py index c39e0aacd..3607ebba9 100644 --- a/mpas_analysis/ocean/time_series_sst.py +++ b/mpas_analysis/ocean/time_series_sst.py @@ -1,16 +1,11 @@ -import os - from ..shared import AnalysisTask from ..shared.plot.plotting import timeseries_analysis_plot -from ..shared.generalized_reader.generalized_reader \ - import open_multifile_dataset - -from ..shared.timekeeping.utility import get_simulation_start_time, \ - date_to_days, days_to_datetime +from ..shared.generalized_reader import open_multifile_dataset +from ..shared.io import open_mpas_dataset -from ..shared.time_series import time_series +from ..shared.timekeeping.utility import date_to_days, days_to_datetime from ..shared.io.utility import build_config_full_path, make_directories, \ check_path_exists @@ -22,12 +17,18 @@ class TimeSeriesSST(AnalysisTask): Performs analysis of the time-series output of sea-surface temperature (SST). + Attributes + ---------- + + mpasTimeSeriesTask : ``MpasTimeSeriesTask`` + The task that extracts the time series from MPAS monthly output + Authors ------- Xylar Asay-Davis, Milena Veneziani """ - def __init__(self, config): # {{{ + def __init__(self, config, mpasTimeSeriesTask): # {{{ """ Construct the analysis task. 
@@ -36,6 +37,9 @@ def __init__(self, config): # {{{ config : instance of MpasAnalysisConfigParser Contains configuration options + mpasTimeSeriesTask : ``MpasTimeSeriesTask`` + The task that extracts the time series from MPAS monthly output + Authors ------- Xylar Asay-Davis @@ -47,6 +51,10 @@ def __init__(self, config): # {{{ componentName='ocean', tags=['timeSeries', 'sst']) + self.mpasTimeSeriesTask = mpasTimeSeriesTask + + self.run_after(mpasTimeSeriesTask) + # }}} def setup_and_check(self): # {{{ @@ -68,31 +76,20 @@ def setup_and_check(self): # {{{ # self.calendar super(TimeSeriesSST, self).setup_and_check() - self.check_analysis_enabled( - analysisOptionName='config_am_timeseriesstatsmonthly_enable', - raiseException=True) - config = self.config + self.startDate = self.config.get('timeSeries', 'startDate') + self.endDate = self.config.get('timeSeries', 'endDate') + + self.variableList = \ + ['timeMonthly_avg_avgValueWithinOceanRegion_avgSurfaceTemperature'] + self.mpasTimeSeriesTask.add_variables(variableList=self.variableList) + if config.get('runs', 'preprocessedReferenceRunName') != 'None': check_path_exists(config.get('oceanPreprocessedReference', 'baseDirectory')) - # get a list of timeSeriesStats output files from the streams file, - # reading only those that are between the start and end dates - streamName = 'timeSeriesStatsMonthlyOutput' - self.startDate = config.get('timeSeries', 'startDate') - self.endDate = config.get('timeSeries', 'endDate') - self.inputFiles = \ - self.historyStreams.readpath(streamName, - startDate=self.startDate, - endDate=self.endDate, - calendar=self.calendar) - - if len(self.inputFiles) == 0: - raise IOError('No files were found in stream {} between {} and ' - '{}.'.format(streamName, self.startDate, - self.endDate)) + self.inputFile = self.mpasTimeSeriesTask.outputFile mainRunName = config.get('runs', 'mainRunName') regions = config.getExpression('regions', 'regions') @@ -126,15 +123,9 @@ def run_task(self): # {{{ self.logger.info(' Load SST data...') - simulationStartTime = get_simulation_start_time(self.runStreams) config = self.config calendar = self.calendar - self.logger.info('\n Reading files:\n' - ' {} through\n {}'.format( - os.path.basename(self.inputFiles[0]), - os.path.basename(self.inputFiles[-1]))) - mainRunName = config.get('runs', 'mainRunName') preprocessedReferenceRunName = \ config.get('runs', 'preprocessedReferenceRunName') @@ -157,22 +148,14 @@ def run_task(self): # {{{ regionNames = config.getExpression('regions', 'regions') regionNames = [regionNames[index] for index in regionIndicesToPlot] - # Load data: - varName = \ - 'timeMonthly_avg_avgValueWithinOceanRegion_avgSurfaceTemperature' - varList = [varName] - ds = open_multifile_dataset(fileNames=self.inputFiles, - calendar=calendar, - config=config, - simulationStartTime=simulationStartTime, - timeVariableName=['xtime_startMonthly', - 'xtime_endMonthly'], - variableList=varList, - startDate=self.startDate, - endDate=self.endDate) - - yearStart = days_to_datetime(ds.Time.min(), calendar=calendar).year - yearEnd = days_to_datetime(ds.Time.max(), calendar=calendar).year + dsSST = open_mpas_dataset(fileName=self.inputFile, + calendar=calendar, + variableList=self.variableList, + startDate=self.startDate, + endDate=self.endDate) + + yearStart = days_to_datetime(dsSST.Time.min(), calendar=calendar).year + yearEnd = days_to_datetime(dsSST.Time.max(), calendar=calendar).year timeStart = date_to_days(year=yearStart, month=1, day=1, calendar=calendar) timeEnd = 
date_to_days(year=yearEnd, month=12, day=31, @@ -187,7 +170,6 @@ def run_task(self): # {{{ fileNames=inFilesPreprocessed, calendar=calendar, config=config, - simulationStartTime=simulationStartTime, timeVariableName='xtime') yearEndPreprocessed = days_to_datetime(dsPreprocessed.Time.max(), calendar=calendar).year @@ -200,16 +182,6 @@ def run_task(self): # {{{ 'plotted.') preprocessedReferenceRunName = 'None' - cacheFileName = '{}/sstTimeSeries.nc'.format(outputDirectory) - - # save ds so it's avaliable in _compute_sst_part - self.ds = ds - dsSST = time_series.cache_time_series(ds.Time.values, - self._compute_sst_part, - cacheFileName, calendar, - yearsPerCacheUpdate=10, - logger=self.logger) - self.logger.info(' Make plots...') for regionIndex in regionIndicesToPlot: region = regions[regionIndex] @@ -219,6 +191,7 @@ def run_task(self): # {{{ xLabel = 'Time [years]' yLabel = '[$^\circ$C]' + varName = self.variableList[0] SST = dsSST[varName].isel(nOceanRegions=regionIndex) filePrefix = self.filePrefixes[region] @@ -229,7 +202,7 @@ def run_task(self): # {{{ SST_v0 = dsPreprocessedTimeSlice.SST title = '{}\n {} (red)'.format(title, - preprocessedReferenceRunName) + preprocessedReferenceRunName) timeseries_analysis_plot(config, [SST, SST_v0], movingAveragePoints, title, xLabel, yLabel, figureName, @@ -258,14 +231,6 @@ def run_task(self): # {{{ # }}} - def _compute_sst_part(self, timeIndices, firstCall): # {{{ - ''' - Compute part of the SST time series, given time indices to process. - ''' - dsLocal = self.ds.isel(Time=timeIndices) - return dsLocal - # }}} - # }}} # vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/sea_ice/time_series.py b/mpas_analysis/sea_ice/time_series.py index e4e2049b0..47098f517 100644 --- a/mpas_analysis/sea_ice/time_series.py +++ b/mpas_analysis/sea_ice/time_series.py @@ -1,5 +1,4 @@ import xarray as xr -import os from .sea_ice_analysis_task import SeaIceAnalysisTask @@ -13,11 +12,10 @@ datetime_to_days from ..shared.timekeeping.MpasRelativeDelta import MpasRelativeDelta -from ..shared.generalized_reader.generalized_reader \ - import open_multifile_dataset +from ..shared.generalized_reader import open_multifile_dataset +from ..shared.io import open_mpas_dataset from ..shared.mpas_xarray.mpas_xarray import subset_variables -from ..shared.time_series import cache_time_series from ..shared.html import write_image_xml @@ -25,12 +23,18 @@ class TimeSeriesSeaIce(SeaIceAnalysisTask): """ Performs analysis of time series of sea-ice properties. + Attributes + ---------- + + mpasTimeSeriesTask : ``MpasTimeSeriesTask`` + The task that extracts the time series from MPAS monthly output + Authors ------- Xylar Asay-Davis, Milena Veneziani """ - def __init__(self, config): # {{{ + def __init__(self, config, mpasTimeSeriesTask): # {{{ """ Construct the analysis task. 
@@ -39,6 +43,9 @@ def __init__(self, config): # {{{ config : instance of MpasAnalysisConfigParser Contains configuration options + mpasTimeSeriesTask : ``MpasTimeSeriesTask`` + The task that extracts the time series from MPAS monthly output + Authors ------- Xylar Asay-Davis @@ -50,6 +57,10 @@ def __init__(self, config): # {{{ componentName='seaIce', tags=['timeSeries']) + self.mpasTimeSeriesTask = mpasTimeSeriesTask + + self.run_after(mpasTimeSeriesTask) + # }}} def setup_and_check(self): # {{{ @@ -72,31 +83,21 @@ def setup_and_check(self): # {{{ # self.calendar super(TimeSeriesSeaIce, self).setup_and_check() - self.check_analysis_enabled( - analysisOptionName='config_am_timeseriesstatsmonthly_enable', - raiseException=True) - config = self.config + + self.startDate = self.config.get('timeSeries', 'startDate') + self.endDate = self.config.get('timeSeries', 'endDate') + + self.variableList = ['timeMonthly_avg_iceAreaCell', + 'timeMonthly_avg_iceVolumeCell'] + self.mpasTimeSeriesTask.add_variables(variableList=self.variableList) + + self.inputFile = self.mpasTimeSeriesTask.outputFile + if config.get('runs', 'preprocessedReferenceRunName') != 'None': check_path_exists(config.get('seaIcePreprocessedReference', 'baseDirectory')) - # get a list of timeSeriesStatsMonthly output files from the streams - # file, reading only those that are between the start and end dates - streamName = 'timeSeriesStatsMonthlyOutput' - self.startDate = config.get('timeSeries', 'startDate') - self.endDate = config.get('timeSeries', 'endDate') - self.inputFiles = \ - self.historyStreams.readpath(streamName, - startDate=self.startDate, - endDate=self.endDate, - calendar=self.calendar) - - if len(self.inputFiles) == 0: - raise IOError('No files were found in stream {} between {} and ' - '{}.'.format(streamName, self.startDate, - self.endDate)) - # these are redundant for now. 
Later cleanup is needed where these # file names are reused in run() self.xmlFileNames = [] @@ -151,11 +152,6 @@ def run_task(self): # {{{ config = self.config calendar = self.calendar - self.logger.info('\n Reading files:\n' - ' {} through\n {}'.format( - os.path.basename(self.inputFiles[0]), - os.path.basename(self.inputFiles[-1]))) - plotTitles = {'iceArea': 'Sea-ice area', 'iceVolume': 'Sea-ice volume', 'iceThickness': 'Sea-ice mean thickness'} @@ -208,14 +204,10 @@ def run_task(self): # {{{ 'areaCell']) # Load data - ds = open_multifile_dataset( - fileNames=self.inputFiles, + ds = open_mpas_dataset( + fileName=self.inputFile, calendar=calendar, - config=config, - simulationStartTime=self.simulationStartTime, - timeVariableName=['xtime_startMonthly', 'xtime_endMonthly'], - variableList=['timeMonthly_avg_iceAreaCell', - 'timeMonthly_avg_iceVolumeCell'], + variableList=self.variableList, startDate=self.startDate, endDate=self.endDate) @@ -263,16 +255,8 @@ def run_task(self): # {{{ plotVars = {} for hemisphere in ['NH', 'SH']: - self.logger.info(' Caching {} data'.format(hemisphere)) - cacheFileName = '{}/seaIceAreaVolumeTimeSeries_{}.nc'.format( - outputDirectory, hemisphere) - # store some variables for use in _compute_area_vol_part - self.hemisphere = hemisphere - self.ds = ds - dsTimeSeries[hemisphere] = cache_time_series( - ds.Time.values, self._compute_area_vol_part, cacheFileName, - calendar, yearsPerCacheUpdate=10, logger=self.logger) + dsTimeSeries[hemisphere] = self._compute_area_vol(ds, hemisphere) self.logger.info(' Make {} plots...'.format(hemisphere)) @@ -574,20 +558,19 @@ def _replicate_cycle(self, ds, dsToReplicate, calendar): # {{{ return dsShift # }}} - def _compute_area_vol_part(self, timeIndices, firstCall): # {{{ + def _compute_area_vol(self, ds, hemisphere): # {{{ ''' Compute the time series of sea ice area and volume for the given hemisphere.
''' - dsLocal = self.ds.isel(Time=timeIndices) - if self.hemisphere == 'NH': + if hemisphere == 'NH': mask = self.dsMesh.latCell > 0 else: mask = self.dsMesh.latCell < 0 - dsLocal = dsLocal.where(mask) + ds = ds.where(mask) - dsAreaSum = (dsLocal*self.dsMesh.areaCell).sum('nCells') + dsAreaSum = (ds*self.dsMesh.areaCell).sum('nCells') dsAreaSum = dsAreaSum.rename( {'timeMonthly_avg_iceAreaCell': 'iceArea', 'timeMonthly_avg_iceVolumeCell': 'iceVolume'}) @@ -596,13 +579,13 @@ def _compute_area_vol_part(self, timeIndices, firstCall): # {{{ dsAreaSum['iceArea'].attrs['units'] = 'm$^2$' dsAreaSum['iceArea'].attrs['description'] = \ - 'Total {} sea ice area'.format(self.hemisphere) + 'Total {} sea ice area'.format(hemisphere) dsAreaSum['iceVolume'].attrs['units'] = 'm$^3$' dsAreaSum['iceVolume'].attrs['description'] = \ - 'Total {} sea ice volume'.format(self.hemisphere) + 'Total {} sea ice volume'.format(hemisphere) dsAreaSum['iceThickness'].attrs['units'] = 'm' dsAreaSum['iceThickness'].attrs['description'] = \ - 'Mean {} sea ice volume'.format(self.hemisphere) + 'Mean {} sea ice thickness'.format(hemisphere) return dsAreaSum # }}} diff --git a/mpas_analysis/shared/generalized_reader/__init__.py b/mpas_analysis/shared/generalized_reader/__init__.py index e69de29bb..07c0a4eda 100644 --- a/mpas_analysis/shared/generalized_reader/__init__.py +++ b/mpas_analysis/shared/generalized_reader/__init__.py @@ -0,0 +1 @@ +from .generalized_reader import open_multifile_dataset \ No newline at end of file diff --git a/mpas_analysis/shared/io/__init__.py b/mpas_analysis/shared/io/__init__.py index 72eb702eb..b03a21757 100644 --- a/mpas_analysis/shared/io/__init__.py +++ b/mpas_analysis/shared/io/__init__.py @@ -1,3 +1,4 @@ from .namelist_streams_interface import NameList, StreamsFile from .utility import paths -from .write_netcdf import write_netcdf \ No newline at end of file +from .write_netcdf import write_netcdf +from .mpas_reader import open_mpas_dataset diff --git a/mpas_analysis/shared/io/mpas_reader.py b/mpas_analysis/shared/io/mpas_reader.py new file mode 100644 index 000000000..1e551da5f --- /dev/null +++ b/mpas_analysis/shared/io/mpas_reader.py @@ -0,0 +1,251 @@ +""" +Utility functions for reading a single MPAS file into xarray and for removing +all but a given list of variables from a data set. + +Authors +------- +Xylar Asay-Davis +""" + +import xarray + +from ..timekeeping.utility import string_to_days_since_date, days_to_datetime + + +def open_mpas_dataset(fileName, calendar, + timeVariableNames=['xtime_startMonthly', + 'xtime_endMonthly'], + variableList=None, startDate=None, endDate=None): # {{{ + """ + Opens and returns an xarray data set given a file name and the MPAS + calendar name. + + Parameters + ---------- + fileName : str + File path to read + + calendar : {``'gregorian'``, ``'gregorian_noleap'``} + The name of one of the calendars supported by MPAS cores + + timeVariableNames : str or list of 2 str, optional + The name of the time variable (typically ``'xtime'`` + or ``['xtime_startMonthly', 'xtime_endMonthly']``) + + variableList : list of strings, optional + If present, a list of variables to be included in the data set + + startDate, endDate : string or datetime.datetime, optional + If present, the first and last dates to be used in the data set. The + time variable is sliced to only include dates within this range. + + Returns + ------- + ds : ``xarray.Dataset`` + + Raises + ------ + TypeError + If the time variable has an unsupported type (not a date string).
+ + ValueError + If no Time entries are found between ``startDate`` and ``endDate``, + or if none of the variables in ``variableList`` are found in the + data set + + Authors + ------- + Xylar Asay-Davis + """ + + ds = xarray.open_dataset(fileName, decode_cf=True, decode_times=False, + lock=False) + + ds = _parse_dataset_time(ds, timeVariableNames, calendar) + + if startDate is not None and endDate is not None: + if isinstance(startDate, str): + startDate = string_to_days_since_date(dateString=startDate, + calendar=calendar) + if isinstance(endDate, str): + endDate = string_to_days_since_date(dateString=endDate, + calendar=calendar) + + # select only the data in the specified range of dates + ds = ds.sel(Time=slice(startDate, endDate)) + + if ds.dims['Time'] == 0: + raise ValueError('The data set contains no Time entries between ' + 'dates {} and {}.'.format( + days_to_datetime(startDate, calendar=calendar), + days_to_datetime(endDate, calendar=calendar))) + if variableList is not None: + ds = subset_variables(ds, variableList) + + return ds # }}} + + +def subset_variables(ds, variableList): # {{{ + """ + Given a data set and a list of variable names, returns a new data set that + contains only variables with those names. + + Parameters + ---------- + ds : ``xarray.Dataset`` object + The data set from which a subset of variables is to be extracted. + + variableList : string or list of strings + The names of the variables to be extracted. + + Returns + ------- + ds : ``xarray.Dataset`` object + A copy of the original data set with only the variables in + variableList. + + Raises + ------ + ValueError + If the resulting data set is empty. + + Authors + ------- + Phillip J. Wolfram, Xylar Asay-Davis + """ + + allvars = ds.data_vars.keys() + + # get set of variables to drop (all ds variables not in variableList) + dropvars = set(allvars) - set(variableList) + + # drop spurious variables + ds = ds.drop(dropvars) + + # must also drop all coordinates that are not associated with the variables + coords = set() + for avar in ds.data_vars.keys(): + coords |= set(ds[avar].coords.keys()) + dropcoords = set(ds.coords.keys()) - coords + + # drop spurious coordinates + ds = ds.drop(dropcoords) + + if len(ds.data_vars.keys()) == 0: + raise ValueError( + 'Empty dataset is returned.\n' + 'Variables {}\n' + 'are not found within the dataset ' + 'variables: {}.'.format(variableList, allvars)) + + return ds # }}} + + +def _parse_dataset_time(ds, inTimeVariableName, calendar, + outTimeVariableName='Time', + referenceDate='0001-01-01'): # {{{ + """ + A helper function for computing a time coordinate from an MPAS time + variable. Given a data set and a time variable name (or list of 2 + time names), returns a new data set with time coordinate + `outTimeVariableName` filled with days since `referenceDate` + + Parameters + ---------- + ds : ``xarray.Dataset`` + The data set containing an MPAS time variable to be used to build + an xarray time coordinate. + + inTimeVariableName : str or tuple or list of str + The name of the time variable in the MPAS data set that will be + used to build the 'Time' coordinate. The array(s) named by + inTimeVariableName should contain date strings. Typically, + inTimeVariableName is ``'xtime'``. If a list of two variable + names is provided, times from the two are averaged together to + determine the value of the time coordinate. In such cases, + inTimeVariableName is typically + ``['xtime_startMonthly', 'xtime_endMonthly']``.
+ + calendar : {'gregorian', 'gregorian_noleap'} + The name of one of the calendars supported by MPAS cores + + outTimeVariableName : str, optional + The name of the coordinate to assign times to, typically 'Time'. + + referenceDate : str, optional + The reference date for the time variable, typically '0001-01-01', + taking one of the following forms:: + + 0001-01-01 + 0001-01-01 00:00:00 + + Returns + ------- + dsOut : ``xarray.Dataset`` + A copy of the input data set with the `outTimeVariableName` + coordinate containing the time coordinate parsed from + `inTimeVariableName`. + + Raises + ------ + TypeError + If the time variable has an unsupported type (not a date string). + + Authors + ------- + Xylar Asay-Davis + """ + + if isinstance(inTimeVariableName, (tuple, list)): + # we want to average the two + assert(len(inTimeVariableName) == 2) + + dsStart = _parse_dataset_time( + ds=ds, + inTimeVariableName=inTimeVariableName[0], + calendar=calendar, + outTimeVariableName=outTimeVariableName, + referenceDate=referenceDate) + dsEnd = _parse_dataset_time( + ds=ds, + inTimeVariableName=inTimeVariableName[1], + calendar=calendar, + outTimeVariableName=outTimeVariableName, + referenceDate=referenceDate) + starts = dsStart[outTimeVariableName].values + ends = dsEnd[outTimeVariableName].values + + # set the output time coordinate to the mean of the start and end times + dsOut = dsStart.copy() + + dsOut.coords['startTime'] = (outTimeVariableName, starts) + dsOut.coords['endTime'] = (outTimeVariableName, ends) + + dsOut.coords[outTimeVariableName] = (outTimeVariableName, + [starts[i] + + (ends[i] - starts[i])/2 + for i in range(len(starts))]) + + else: + # there is just one time variable (either because we're recursively + # calling the function or because we're not averaging). + + timeVar = ds[inTimeVariableName] + + if timeVar.dtype != '|S64': + raise TypeError("timeVar of unsupported type {}.
String variable " + "expected.".format(timeVar.dtype)) + + # this is an array of character arrays like 'xtime'; join each + # entry into a date string + timeStrings = [''.join(xtime).strip() for xtime in timeVar.values] + days = string_to_days_since_date(dateString=timeStrings, + referenceDate=referenceDate, + calendar=calendar) + + dsOut = ds.copy() + dsOut.coords[outTimeVariableName] = (outTimeVariableName, days) + + return dsOut # }}} + + +# vim: ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/shared/time_series/__init__.py b/mpas_analysis/shared/time_series/__init__.py index 1815c2b88..ae4fdfde5 100644 --- a/mpas_analysis/shared/time_series/__init__.py +++ b/mpas_analysis/shared/time_series/__init__.py @@ -1 +1,2 @@ -from .time_series import cache_time_series \ No newline at end of file +from .time_series import cache_time_series +from .mpas_time_series_task import MpasTimeSeriesTask diff --git a/mpas_analysis/shared/time_series/mpas_time_series_task.py b/mpas_analysis/shared/time_series/mpas_time_series_task.py new file mode 100644 index 000000000..01804d356 --- /dev/null +++ b/mpas_analysis/shared/time_series/mpas_time_series_task.py @@ -0,0 +1,329 @@ +import os +import warnings +import subprocess +from distutils.spawn import find_executable +import xarray as xr +import numpy + +from ..analysis_task import AnalysisTask + +from ..io.utility import build_config_full_path, make_directories +from ..timekeeping.utility import get_simulation_start_time + + +class MpasTimeSeriesTask(AnalysisTask): # {{{ + ''' + An analysis task for computing time series from the output of the + ``timeSeriesStatsMonthly`` analysis member. + + Attributes + ---------- + + variableList : list of str + A list of variable names in ``timeSeriesStatsMonthly`` to be + included in the time series + + inputFiles : list of str + A list of input files from which to extract the time series. + + startDate, endDate : str + The start and end dates of the time series as strings + + startYear, endYear : int + The start and end years of the time series + + Authors + ------- + Xylar Asay-Davis + ''' + + def __init__(self, config, componentName, taskName=None, + subtaskName=None): # {{{ + ''' + Construct the analysis task for extracting time series. + + Parameters + ---------- + config : ``MpasAnalysisConfigParser`` + Contains configuration options + + componentName : {'ocean', 'seaIce'} + The name of the component (same as the folder where the task + resides) + + taskName : str, optional + The name of the task, 'mpasTimeSeriesOcean' or + 'mpasTimeSeriesSeaIce' by default (depending on ``componentName``) + + subtaskName : str, optional + The name of the subtask (if any) + + + Authors + ------- + Xylar Asay-Davis + ''' + self.variableList = [] + self.seasons = [] + + tags = ['timeSeries'] + + suffix = componentName[0].upper() + componentName[1:] + if taskName is None: + taskName = 'mpasTimeSeries{}'.format(suffix) + + # call the constructor from the base class (AnalysisTask) + super(MpasTimeSeriesTask, self).__init__( + config=config, + taskName=taskName, + subtaskName=subtaskName, + componentName=componentName, + tags=tags) + + # }}} + + def add_variables(self, variableList): # {{{ + ''' + Add one or more variables to extract as a time series.
+ + Parameters + ---------- + variableList : list of str + A list of variable names in ``timeSeriesStatsMonthly`` to be + included in the time series + + Authors + ------- + Xylar Asay-Davis + ''' + + for variable in variableList: + if variable not in self.variableList: + self.variableList.append(variable) + + # }}} + + def setup_and_check(self): # {{{ + ''' + Perform steps to set up the analysis and check for errors in the setup. + + Authors + ------- + Xylar Asay-Davis + ''' + + # first, call setup_and_check from the base class (AnalysisTask), + # which will perform some common setup, including storing: + # self.runDirectory , self.historyDirectory, self.plotsDirectory, + # self.namelist, self.runStreams, self.historyStreams, + # self.calendar + super(MpasTimeSeriesTask, self).setup_and_check() + + config = self.config + baseDirectory = build_config_full_path( + config, 'output', 'timeSeriesSubdirectory') + + make_directories(baseDirectory) + + self.outputFile = '{}/{}.nc'.format(baseDirectory, + self.fullTaskName) + + self.check_analysis_enabled( + analysisOptionName='config_am_timeseriesstatsmonthly_enable', + raiseException=True) + + # get a list of timeSeriesStats output files from the streams file, + # reading only those that are between the start and end dates + startDate = config.get('timeSeries', 'startDate') + endDate = config.get('timeSeries', 'endDate') + streamName = 'timeSeriesStatsMonthlyOutput' + self.inputFiles = self.historyStreams.readpath( + streamName, startDate=startDate, endDate=endDate, + calendar=self.calendar) + + if len(self.inputFiles) == 0: + raise IOError('No files were found in stream {} between {} and ' + '{}.'.format(streamName, startDate, endDate)) + + self._update_time_series_bounds_from_file_names() + + # Make sure first year of data is included for computing anomalies + simulationStartTime = get_simulation_start_time(self.runStreams) + firstYear = int(simulationStartTime[0:4]) + endDateFirstYear = '{:04d}-12-31_23:59:59'.format(firstYear) + firstYearInputFiles = self.historyStreams.readpath( + streamName, startDate=simulationStartTime, + endDate=endDateFirstYear, + calendar=self.calendar) + for fileName in firstYearInputFiles: + if fileName not in self.inputFiles: + self.inputFiles.append(fileName) + + self.inputFiles = sorted(self.inputFiles) + + # }}} + + def run_task(self): # {{{ + ''' + Compute the requested time series + + Authors + ------- + Xylar Asay-Davis + ''' + + if len(self.variableList) == 0: + # nothing to do + return + + self.logger.info('\nComputing MPAS time series from files:\n' + ' {} through\n {}'.format( + os.path.basename(self.inputFiles[0]), + os.path.basename(self.inputFiles[-1]))) + + self._compute_time_series_with_ncrcat() + + # }}} + + def _update_time_series_bounds_from_file_names(self): # {{{ + """ + Update the start and end years and dates for time series based on the + years actually available in the list of files. 
+ + Authors + ------- + Xylar Asay-Davis + """ + + config = self.config + section = 'timeSeries' + + requestedStartYear = config.getint(section, 'startYear') + requestedEndYear = config.getint(section, 'endYear') + + dates = sorted([fileName[-13:-6] for fileName in self.inputFiles]) + years = [int(date[0:4]) for date in dates] + months = [int(date[5:7]) for date in dates] + + # search for the start of the first full year + firstIndex = 0 + while(firstIndex < len(years) and months[firstIndex] != 1): + firstIndex += 1 + startYear = years[firstIndex] + + # search for the end of the last full year + lastIndex = len(years)-1 + while(lastIndex >= 0 and months[lastIndex] != 12): + lastIndex -= 1 + endYear = years[lastIndex] + + if startYear != requestedStartYear or endYear != requestedEndYear: + message = "time series start and/or end year different from " \ + "requested\n" \ + "requested: {:04d}-{:04d}\n" \ + "actual: {:04d}-{:04d}\n".format(requestedStartYear, + requestedEndYear, + startYear, + endYear) + warnings.warn(message) + config.set(section, 'startYear', str(startYear)) + config.set(section, 'endYear', str(endYear)) + + startDate = '{:04d}-01-01_00:00:00'.format(startYear) + config.set(section, 'startDate', startDate) + endDate = '{:04d}-12-31_23:59:59'.format(endYear) + config.set(section, 'endDate', endDate) + else: + startDate = config.get(section, 'startDate') + endDate = config.get(section, 'endDate') + + self.startDate = startDate + self.endDate = endDate + self.startYear = startYear + self.endYear = endYear + + # }}} + + def _compute_time_series_with_ncrcat(self): + # {{{ + ''' + Uses ncrcat to extract time series from timeSeriesStatsMonthlyOutput + files + + Raises + ------ + OSError + If ``ncrcat`` is not in the system path. + + Authors + ------- + Xylar Asay-Davis + ''' + + if find_executable('ncrcat') is None: + raise OSError('ncrcat not found. Make sure the latest nco ' + 'package is installed: \n' + 'conda install nco\n' + 'Note: this presumes use of the conda-forge ' + 'channel.') + + if os.path.exists(self.outputFile): + # add only input files with times that aren't already in the + # output file + dates = sorted([fileName[-13:-6] for fileName in self.inputFiles]) + inYears = numpy.array([int(date[0:4]) for date in dates]) + inMonths = numpy.array([int(date[5:7]) for date in dates]) + totalMonths = 12*inYears + inMonths + + with xr.open_dataset(self.outputFile) as ds: + lastDate = str(ds.xtime_startMonthly[-1].values) + + lastYear = int(lastDate[0:4]) + lastMonth = int(lastDate[5:7]) + lastTotalMonths = 12*lastYear + lastMonth + + inputFiles = [] + for index, inputFile in enumerate(self.inputFiles): + if totalMonths[index] > lastTotalMonths: + inputFiles.append(inputFile) + + if len(inputFiles) == 0: + # nothing to do + return + else: + inputFiles = self.inputFiles + + variableList = self.variableList + ['xtime_startMonthly', + 'xtime_endMonthly'] + + args = ['ncrcat', '--record_append', '--no_tmp_fl', + '-v', ','.join(variableList)] + + printCommand = '{} {} ...
+
+    def _compute_time_series_with_ncrcat(self):
+        # {{{
+        '''
+        Uses ncrcat to extract time series from timeSeriesStatsMonthlyOutput
+        files
+
+        Raises
+        ------
+        OSError
+            If ``ncrcat`` is not in the system path.
+
+        Author
+        ------
+        Xylar Asay-Davis
+        '''
+
+        if find_executable('ncrcat') is None:
+            raise OSError('ncrcat not found. Make sure the latest nco '
+                          'package is installed: \n'
+                          'conda install nco\n'
+                          'Note: this presumes use of the conda-forge '
+                          'channel.')
+
+        if os.path.exists(self.outputFile):
+            # add only input files with times that aren't already in the
+            # output file
+            dates = sorted([fileName[-13:-6] for fileName in self.inputFiles])
+            inYears = numpy.array([int(date[0:4]) for date in dates])
+            inMonths = numpy.array([int(date[5:7]) for date in dates])
+            totalMonths = 12*inYears + inMonths
+
+            with xr.open_dataset(self.outputFile) as ds:
+                lastDate = str(ds.xtime_startMonthly[-1].values)
+
+            lastYear = int(lastDate[0:4])
+            lastMonth = int(lastDate[5:7])
+            lastTotalMonths = 12*lastYear + lastMonth
+
+            inputFiles = []
+            for index, inputFile in enumerate(self.inputFiles):
+                if totalMonths[index] > lastTotalMonths:
+                    inputFiles.append(inputFile)
+
+            if len(inputFiles) == 0:
+                # nothing to do
+                return
+        else:
+            inputFiles = self.inputFiles
+
+        variableList = self.variableList + ['xtime_startMonthly',
+                                            'xtime_endMonthly']
+
+        args = ['ncrcat', '--record_append', '--no_tmp_fl',
+                '-v', ','.join(variableList)]
+
+        printCommand = '{} {} ... {} {}'.format(' '.join(args),
+                                                inputFiles[0],
+                                                inputFiles[-1],
+                                                self.outputFile)
+        args.extend(inputFiles)
+        args.append(self.outputFile)
+
+        self.logger.info('running: {}'.format(printCommand))
+        for handler in self.logger.handlers:
+            handler.flush()
+
+        process = subprocess.Popen(args, stdout=subprocess.PIPE,
+                                   stderr=subprocess.PIPE)
+        stdout, stderr = process.communicate()
+
+        if stdout:
+            self.logger.info(stdout)
+        if stderr:
+            for line in stderr.split('\n'):
+                self.logger.error(line)
+
+        if process.returncode != 0:
+            raise subprocess.CalledProcessError(process.returncode,
+                                                ' '.join(args))
+
+        # }}}
+    # }}}
+
+
+# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python
diff --git a/mpas_analysis/test/test_open_mpas_dataset.py b/mpas_analysis/test/test_open_mpas_dataset.py
new file mode 100644
index 000000000..9b449bac2
--- /dev/null
+++ b/mpas_analysis/test/test_open_mpas_dataset.py
@@ -0,0 +1,76 @@
+"""
+Unit test infrastructure for ``open_mpas_dataset``.
+
+Xylar Asay-Davis
+02/16/2017
+"""
+
+import pytest
+from mpas_analysis.test import TestCase, loaddatadir
+from mpas_analysis.shared.io import open_mpas_dataset
+
+
+@pytest.mark.usefixtures("loaddatadir")
+class TestOpenMpasDataset(TestCase):
+
+    def test_open_dataset_fn(self):
+        fileName = str(self.datadir.join('example_jan.nc'))
+        timestr = ['xtime_start', 'xtime_end']
+        variableList = \
+            ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature']
+
+        for calendar in ['gregorian', 'gregorian_noleap']:
+            ds = open_mpas_dataset(
+                fileName=fileName,
+                calendar=calendar,
+                timeVariableNames=timestr,
+                variableList=variableList)
+            self.assertEqual(ds.data_vars.keys(), variableList)
+
+    def test_start_end(self):
+        fileName = str(self.datadir.join('example_jan_feb.nc'))
+        timestr = ['xtime_start', 'xtime_end']
+        variableList = \
+            ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature']
+
+        for calendar in ['gregorian', 'gregorian_noleap']:
+            # all dates
+            ds = open_mpas_dataset(
+                fileName=fileName,
+                calendar=calendar,
+                timeVariableNames=timestr,
+                variableList=variableList,
+                startDate='0001-01-01',
+                endDate='9999-12-31')
+            self.assertEqual(len(ds.Time), 2)
+
+            # just the first date
+            ds = open_mpas_dataset(
+                fileName=fileName,
+                calendar=calendar,
+                timeVariableNames=timestr,
+                variableList=variableList,
+                startDate='0005-01-01',
+                endDate='0005-02-01')
+            self.assertEqual(len(ds.Time), 1)
+
+            # just the second date
+            ds = open_mpas_dataset(
+                fileName=fileName,
+                calendar=calendar,
+                timeVariableNames=timestr,
+                variableList=variableList,
+                startDate='0005-02-01',
+                endDate='0005-03-01')
+            self.assertEqual(len(ds.Time), 1)
+
+    def test_open_process_climatology(self):
+        fileName = str(self.datadir.join('timeSeries.nc'))
+        calendar = 'gregorian_noleap'
+        open_mpas_dataset(
+            fileName=fileName,
+            calendar=calendar,
+            timeVariableNames=['xtime_startMonthly', 'xtime_endMonthly'],
+            variableList=['timeMonthly_avg_tThreshMLD'])
+
+# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python
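[Editor's note: outside the test harness, the intended end-to-end use is to point open_mpas_dataset at the single file that MpasTimeSeriesTask writes via ncrcat. A hedged sketch under these assumptions: the path below is a placeholder, the variable is the one from test_open_process_climatology above, and the date strings follow the 'YYYY-MM-DD_hh:mm:ss' format used in the timeSeries config section:]

from mpas_analysis.shared.io import open_mpas_dataset

# hypothetical path to the output written by
# MpasTimeSeriesTask._compute_time_series_with_ncrcat()
fileName = 'timeseries/oceanTimeSeries.nc'

ds = open_mpas_dataset(
    fileName=fileName,
    calendar='gregorian_noleap',
    timeVariableNames=['xtime_startMonthly', 'xtime_endMonthly'],
    variableList=['timeMonthly_avg_tThreshMLD'],
    startDate='0001-01-01_00:00:00',
    endDate='0010-12-31_23:59:59')

# Time comes back as a decoded coordinate (see len(ds.Time) in the
# tests), so ordinary xarray selections and reductions apply directly
meanMLD = ds.timeMonthly_avg_tThreshMLD.mean(dim='Time')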
diff --git a/mpas_analysis/test/test_open_mpas_dataset/example_jan.nc b/mpas_analysis/test/test_open_mpas_dataset/example_jan.nc
new file mode 120000
index 000000000..da89ad526
--- /dev/null
+++ b/mpas_analysis/test/test_open_mpas_dataset/example_jan.nc
@@ -0,0 +1 @@
+../test_mpas_xarray/example_jan.nc
\ No newline at end of file
diff --git a/mpas_analysis/test/test_open_mpas_dataset/example_jan_feb.nc b/mpas_analysis/test/test_open_mpas_dataset/example_jan_feb.nc
new file mode 120000
index 000000000..67fdfa73b
--- /dev/null
+++ b/mpas_analysis/test/test_open_mpas_dataset/example_jan_feb.nc
@@ -0,0 +1 @@
+../test_mpas_xarray/example_jan_feb.nc
\ No newline at end of file
diff --git a/mpas_analysis/test/test_open_mpas_dataset/mpasMesh.nc b/mpas_analysis/test/test_open_mpas_dataset/mpasMesh.nc
new file mode 120000
index 000000000..880a52c2e
--- /dev/null
+++ b/mpas_analysis/test/test_open_mpas_dataset/mpasMesh.nc
@@ -0,0 +1 @@
+../test_interpolate/mpasMesh.nc
\ No newline at end of file
diff --git a/mpas_analysis/test/test_open_mpas_dataset/timeSeries.nc b/mpas_analysis/test/test_open_mpas_dataset/timeSeries.nc
new file mode 100644
index 000000000..e3034dded
Binary files /dev/null and b/mpas_analysis/test/test_open_mpas_dataset/timeSeries.nc differ
diff --git a/run_mpas_analysis b/run_mpas_analysis
index a884e75d4..0d9d8c6c0 100755
--- a/run_mpas_analysis
+++ b/run_mpas_analysis
@@ -57,6 +57,7 @@ def build_analysis_list(config):  # {{{
     from mpas_analysis import ocean
     from mpas_analysis import sea_ice
     from mpas_analysis.shared.climatology import MpasClimatologyTask
+    from mpas_analysis.shared.time_series import MpasTimeSeriesTask
 
     # analyses will be a list of analysis classes
     analyses = []
@@ -64,19 +65,23 @@ def build_analysis_list(config):  # {{{
     # Ocean Analyses
     oceanClimatolgyTask = MpasClimatologyTask(config=config,
                                               componentName='ocean')
+    oceanTimeSeriesTask = MpasTimeSeriesTask(config=config,
+                                             componentName='ocean')
     analyses.append(oceanClimatolgyTask)
     analyses.append(ocean.ClimatologyMapMLD(config, oceanClimatolgyTask))
     analyses.append(ocean.ClimatologyMapSST(config, oceanClimatolgyTask))
     analyses.append(ocean.ClimatologyMapSSS(config, oceanClimatolgyTask))
-    analyses.append(ocean.TimeSeriesOHC(config))
-    analyses.append(ocean.TimeSeriesSST(config))
+    analyses.append(ocean.TimeSeriesOHC(config, oceanTimeSeriesTask))
+    analyses.append(ocean.TimeSeriesSST(config, oceanTimeSeriesTask))
     analyses.append(ocean.MeridionalHeatTransport(config,
                                                   oceanClimatolgyTask))
     analyses.append(ocean.StreamfunctionMOC(config, oceanClimatolgyTask))
-    analyses.append(ocean.IndexNino34(config))
+    analyses.append(ocean.IndexNino34(config, oceanTimeSeriesTask))
 
     # Sea Ice Analyses
     seaIceClimatolgyTask = MpasClimatologyTask(config=config,
                                                componentName='seaIce')
+    seaIceTimeSeriesTask = MpasTimeSeriesTask(config=config,
+                                              componentName='seaIce')
     analyses.append(seaIceClimatolgyTask)
     analyses.append(sea_ice.ClimatologyMapSeaIceConc(config,
                                                      seaIceClimatolgyTask,
@@ -90,7 +95,7 @@ def build_analysis_list(config):  # {{{
     analyses.append(sea_ice.ClimatologyMapSeaIceThick(config,
                                                       seaIceClimatolgyTask,
                                                       hemisphere='SH'))
-    analyses.append(sea_ice.TimeSeriesSeaIce(config))
+    analyses.append(sea_ice.TimeSeriesSeaIce(config, seaIceTimeSeriesTask))
 
     return analyses  # }}}
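[Editor's note: to summarize the contract the run-script changes establish, here is a condensed sketch of the ocean portion of build_analysis_list. It restates the wiring shown in the diff for illustration; the helper function itself is hypothetical, not additional API:]

from mpas_analysis import ocean
from mpas_analysis.shared.time_series import MpasTimeSeriesTask


def build_ocean_time_series_analyses(config):
    # one shared extraction task per component: each analysis registers
    # its variables during setup_and_check, so the monthly history
    # files are read (and ncrcat is run) only once per component
    timeSeriesTask = MpasTimeSeriesTask(config=config,
                                        componentName='ocean')
    analyses = [timeSeriesTask,
                ocean.TimeSeriesOHC(config, timeSeriesTask),
                ocean.TimeSeriesSST(config, timeSeriesTask),
                ocean.IndexNino34(config, timeSeriesTask)]
    # each analysis declared run_after(timeSeriesTask) in its
    # constructor, so the task manager executes the extraction first
    return analyses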