From 9eca784967c9d33511d97bc3a10697d410546508 Mon Sep 17 00:00:00 2001 From: Xylar Asay-Davis Date: Sat, 6 Jan 2018 18:43:02 +0100 Subject: [PATCH] Check for missing variables in mpas time series If variables are missing in an existing time series data set, delete the data set and start over rather than trying to append. --- .../time_series/mpas_time_series_task.py | 69 +++++++++++-------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/mpas_analysis/shared/time_series/mpas_time_series_task.py b/mpas_analysis/shared/time_series/mpas_time_series_task.py index 3029d6f44..463017416 100644 --- a/mpas_analysis/shared/time_series/mpas_time_series_task.py +++ b/mpas_analysis/shared/time_series/mpas_time_series_task.py @@ -228,8 +228,8 @@ def _update_time_series_bounds_from_file_names(self): # {{{ if startYear != requestedStartYear or endYear != requestedEndYear: print("Warning: {} start and/or end year different from " - "requested\n" \ - "requestd: {:04d}-{:04d}\n" \ + "requested\n" + "requestd: {:04d}-{:04d}\n" "actual: {:04d}-{:04d}\n".format(section, requestedStartYear, requestedEndYear, @@ -275,33 +275,48 @@ def _compute_time_series_with_ncrcat(self): 'Note: this presumes use of the conda-forge ' 'channel.') + inputFiles = self.inputFiles if os.path.exists(self.outputFile): - # add only input files wiht times that aren't already in the - # output file - dates = sorted([fileName[-13:-6] for fileName in self.inputFiles]) - inYears = numpy.array([int(date[0:4]) for date in dates]) - inMonths = numpy.array([int(date[5:7]) for date in dates]) - totalMonths = 12*inYears + inMonths - + # make sure all the necessary variables are also present with xr.open_dataset(self.outputFile) as ds: - dates = [bytes.decode(name) for name in - ds.xtime_startMonthly.values] - lastDate = dates[-1] - - lastYear = int(lastDate[0:4]) - lastMonth = int(lastDate[5:7]) - lastTotalMonths = 12*lastYear + lastMonth - - inputFiles = [] - for index, inputFile in 
enumerate(self.inputFiles): - if totalMonths[index] > lastTotalMonths: - inputFiles.append(inputFile) - - if len(inputFiles) == 0: - # nothing to do - return - else: - inputFiles = self.inputFiles + updateSubset = True + for variableName in self.variableList: + if variableName not in ds.variables: + updateSubset = False + break + + if updateSubset: + # add only input files with times that aren't already in + # the output file + dates = sorted([fileName[-13:-6] for fileName in + self.inputFiles]) + inYears = numpy.array([int(date[0:4]) for date in dates]) + inMonths = numpy.array([int(date[5:7]) for date in dates]) + totalMonths = 12*inYears + inMonths + + dates = [bytes.decode(name) for name in + ds.xtime_startMonthly.values] + lastDate = dates[-1] + + lastYear = int(lastDate[0:4]) + lastMonth = int(lastDate[5:7]) + lastTotalMonths = 12*lastYear + lastMonth + + inputFiles = [] + for index, inputFile in enumerate(self.inputFiles): + if totalMonths[index] > lastTotalMonths: + inputFiles.append(inputFile) + + if len(inputFiles) == 0: + # nothing to do + return + else: + # there is an output file but it has the wrong variables + # so we need to delete it. + self.logger.warning('Warning: deleting file {} because ' + 'some variables were missing'.format( + self.outputFile)) + os.remove(self.outputFile) variableList = self.variableList + ['xtime_startMonthly', 'xtime_endMonthly']