diff --git a/src/compo/mopitt_co_nc2ioda.py b/src/compo/mopitt_co_nc2ioda.py index 3d2d01972..b6edd2ce5 100755 --- a/src/compo/mopitt_co_nc2ioda.py +++ b/src/compo/mopitt_co_nc2ioda.py @@ -30,22 +30,20 @@ ("dateTime", "long"), ] -varname_co = 'carbonmonoxideColumn' - -# This is apparently not used obsvars = { 'carbonmonoxide_total_column': 'carbon_monoxide_in_total_column', } AttrData = { 'converter': os.path.basename(__file__), + 'nvars': np.int32(len(obsvars)), } DimDict = { } VarDims = { - varname_co: ['Location'], + 'carbon_monoxide_in_total_column': ['Location'], } # constants @@ -66,7 +64,7 @@ def __init__(self, filenames, time_range): def _read(self): # set up variable names for IODA - for iodavar in [varname_co, ]: + for iodavar in ['carbon_monoxide_in_total_column']: self.varDict[iodavar]['valKey'] = iodavar, iconv.OvalName() self.varDict[iodavar]['errKey'] = iodavar, iconv.OerrName() self.varDict[iodavar]['qcKey'] = iodavar, iconv.OqcName() @@ -75,6 +73,7 @@ def _read(self): self.varAttrs[iodavar, iconv.OqcName()]['coordinates'] = 'longitude latitude' self.varAttrs[iodavar, iconv.OvalName()]['units'] = 'mol m-2' self.varAttrs[iodavar, iconv.OerrName()]['units'] = 'mol m-2' + self.varAttrs[iodavar, iconv.OqcName()]['units'] = 'unitless' # loop through input filenames first = True for f in self.filenames: @@ -103,12 +102,11 @@ def _read(self): qa = dat.variables['RetrievalAnomalyDiagnostic'][:].sum(axis=1) # time data, we don't need precision beyond the second - inittime = datetime.strptime(StartDateTime[:19], "%Y-%m-%dT%H:%M:%S") - time_units = 'seconds since ' + StartDateTime[:19] + 'Z' - self.varAttrs[('dateTime', 'MetaData')]['units'] = time_units - times = [int(i) - int(secd[0]) for i in secd] - times = np.array(times) - AttrData['datetimeReference'] = StartDateTime[:19] + 'Z' + inittime = datetime.strptime(StartDateTime, "%Y-%m-%dT%H:%M:%S.%fZ") + times = np.array([datetime.strftime( + inittime + timedelta(seconds=int(i)) - 
timedelta(seconds=int(secd[0])), + "%Y-%m-%dT%H:%M:%S")+"Z" for i in secd], dtype=object) + AttrData['date_time_string'] = times[0] # get ak AttrData['averaging_kernel_levels'] = np.int32(nlevs) @@ -158,17 +156,15 @@ def _read(self): # date range to fit DA window date_start = datetime.strptime(self.time_range[0], "%Y%m%d%H") - time_offset1 = round((date_start - inittime).total_seconds()) date_end = datetime.strptime(self.time_range[1], "%Y%m%d%H") - time_offset2 = round((date_end - inittime).total_seconds()) - flg = [] - for n, t in enumerate(times): - if t >= time_offset1 and t < time_offset2 and qaf[n]: - flg.append(n) + date_list = [datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ") for date in times] + tsf = [(date_i >= date_start) & (date_i < date_end) for date_i in date_list] + + flg = np.logical_and(qaf, tsf) if first: # add metadata variables - self.outdata[('dateTime', 'MetaData')] = times[flg].astype(np.int64) + self.outdata[('dateTime', 'MetaData')] = times[flg] self.outdata[('latitude', 'MetaData')] = lats[flg] self.outdata[('longitude', 'MetaData')] = lons[flg] self.outdata[('apriori_term', 'RtrvlAncData')] = ap_tc[flg] @@ -183,42 +179,44 @@ def _read(self): self.outdata[self.varDict[iodavar]['valKey']] = xr_tc[flg] self.outdata[self.varDict[iodavar]['errKey']] = er_tc[flg] - self.outdata[self.varDict[iodavar]['qcKey']] = qa[flg].astype(np.int32) + self.outdata[self.varDict[iodavar]['qcKey']] = qa[flg] else: - self.outdata[('datetime', 'MetaData')] = np.concatenate( - self.outdata[('datetime', 'MetaData')], times[flg].astype(np.int64)) - self.outdata[('latitude', 'MetaData')] = np.concatenate( - self.outdata[('latitude', 'MetaData')], lats[flg]) - self.outdata[('longitude', 'MetaData')] = np.concatenate( - self.outdata[('longitude', 'MetaData')], lons[flg]) - self.outdata[('apriori_term', 'RtrvlAncData')] = np.concatenate( - self.outdata[('apriori_term', 'RtrvlAncData')], ap_tc[flg]) + self.outdata[('dateTime', 'MetaData')] = np.concatenate(( + 
self.outdata[('dateTime', 'MetaData')], times[flg])) + self.outdata[('latitude', 'MetaData')] = np.concatenate(( + self.outdata[('latitude', 'MetaData')], lats[flg])) + self.outdata[('longitude', 'MetaData')] = np.concatenate(( + self.outdata[('longitude', 'MetaData')], lons[flg])) + self.outdata[('apriori_term', 'RtrvlAncData')] = np.concatenate(( + self.outdata[('apriori_term', 'RtrvlAncData')], ap_tc[flg])) for k in range(nlevs): varname_ak = ('averaging_kernel_level_'+str(k+1), 'RtrvlAncData') self.outdata[varname_ak] = np.concatenate( - self.outdata[varname_ak], ak_tc_dimless[:, k][flg]) + (self.outdata[varname_ak], ak_tc_dimless[:, k][flg])) # add top vertice in IODA file, here it is 0hPa but can be different # for other obs stream for k in range(nlevs+1): varname_pr = ('pressure_level_'+str(k+1), 'RtrvlAncData') self.outdata[varname_pr] = np.concatenate( - self.outdata[varname_pr], hPa2Pa * pr_gd[:, k][flg]) + (self.outdata[varname_pr], hPa2Pa * pr_gd[:, k][flg])) self.outdata[self.varDict[iodavar]['valKey']] = np.concatenate( - self.outdata[self.varDict[iodavar]['valKey']], xr_tc[flg]) + (self.outdata[self.varDict[iodavar]['valKey']], xr_tc[flg])) self.outdata[self.varDict[iodavar]['errKey']] = np.concatenate( - self.outdata[self.varDict[iodavar]['errKey']], er_tc[flg]) + (self.outdata[self.varDict[iodavar]['errKey']], er_tc[flg])) self.outdata[self.varDict[iodavar]['qcKey']] = np.concatenate( - self.outdata[self.varDict[iodavar]['qcKey']], qa[flg]).astype(np.int32) + (self.outdata[self.varDict[iodavar]['qcKey']], qa[flg])) first = False DimDict['Location'] = len(self.outdata[('dateTime', 'MetaData')]) + AttrData['Location'] = np.int32(DimDict['Location']) for k in range(nlevs): varname = 'averaging_kernel_level_'+str(k+1) vkey = (varname, 'RtrvlAncData') self.varAttrs[vkey]['coordinates'] = 'longitude latitude' + self.varAttrs[vkey]['units'] = '' def main(): diff --git a/src/compo/tropomi_no2_co_nc2ioda.py b/src/compo/tropomi_no2_co_nc2ioda.py index 
258198c22..1103e241e 100755 --- a/src/compo/tropomi_no2_co_nc2ioda.py +++ b/src/compo/tropomi_no2_co_nc2ioda.py @@ -25,27 +25,19 @@ from orddicts import DefaultOrderedDict locationKeyList = [ - ("latitude", "float", "degrees_north"), - ("longitude", "float", "degrees_east"), - ("dateTime", "long", "seconds since 1970-01-01T00:00:00Z"), + ("latitude", "float"), + ("longitude", "float"), + ("dateTime", "string"), ] -meta_keys = [m_item[0] for m_item in locationKeyList] AttrData = { - 'converter': os.path.basename(__file__) + 'converter': os.path.basename(__file__), + 'nvars': np.int32(1), } DimDict = { } -varDims = { -} - -iso8601_string = locationKeyList[meta_keys.index('dateTime')][2] -epoch = datetime.fromisoformat(iso8601_string[14:-1]) - -float_missing_value = nc.default_fillvals['f4'] - class tropomi(object): def __init__(self, filenames, varname, columnType, qa_flg, thin, obsVar): @@ -69,9 +61,9 @@ def _read(self): self.varDict[iodavar]['valKey'] = iodavar, iconv.OvalName() self.varDict[iodavar]['errKey'] = iodavar, iconv.OerrName() self.varDict[iodavar]['qcKey'] = iodavar, iconv.OqcName() - self.varAttrs[iodavar, iconv.OvalName()]['coordinates'] = 'longitude latitude referenceLevel' - self.varAttrs[iodavar, iconv.OerrName()]['coordinates'] = 'longitude latitude referenceLevel' - self.varAttrs[iodavar, iconv.OqcName()]['coordinates'] = 'longitude latitude referenceLevel' + self.varAttrs[iodavar, iconv.OvalName()]['coordinates'] = 'longitude latitude' + self.varAttrs[iodavar, iconv.OerrName()]['coordinates'] = 'longitude latitude' + self.varAttrs[iodavar, iconv.OqcName()]['coordinates'] = 'longitude latitude' self.varAttrs[iodavar, iconv.OvalName()]['units'] = 'mol m-2' self.varAttrs[iodavar, iconv.OerrName()]['units'] = 'mol m-2' # loop through input filenames @@ -80,7 +72,7 @@ def _read(self): ncd = nc.Dataset(f, 'r') # get global attributes - AttrData['datetimeReference'] = ncd.getncattr('time_reference')[0:19]+'Z' + AttrData['date_time_string'] = 
ncd.getncattr('time_reference')[0:19]+'Z' AttrData['sensor'] = ncd.getncattr('sensor') AttrData['platform'] = ncd.getncattr('platform') @@ -89,7 +81,7 @@ def _read(self): lats = ncd.groups['PRODUCT'].variables['latitude'][:].ravel() lons = ncd.groups['PRODUCT'].variables['longitude'][:].ravel() qa_value = ncd.groups['PRODUCT'].variables['qa_value'][:] # 2D - time_offsets = np.empty_like(qa_value, dtype=np.int64) + times = np.empty_like(qa_value, dtype=object) qa_value = qa_value.ravel() # adding ability to pre filter the data using the qa value @@ -102,11 +94,8 @@ def _read(self): qc_flag = qc_flag.ravel().astype('int32') time1 = ncd.groups['PRODUCT'].variables['time_utc'][:] for t in range(len(time1[0])): - this_datetime = datetime.fromisoformat(time1[0, t][0:19]) - time_offset = round((this_datetime - epoch).total_seconds()) - time_offsets[0, t, :] = time_offset - time_offsets = time_offsets.ravel() - + times[0, t, :] = time1[0, t][0:19]+'Z' + times = times.ravel() if self.varname == 'no2': trop_layer = ncd.groups['PRODUCT'].variables['tm5_tropopause_layer_index'][:].ravel() total_airmass = ncd.groups['PRODUCT'].variables['air_mass_factor_total'][:].ravel() @@ -128,15 +117,12 @@ def _read(self): groups['DETAILED_RESULTS'].variables['column_averaging_kernel'][:] nlevs = len(avg_kernel[0, 0, 0]) + AttrData['averaging_kernel_levels'] = np.int32(nlevs) # scale the avk using AMF ratio and tropopause level for tropo column - if self.varname == 'no2': - nlocf = len(trop_layer[flg]) - elif self.varname == 'co': - nlocf = len(lats[flg]) - + nlocf = len(lats[flg]) scaleAK = np.ones((nlocf, nlevs), dtype=np.float32) - if self.columnType == 'tropo': + if self.varname == 'no2' and self.columnType == 'tropo': # do not loop over nlocs here this makes the execution very slow for k in range(nlevs): scaleAK[..., k][np.full((nlocf), k, dtype=int) > trop_layer[flg]] = 0 @@ -144,60 +130,58 @@ def _read(self): if first: # add metadata variables - self.outdata[('dateTime', 'MetaData')] 
= time_offsets[flg] + self.outdata[('dateTime', 'MetaData')] = times[flg] self.outdata[('latitude', 'MetaData')] = lats[flg] self.outdata[('longitude', 'MetaData')] = lons[flg] - self.outdata[('qualityFlags', 'QualityMarker')] = qa_value[flg] - self.outdata[('avgKernelLevel', 'RetrievalData')] = np.full((nlocf, nlevs+1), float_missing_value, dtype=np.float32) - self.outdata[('referenceLevel', 'RetrievalData')] = np.full((nlocf, nlevs+1), float_missing_value, dtype=np.float32) + self.outdata[('quality_assurance_value', 'MetaData')] = qa_value[flg] for k in range(nlevs): - self.outdata[('avgKernelLevel', 'RetrievalData')][..., k] = avg_kernel[..., k].ravel()[flg] * scaleAK[..., k] - vname = ('referenceLevel', 'RetrievalData') + varname_ak = ('averaging_kernel_level_'+str(k+1), 'RtrvlAncData') + self.outdata[varname_ak] = avg_kernel[..., k].ravel()[flg] * scaleAK[..., k] + varname_pr = ('pressure_level_'+str(k+1), 'RtrvlAncData') if self.varname == 'no2': - self.outdata[vname][..., k] = ak[k, 0] + bk[k, 0]*ps[...].ravel()[flg] + self.outdata[varname_pr] = ak[k, 0] + bk[k, 0]*ps[...].ravel()[flg] elif self.varname == 'co': rev_k = nlevs-k-1 - self.outdata[vname][..., rev_k] = preslv[..., rev_k].ravel()[flg] + self.outdata[varname_pr] = preslv[..., rev_k].ravel()[flg] - + # add top vertice in IODA file, here it is 0hPa but can be different # for other obs stream + varname_pr = ('pressure_level_'+str(nlevs+1), 'RtrvlAncData') if self.varname == 'no2': - self.outdata[vname][..., nlevs] = ak[k, 0] + bk[k, 0]*ps[...].ravel()[flg] + self.outdata[varname_pr] = ak[nlevs-1, 1] + bk[nlevs-1, 1]*ps[...].ravel()[flg] elif self.varname == 'co': - self.outdata[vname][..., nlevs] = np.zeros(nlocf, dtype=np.float32) + self.outdata[varname_pr] = np.zeros((nlocf), dtype=np.float32) else: self.outdata[('dateTime', 'MetaData')] = np.concatenate(( - self.outdata[('dateTime', 'MetaData')], time_offsets[flg])) + self.outdata[('dateTime', 'MetaData')], times[flg])) self.outdata[('latitude', 
'MetaData')] = np.concatenate(( self.outdata[('latitude', 'MetaData')], lats[flg])) self.outdata[('longitude', 'MetaData')] = np.concatenate(( self.outdata[('longitude', 'MetaData')], lons[flg])) - self.outdata[('qualityFlags', 'QualityMarker')] = np.concatenate(( - self.outdata[('qualityFlags', 'QualityMarker')], qa_value[flg])) + self.outdata[('quality_assurance_value', 'MetaData')] = np.concatenate(( + self.outdata[('quality_assurance_value', 'MetaData')], qa_value[flg])) for k in range(nlevs): - vname = ('avgKernelLevel', 'RetrievalData') - self.outdata[vname][..., k] = np.concatenate(( - self.outdata[vname][..., k], avg_kernel[..., k].ravel()[flg] * scaleAK[..., k]), axis=1) - vname = ('referenceLevel', 'RetrievalData') - if self.varname == 'no2': - self.outdata[vname][..., k] = np.concatenate(( - self.outdata[vname][..., k], ak[k, 0] + bk[k, 0]*ps[...].ravel()[flg]), axis=1) - elif self.varname == 'co': - rev_k = nlevs-k-1 - self.outdata[vname][..., rev_k] = np.concatenate(( - self.outdata[vname][..., rev_k], preslv[..., rev_k].ravel()[flg]), axis=1) - + varname_ak = ('averaging_kernel_level_'+str(k+1), 'RtrvlAncData') + self.outdata[varname_ak] = np.concatenate( + (self.outdata[varname_ak], avg_kernel[..., k].ravel()[flg] * scaleAK[..., k])) + varname_pr = ('pressure_level_'+str(k+1), 'RtrvlAncData') + if self.varname == 'no2': + pr_data = ak[k, 0] + bk[k, 0]*ps[...].ravel()[flg] + elif self.varname == 'co': + pr_data = preslv[..., nlevs-k-1].ravel()[flg] + self.outdata[varname_pr] = np.concatenate((self.outdata[varname_pr], pr_data)) + varname_pr = ('pressure_level_'+str(nlevs+1), 'RtrvlAncData') if self.varname == 'no2': - self.outdata[vname][..., nlevs] = np.concatenate(( - self.outdata[vname][..., nlevs], ak[nlevs-1, 1] + bk[nlevs-1, 1]*ps[...].ravel()[flg]), axis=1) + self.outdata[varname_pr] = np.concatenate( + (self.outdata[varname_pr], ak[nlevs-1, 1] + bk[nlevs-1, 1]*ps[...].ravel()[flg])) elif self.varname == 'co': - self.outdata[vname][..., nlevs] = np.concatenate(( 
- self.outdata[vname][..., nlevs], np.zeros(nlocf, dtype=np.float32)), axis=1) + self.outdata[varname_pr] = np.concatenate( + (self.outdata[varname_pr], np.zeros((nlocf), dtype=np.float32))) for ncvar, iodavar in self.obsVar.items(): - if ('tropospher' in ncvar and self.varname == "no2") or self.varname == "co": + if ncvar in ['nitrogendioxide_tropospheric_column', + 'carbonmonoxide_total_column']: data = ncd.groups['PRODUCT'].variables[ncvar][:].ravel()[flg] err = ncd.groups['PRODUCT'].variables[ncvar+'_precision'][:].ravel()[flg] else: @@ -218,20 +202,14 @@ def _read(self): first = False - self.varAttrs[('dateTime', 'MetaData')]['units'] = locationKeyList[meta_keys.index('dateTime')][2] - DimDict['Location'] = len(self.outdata[('dateTime', 'MetaData')]) - DimDict['averagingKernelLevels'] = np.int32(nlevs+1) - - # Repeat all Location values on all levels for various MetaData - self.outdata[('dateTime', 'MetaData')] = np.tile(self.outdata[('dateTime', 'MetaData')], (nlevs+1, 1)) - self.outdata[('latitude', 'MetaData')] = np.tile(self.outdata[('latitude', 'MetaData')], (nlevs+1, 1)) - self.outdata[('longitude', 'MetaData')] = np.tile(self.outdata[('longitude', 'MetaData')], (nlevs+1, 1)) - self.outdata[('qualityFlags', 'QualityMarker')] = np.tile(self.outdata[('qualityFlags', 'QualityMarker')], (nlevs+1, 1)) + AttrData['Location'] = np.int32(DimDict['Location']) - for key in list(self.outdata.keys()): - vname = key[0] - varDims[vname] = ['Location', 'averagingKernelLevels'] + for k in range(nlevs): + varname = 'averaging_kernel_level_'+str(k+1) + vkey = (varname, 'RtrvlAncData') + self.varAttrs[vkey]['coordinates'] = 'longitude latitude' + self.varAttrs[vkey]['units'] = '' def main(): @@ -280,20 +258,37 @@ def main(): args = parser.parse_args() - if args.variable == "no2": - var_longname = 'nitrogendioxide' - elif args.variable == "co": - var_longname = 'carbonmonoxide' + if args.variable == "co": + var_in_name = 'carbonmonoxide' + var_out_name = 'carbon_monoxide' 
if args.column == "tropo": print('CO is only available for total column, reset column to total', flush=1) args.column = 'total' + elif args.variable == "no2": + var_in_name = 'nitrogendioxide' + var_out_name = 'nitrogen_dioxide' if args.column == "tropo": - obsVar = {var_longname+'_tropospheric_column': var_longname+'Column'} + + obsVar = { + var_in_name+'_tropospheric_column': var_out_name+'_in_tropospheric_column' + } + + varDims = { + var_out_name+'_in_tropospheric_column': ['Location'] + } + elif args.column == "total": - obsVar = {var_longname+'_total_column': var_longname+'Total'} - # Read in the NO2 or CO data + obsVar = { + var_in_name+'_total_column': var_out_name+'_in_total_column' + } + + varDims = { + var_out_name+'_in_total_column': ['Location'] + } + + # Read in the NO2 data var = tropomi(args.input, args.variable, args.column, args.qa_value, args.thin, obsVar) # setup the IODA writer diff --git a/test/testoutput/mopitt_co.nc b/test/testoutput/mopitt_co.nc index d4c5f64d4..097ba5d2f 100644 --- a/test/testoutput/mopitt_co.nc +++ b/test/testoutput/mopitt_co.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f96d9d4b64d456cabd23e2d874e97af2f0d0351d8b29e6811c6217f9a75dac0 -size 108524 +oid sha256:11fb3f330ba64cd6446096b922b665db8b9864666d316a8385ec4f213a634d93 +size 111294 diff --git a/test/testoutput/tropomi_co_total.nc b/test/testoutput/tropomi_co_total.nc index 9e70d23a6..23e219f3d 100644 --- a/test/testoutput/tropomi_co_total.nc +++ b/test/testoutput/tropomi_co_total.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7fd14d476a2ec14a460381976017319b067ac30ad2fd193f3036ce105e40ae64 -size 70083 +oid sha256:cd07fabec782889b00e553e8db98479dd0d541fc04cbaac634174f1adf629e1e +size 164142 diff --git a/test/testoutput/tropomi_no2_total.nc b/test/testoutput/tropomi_no2_total.nc index 48b9c0b3a..05c7042cb 100644 --- a/test/testoutput/tropomi_no2_total.nc +++ b/test/testoutput/tropomi_no2_total.nc @@ -1,3 +1,3 @@ 
version https://git-lfs.github.com/spec/v1 -oid sha256:0bef06d750459b4d017b9f485c0c012a93284622f6bc4fee395552d7cf503446 -size 52948 +oid sha256:ba43a0f7032c201459ef1b679034c8c9e4dcf0a9ce85d59bebb459efc874d793 +size 111804 diff --git a/test/testoutput/tropomi_no2_tropo.nc b/test/testoutput/tropomi_no2_tropo.nc index 866c7fdbf..87907975e 100644 --- a/test/testoutput/tropomi_no2_tropo.nc +++ b/test/testoutput/tropomi_no2_tropo.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bab1e3756c4fee658bdc3e4ab10fd3c72615552010911119fbcb4a1673c09a80 -size 46066 +oid sha256:4c455eb69544a740f81e6862808a201e37b33420f17fb508001db4aedc20f84e +size 103449