diff --git a/src/compo/mopitt_co_nc2ioda.py b/src/compo/mopitt_co_nc2ioda.py index 95117ce69..c247d4df5 100755 --- a/src/compo/mopitt_co_nc2ioda.py +++ b/src/compo/mopitt_co_nc2ioda.py @@ -162,6 +162,9 @@ def _read(self): for k in range(nlevs): varname_ak = ('averaging_kernel_level_'+str(k+1), 'RtrvlAncData') self.outdata[varname_ak] = ak_tc_dimless[:, k][flg] + # add top vertice in IODA file, here it is 0hPa but can be different + # for other obs stream + for k in range(nlevs+1): varname_pr = ('pressure_level_'+str(k+1), 'RtrvlAncData') self.outdata[varname_pr] = hPa2Pa * pr_gd[:, k][flg] @@ -182,7 +185,10 @@ def _read(self): varname_ak = ('averaging_kernel_level_'+str(k+1), 'RtrvlAncData') self.outdata[varname_ak] = np.concatenate( (self.outdata[varname_ak], ak_tc_dimless[:, k][flg])) - varname_pr = ('pressure_level_'+str(k+1), 'RtrvlAncData') + # add top vertice in IODA file, here it is 0hPa but can be different + # for other obs stream + for k in range(nlevs+1): + varname_pr = ('pressure_level_'+str(k), 'RtrvlAncData') self.outdata[varname_pr] = np.concatenate( (self.outdata[varname_pr], hPa2Pa * pr_gd[:, k][flg])) diff --git a/src/compo/tropomi_no2_nc2ioda.py b/src/compo/tropomi_no2_nc2ioda.py index c9295ed1d..aa6398f50 100755 --- a/src/compo/tropomi_no2_nc2ioda.py +++ b/src/compo/tropomi_no2_nc2ioda.py @@ -30,28 +30,20 @@ ("datetime", "string"), ] -obsvars = { - 'nitrogendioxide_tropospheric_column': 'nitrogen_dioxide_in_tropospheric_column', - 'nitrogendioxide_total_column': 'nitrogen_dioxide_in_total_column', -} - AttrData = { 'converter': os.path.basename(__file__), - 'nvars': np.int32(len(obsvars)), + 'nvars': np.int32(1), } DimDict = { } -VarDims = { - 'nitrogen_dioxide_in_tropospheric_column': ['nlocs'], - 'nitrogen_dioxide_in_total_column': ['nlocs'], -} - class tropomi(object): - def __init__(self, filenames): + def __init__(self, filenames, columnType, obsVar): self.filenames = filenames + self.columnType = columnType + self.obsVar = obsVar self.varDict = defaultdict(lambda: defaultdict(dict)) self.outdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict)) self.varAttrs = DefaultOrderedDict(lambda: DefaultOrderedDict(dict)) @@ -60,23 +52,28 @@ def __init__(self, filenames): # Open input file and read relevant info def _read(self): # set up variable names for IODA - for iodavar in ['nitrogen_dioxide_in_tropospheric_column', 'nitrogen_dioxide_in_total_column']: - self.varDict[iodavar]['valKey'] = iodavar, iconv.OvalName() - self.varDict[iodavar]['errKey'] = iodavar, iconv.OerrName() - self.varDict[iodavar]['qcKey'] = iodavar, iconv.OqcName() - self.varAttrs[iodavar, iconv.OvalName()]['coordinates'] = 'longitude latitude' - self.varAttrs[iodavar, iconv.OerrName()]['coordinates'] = 'longitude latitude' - self.varAttrs[iodavar, iconv.OqcName()]['coordinates'] = 'longitude latitude' - self.varAttrs[iodavar, iconv.OvalName()]['units'] = 'mol m-2' - self.varAttrs[iodavar, iconv.OerrName()]['units'] = 'mol m-2' + if self.columnType == 'total': + iodavar = self.obsVar['nitrogendioxide_total_column'] + elif self.columnType == 'tropo': + iodavar = self.obsVar['nitrogendioxide_tropospheric_column'] + self.varDict[iodavar]['valKey'] = iodavar, iconv.OvalName() + self.varDict[iodavar]['errKey'] = iodavar, iconv.OerrName() + self.varDict[iodavar]['qcKey'] = iodavar, iconv.OqcName() + self.varAttrs[iodavar, iconv.OvalName()]['coordinates'] = 'longitude latitude' + self.varAttrs[iodavar, iconv.OerrName()]['coordinates'] = 'longitude latitude' + self.varAttrs[iodavar, iconv.OqcName()]['coordinates'] = 'longitude latitude' + self.varAttrs[iodavar, iconv.OvalName()]['units'] = 'mol m-2' + self.varAttrs[iodavar, iconv.OerrName()]['units'] = 'mol m-2' # loop through input filenames first = True for f in self.filenames: ncd = nc.Dataset(f, 'r') + # get global attributes AttrData['date_time_string'] = ncd.getncattr('time_reference')[0:19]+'Z' AttrData['sensor'] = ncd.getncattr('sensor') AttrData['platform'] = ncd.getncattr('platform') + # many variables are time, scanline, ground_pixel # but others are just time, scanline lats = ncd.groups['PRODUCT'].variables['latitude'][:].ravel() @@ -84,6 +81,10 @@ def _read(self): qa_value = ncd.groups['PRODUCT'].variables['qa_value'][:] # 2D times = np.empty_like(qa_value, dtype=object) qa_value = qa_value.ravel() + + # adding ability to pre filter the data using the qa value + # documentation recommends using > 0.75 + flg = qa_value > 0.75 qc_flag = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS']\ .variables['processing_quality_flags'][:] qc_flag = qc_flag.ravel().astype('int32') @@ -91,80 +92,91 @@ def _read(self): for t in range(len(time1[0])): times[0, t, :] = time1[0, t][0:19]+'Z' times = times.ravel() - # need additional variable to use the averaging kernel for DA - kernel_err = ncd.groups['PRODUCT'].\ - variables['nitrogendioxide_tropospheric_column_precision_kernel'][:].ravel() - kernel_err_total = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS'].\ - variables['nitrogendioxide_total_column_precision_kernel'][:].ravel() trop_layer = ncd.groups['PRODUCT'].variables['tm5_tropopause_layer_index'][:].ravel() total_airmass = ncd.groups['PRODUCT'].variables['air_mass_factor_total'][:].ravel() trop_airmass = ncd.groups['PRODUCT'].\ variables['air_mass_factor_troposphere'][:].ravel() + # get info to construct the pressure level array ps = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['INPUT_DATA'].\ variables['surface_pressure'][:] + # bottom of layer is vertice 0, very top layer is TOA (0hPa) - ak = ncd.groups['PRODUCT'].variables['tm5_constant_a'][:, 0] - bk = ncd.groups['PRODUCT'].variables['tm5_constant_b'][:, 0] + ak = ncd.groups['PRODUCT'].variables['tm5_constant_a'][:, :] + bk = ncd.groups['PRODUCT'].variables['tm5_constant_b'][:, :] + # grab the averaging kernel avg_kernel = ncd.groups['PRODUCT'].variables['averaging_kernel'][:] nlevs = len(avg_kernel[0, 0, 0]) AttrData['averaging_kernel_levels'] = np.int32(nlevs) + # scale the avk using AMF ratio and tropopause level for tropo column + nlocf = len(trop_layer[flg]) + scaleAK = np.ones((nlocf, nlevs), dtype=np.float32) + if self.columnType == 'tropo': + # do not loop over nlocs here this makes the execution very slow + for k in range(nlevs): + scaleAK[..., k][np.full((nlocf), k, dtype=int) > trop_layer[flg]] = 0 + scaleAK[..., k] *= total_airmass[flg] / trop_airmass[flg] + if first: # add metadata variables - self.outdata[('datetime', 'MetaData')] = times - self.outdata[('latitude', 'MetaData')] = lats - self.outdata[('longitude', 'MetaData')] = lons - self.outdata[('quality_assurance_value', 'MetaData')] = qa_value - self.outdata[('troposphere_layer_index', 'RtrvlAncData')] = trop_layer - self.outdata[('air_mass_factor_total', 'RtrvlAncData')] = total_airmass - self.outdata[('air_mass_factor_troposphere', 'RtrvlAncData')] = trop_airmass + self.outdata[('datetime', 'MetaData')] = times[flg] + self.outdata[('latitude', 'MetaData')] = lats[flg] + self.outdata[('longitude', 'MetaData')] = lons[flg] + self.outdata[('quality_assurance_value', 'MetaData')] = qa_value[flg] for k in range(nlevs): varname_ak = ('averaging_kernel_level_'+str(k+1), 'RtrvlAncData') - self.outdata[varname_ak] = avg_kernel[..., k].ravel() + self.outdata[varname_ak] = avg_kernel[..., k].ravel()[flg] * scaleAK[..., k] varname_pr = ('pressure_level_'+str(k+1), 'RtrvlAncData') - self.outdata[varname_pr] = ak[k] + bk[k]*ps[...].ravel() + self.outdata[varname_pr] = ak[k, 0] + bk[k, 0]*ps[...].ravel()[flg] + # add top vertice in IODA file, here it is 0hPa but can be different + # for other obs stream + varname_pr = ('pressure_level_'+str(nlevs+1), 'RtrvlAncData') + self.outdata[varname_pr] = ak[nlevs-1, 1] + bk[nlevs-1, 1]*ps[...].ravel() + else: self.outdata[('datetime', 'MetaData')] = np.concatenate(( - self.outdata[('datetime', 'MetaData')], times)) + self.outdata[('datetime', 'MetaData')], times[flg])) self.outdata[('latitude', 'MetaData')] = np.concatenate(( - self.outdata[('latitude', 'MetaData')], lats)) + self.outdata[('latitude', 'MetaData')], lats[flg])) self.outdata[('longitude', 'MetaData')] = np.concatenate(( - self.outdata[('longitude', 'MetaData')], lons)) + self.outdata[('longitude', 'MetaData')], lons[flg])) self.outdata[('quality_assurance_value', 'MetaData')] = np.concatenate(( - self.outdata[('quality_assurance_value', 'MetaData')], qa_value)) - self.outdata[('troposphere_layer_index', 'RtrvlAncData')] = np.concatenate(( - self.outdata[('troposphere_layer_index', 'RtrvlAncData')], trop_layer)) - self.outdata[('air_mass_factor_total', 'RtrvlAncData')] = np.concatenate(( - self.outdata[('air_mass_factor_total', 'RtrvlAncData')], total_airmass)) - self.outdata[('air_mass_factor_troposphere', 'RtrvlAncData')] = np.concatenate(( - self.outdata[('air_mass_factor_troposphere', 'RtrvlAncData')], trop_airmass)) + self.outdata[('quality_assurance_value', 'MetaData')], qa_value[flg])) for k in range(nlevs): varname_ak = ('averaging_kernel_level_'+str(k+1), 'RtrvlAncData') self.outdata[varname_ak] = np.concatenate( - (self.outdata[varname_ak], avg_kernel[..., k].ravel())) + (self.outdata[varname_ak], avg_kernel[..., k].ravel()[flg] * scaleAK[..., k])) varname_pr = ('pressure_level_'+str(k+1), 'RtrvlAncData') self.outdata[varname_pr] = np.concatenate( - (self.outdata[varname_pr], ak[k] + bk[k]*ps[...].ravel())) - for ncvar, iodavar in obsvars.items(): + (self.outdata[varname_pr], ak[k] + bk[k]*ps[...].ravel()[flg])) + varname_pr = ('pressure_level_'+str(nlevs+1), 'RtrvlAncData') + self.outdata[varname_pr] = np.concatenate( + (self.outdata[varname_pr], ak[nlevs-1, 1] + bk[nlevs-1, 1]*ps[...].ravel()[flg])) + + for ncvar, iodavar in self.obsVar.items(): + if ncvar in ['nitrogendioxide_tropospheric_column']: - data = ncd.groups['PRODUCT'].variables[ncvar][:].ravel() - err = ncd.groups['PRODUCT'].variables[ncvar+'_precision'][:].ravel() + data = ncd.groups['PRODUCT'].variables[ncvar][:].ravel()[flg] + err = ncd.groups['PRODUCT'].variables[ncvar+'_precision'][:].ravel()[flg] else: - data = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS'].variables[ncvar][:].ravel() - err = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS'].variables[ncvar+'_precision'][:].ravel() + data = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS'].variables[ncvar][:].ravel()[flg] + err = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS'].variables[ncvar+'_precision'][:].ravel()[flg] + if first: self.outdata[self.varDict[iodavar]['valKey']] = data self.outdata[self.varDict[iodavar]['errKey']] = err - self.outdata[self.varDict[iodavar]['qcKey']] = qc_flag + self.outdata[self.varDict[iodavar]['qcKey']] = qc_flag[flg] else: self.outdata[self.varDict[iodavar]['valKey']] = np.concatenate( (self.outdata[self.varDict[iodavar]['valKey']], data)) self.outdata[self.varDict[iodavar]['errKey']] = np.concatenate( (self.outdata[self.varDict[iodavar]['errKey']], err)) self.outdata[self.varDict[iodavar]['qcKey']] = np.concatenate( - (self.outdata[self.varDict[iodavar]['qcKey']], qc_flag)) + (self.outdata[self.varDict[iodavar]['qcKey']], qc_flag[flg])) + first = False + DimDict['nlocs'] = len(self.outdata[('datetime', 'MetaData')]) AttrData['nlocs'] = np.int32(DimDict['nlocs']) @@ -180,7 +192,7 @@ def main(): # get command line arguments parser = argparse.ArgumentParser( description=( - 'Reads TROPOMI NO2 netCDF files provided by NESDIS' + 'Reads TROPOMI NO2 netCDF files: official Copernicus product' 'and converts into IODA formatted output files. Multiple' 'files are able to be concatenated.') ) @@ -194,17 +206,41 @@ def main(): '-o', '--output', help="path of IODA output file", type=str, required=True) + required.add_argument( + '-c', '--column', + help="type of column: total or tropo", + type=str, required=True) args = parser.parse_args() + if args.column == "tropo": + + obsVar = { + 'nitrogendioxide_tropospheric_column': 'nitrogen_dioxide_in_tropospheric_column' + } + + varDims = { + 'nitrogen_dioxide_in_tropospheric_column': ['nlocs'] + } + + elif args.column == "total": + + obsVar = { + 'nitrogendioxide_total_column': 'nitrogen_dioxide_in_total_column' + } + + varDims = { + 'nitrogen_dioxide_in_total_column': ['nlocs'] + } + # Read in the NO2 data - no2 = tropomi(args.input) + no2 = tropomi(args.input, args.column, obsVar) # setup the IODA writer writer = iconv.IodaWriter(args.output, locationKeyList, DimDict) # write everything out - writer.BuildIoda(no2.outdata, VarDims, no2.varAttrs, AttrData) + writer.BuildIoda(no2.outdata, varDims, no2.varAttrs, AttrData) if __name__ == '__main__': diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5b0838d5a..3bfed22cd 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -100,7 +100,8 @@ list( APPEND test_output testoutput/ioda.NC001007.2020031012.nc testoutput/viirs_aod.nc testoutput/viirs_bias.nc - testoutput/tropomi_no2.nc + testoutput/tropomi_no2_total.nc + testoutput/tropomi_no2_tropo.nc testoutput/mopitt_co.nc testoutput/modis_aod.nc testoutput/imssnow_scf.nc @@ -858,7 +859,7 @@ ecbuild_add_test( TARGET test_${PROJECT_NAME}_satbias_viirs_aod -o testrun/viirs_bias.nc" viirs_bias.nc ${IODA_CONV_COMP_TOL_ZERO}) -ecbuild_add_test( TARGET test_${PROJECT_NAME}_tropomi_no2 +ecbuild_add_test( TARGET test_${PROJECT_NAME}_tropomi_no2_total TYPE SCRIPT ENVIRONMENT "PYTHONPATH=${IODACONV_PYTHONPATH}" COMMAND bash @@ -866,8 +867,21 @@ ecbuild_add_test( TARGET test_${PROJECT_NAME}_tropomi_no2 netcdf "${Python3_EXECUTABLE} ${CMAKE_BINARY_DIR}/bin/tropomi_no2_nc2ioda.py -i testinput/tropomi_no2.nc - -o testrun/tropomi_no2.nc" - tropomi_no2.nc ${IODA_CONV_COMP_TOL_ZERO}) + -o testrun/tropomi_no2_total.nc + -c total" + tropomi_no2_total.nc ${IODA_CONV_COMP_TOL_ZERO}) + +ecbuild_add_test( TARGET test_${PROJECT_NAME}_tropomi_no2_tropo + TYPE SCRIPT + ENVIRONMENT "PYTHONPATH=${IODACONV_PYTHONPATH}" + COMMAND bash + ARGS ${CMAKE_BINARY_DIR}/bin/iodaconv_comp.sh + netcdf + "${Python3_EXECUTABLE} ${CMAKE_BINARY_DIR}/bin/tropomi_no2_nc2ioda.py + -i testinput/tropomi_no2.nc + -o testrun/tropomi_no2_tropo.nc + -c tropo" + tropomi_no2_tropo.nc ${IODA_CONV_COMP_TOL_ZERO}) ecbuild_add_test( TARGET test_${PROJECT_NAME}_mopitt_co TYPE SCRIPT diff --git a/test/testoutput/mopitt_co.nc b/test/testoutput/mopitt_co.nc index 75a31a4e8..4a4b12dab 100644 --- a/test/testoutput/mopitt_co.nc +++ b/test/testoutput/mopitt_co.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:701e5b8d31f6b56d5d3113a618f05cf99c7c59611333c3904828df8ff314b9ad -size 210411 +oid sha256:3a6c239fbb5b72df4ba40a0f3fd534e485569b60f5d4e2e84bc239cd90bd230a +size 210375 diff --git a/test/testoutput/tropomi_no2.nc b/test/testoutput/tropomi_no2.nc deleted file mode 100644 index 7b2697a0a..000000000 --- a/test/testoutput/tropomi_no2.nc +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0a528ea147d921f7493fd9e266ab4f99aa8cfbc9a342df8d6665ee43da8b02f5 -size 220745 diff --git a/test/testoutput/tropomi_no2_total.nc b/test/testoutput/tropomi_no2_total.nc new file mode 100644 index 000000000..fff9c8e77 --- /dev/null +++ b/test/testoutput/tropomi_no2_total.nc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4bc18e703b14cc9959cd89f43b14d5cb48c67c0f730344e4ea6bb6579b91ca9 +size 116243 diff --git a/test/testoutput/tropomi_no2_tropo.nc b/test/testoutput/tropomi_no2_tropo.nc new file mode 100644 index 000000000..d0bc65376 --- /dev/null +++ b/test/testoutput/tropomi_no2_tropo.nc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be93f2e4673d72decc5a1710ba80e695cfdc27de0527a10dfc0e67ecf7f5a1f4 +size 108155