Skip to content
8 changes: 7 additions & 1 deletion src/compo/mopitt_co_nc2ioda.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ def _read(self):
for k in range(nlevs):
varname_ak = ('averaging_kernel_level_'+str(k+1), 'RtrvlAncData')
self.outdata[varname_ak] = ak_tc_dimless[:, k][flg]
# add top vertice in IODA file, here it is 0hPa but can be different
# for other obs stream
for k in range(nlevs+1):
varname_pr = ('pressure_level_'+str(k+1), 'RtrvlAncData')
self.outdata[varname_pr] = hPa2Pa * pr_gd[:, k][flg]

Expand All @@ -182,7 +185,10 @@ def _read(self):
varname_ak = ('averaging_kernel_level_'+str(k+1), 'RtrvlAncData')
self.outdata[varname_ak] = np.concatenate(
(self.outdata[varname_ak], ak_tc_dimless[:, k][flg]))
varname_pr = ('pressure_level_'+str(k+1), 'RtrvlAncData')
# add top vertice in IODA file, here it is 0hPa but can be different
# for other obs stream
for k in range(nlevs+1):
varname_pr = ('pressure_level_'+str(k), 'RtrvlAncData')
self.outdata[varname_pr] = np.concatenate(
(self.outdata[varname_pr], hPa2Pa * pr_gd[:, k][flg]))

Expand Down
154 changes: 95 additions & 59 deletions src/compo/tropomi_no2_nc2ioda.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,28 +30,20 @@
("datetime", "string"),
]

obsvars = {
'nitrogendioxide_tropospheric_column': 'nitrogen_dioxide_in_tropospheric_column',
'nitrogendioxide_total_column': 'nitrogen_dioxide_in_total_column',
}

AttrData = {
'converter': os.path.basename(__file__),
'nvars': np.int32(len(obsvars)),
'nvars': np.int32(1),
}

DimDict = {
}

VarDims = {
'nitrogen_dioxide_in_tropospheric_column': ['nlocs'],
'nitrogen_dioxide_in_total_column': ['nlocs'],
}


class tropomi(object):
def __init__(self, filenames):
def __init__(self, filenames, columnType, obsVar):
self.filenames = filenames
self.columnType = columnType
self.obsVar = obsVar
self.varDict = defaultdict(lambda: defaultdict(dict))
self.outdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
self.varAttrs = DefaultOrderedDict(lambda: DefaultOrderedDict(dict))
Expand All @@ -60,111 +52,131 @@ def __init__(self, filenames):
# Open input file and read relevant info
def _read(self):
# set up variable names for IODA
for iodavar in ['nitrogen_dioxide_in_tropospheric_column', 'nitrogen_dioxide_in_total_column']:
self.varDict[iodavar]['valKey'] = iodavar, iconv.OvalName()
self.varDict[iodavar]['errKey'] = iodavar, iconv.OerrName()
self.varDict[iodavar]['qcKey'] = iodavar, iconv.OqcName()
self.varAttrs[iodavar, iconv.OvalName()]['coordinates'] = 'longitude latitude'
self.varAttrs[iodavar, iconv.OerrName()]['coordinates'] = 'longitude latitude'
self.varAttrs[iodavar, iconv.OqcName()]['coordinates'] = 'longitude latitude'
self.varAttrs[iodavar, iconv.OvalName()]['units'] = 'mol m-2'
self.varAttrs[iodavar, iconv.OerrName()]['units'] = 'mol m-2'
if self.columnType == 'total':
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you're passing this through in obsVar, do you need this if statement here?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes as what is passed is the dictionary defined in main().

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or do you mean total should be as default if no -c/--column option ?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of passing a dict, since you are setting it in an if, I would send it like so:

obsVar = {'inputvar': 'nitrogendioxide_total_column',
                    'iodavar': 'nitrogen_dioxide_in_total_column',
    }

so then you don't need the if, elif here, just use the keys

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

iodavar = self.obsVar['inputvar']

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But I see how that complicates the loop below, so I guess it's fine

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I decided to keep that loop as I think we can still use the dictionary in case we need multiple obsvar in the ioda file.
But this might be irrelevant in the future. I'd just like to keep this feature for now, just in case, before we start generalizing this converter to other tropoomi and omi species or KNMI retrieval products in general, as I assume they are providing the same nc file template.

iodavar = self.obsVar['nitrogendioxide_total_column']
elif self.columnType == 'tropo':
iodavar = self.obsVar['nitrogendioxide_tropospheric_column']
self.varDict[iodavar]['valKey'] = iodavar, iconv.OvalName()
self.varDict[iodavar]['errKey'] = iodavar, iconv.OerrName()
self.varDict[iodavar]['qcKey'] = iodavar, iconv.OqcName()
self.varAttrs[iodavar, iconv.OvalName()]['coordinates'] = 'longitude latitude'
self.varAttrs[iodavar, iconv.OerrName()]['coordinates'] = 'longitude latitude'
self.varAttrs[iodavar, iconv.OqcName()]['coordinates'] = 'longitude latitude'
self.varAttrs[iodavar, iconv.OvalName()]['units'] = 'mol m-2'
self.varAttrs[iodavar, iconv.OerrName()]['units'] = 'mol m-2'
# loop through input filenames
first = True
for f in self.filenames:
ncd = nc.Dataset(f, 'r')

# get global attributes
AttrData['date_time_string'] = ncd.getncattr('time_reference')[0:19]+'Z'
AttrData['sensor'] = ncd.getncattr('sensor')
AttrData['platform'] = ncd.getncattr('platform')

# many variables are time, scanline, ground_pixel
# but others are just time, scanline
lats = ncd.groups['PRODUCT'].variables['latitude'][:].ravel()
lons = ncd.groups['PRODUCT'].variables['longitude'][:].ravel()
qa_value = ncd.groups['PRODUCT'].variables['qa_value'][:] # 2D
times = np.empty_like(qa_value, dtype=object)
qa_value = qa_value.ravel()

# adding ability to pre filter the data using the qa value
# documentation recommends using > 0.75
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the documentation from the data provider? If so it might be good to include, for the next developer, a URL reference to where the recommendation exists.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the update and for your patience with my slow response. I've just got the one comment that I am viewing as optional.

No problems, thanks a lot for the review!

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will put the url of the documentation on the next PR on this.

flg = qa_value > 0.75
qc_flag = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS']\
.variables['processing_quality_flags'][:]
qc_flag = qc_flag.ravel().astype('int32')
time1 = ncd.groups['PRODUCT'].variables['time_utc'][:]
for t in range(len(time1[0])):
times[0, t, :] = time1[0, t][0:19]+'Z'
times = times.ravel()
# need additional variable to use the averaging kernel for DA
kernel_err = ncd.groups['PRODUCT'].\
variables['nitrogendioxide_tropospheric_column_precision_kernel'][:].ravel()
kernel_err_total = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS'].\
variables['nitrogendioxide_total_column_precision_kernel'][:].ravel()
trop_layer = ncd.groups['PRODUCT'].variables['tm5_tropopause_layer_index'][:].ravel()
total_airmass = ncd.groups['PRODUCT'].variables['air_mass_factor_total'][:].ravel()
trop_airmass = ncd.groups['PRODUCT'].\
variables['air_mass_factor_troposphere'][:].ravel()

# get info to construct the pressure level array
ps = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['INPUT_DATA'].\
variables['surface_pressure'][:]

# bottom of layer is vertice 0, very top layer is TOA (0hPa)
ak = ncd.groups['PRODUCT'].variables['tm5_constant_a'][:, 0]
bk = ncd.groups['PRODUCT'].variables['tm5_constant_b'][:, 0]
ak = ncd.groups['PRODUCT'].variables['tm5_constant_a'][:, :]
bk = ncd.groups['PRODUCT'].variables['tm5_constant_b'][:, :]

# grab the averaging kernel
avg_kernel = ncd.groups['PRODUCT'].variables['averaging_kernel'][:]
nlevs = len(avg_kernel[0, 0, 0])
AttrData['averaging_kernel_levels'] = np.int32(nlevs)
# scale the avk using AMF ratio and tropopause level for tropo column
nlocf = len(trop_layer[flg])
scaleAK = np.ones((nlocf, nlevs), dtype=np.float32)
if self.columnType == 'tropo':
# do not loop over nlocs here this makes the execution very slow
for k in range(nlevs):
scaleAK[..., k][np.full((nlocf), k, dtype=int) > trop_layer[flg]] = 0
scaleAK[..., k] *= total_airmass[flg] / trop_airmass[flg]

if first:
# add metadata variables
self.outdata[('datetime', 'MetaData')] = times
self.outdata[('latitude', 'MetaData')] = lats
self.outdata[('longitude', 'MetaData')] = lons
self.outdata[('quality_assurance_value', 'MetaData')] = qa_value
self.outdata[('troposphere_layer_index', 'RtrvlAncData')] = trop_layer
self.outdata[('air_mass_factor_total', 'RtrvlAncData')] = total_airmass
self.outdata[('air_mass_factor_troposphere', 'RtrvlAncData')] = trop_airmass
self.outdata[('datetime', 'MetaData')] = times[flg]
self.outdata[('latitude', 'MetaData')] = lats[flg]
self.outdata[('longitude', 'MetaData')] = lons[flg]
self.outdata[('quality_assurance_value', 'MetaData')] = qa_value[flg]
for k in range(nlevs):
varname_ak = ('averaging_kernel_level_'+str(k+1), 'RtrvlAncData')
self.outdata[varname_ak] = avg_kernel[..., k].ravel()
self.outdata[varname_ak] = avg_kernel[..., k].ravel()[flg] * scaleAK[..., k]
varname_pr = ('pressure_level_'+str(k+1), 'RtrvlAncData')
self.outdata[varname_pr] = ak[k] + bk[k]*ps[...].ravel()
self.outdata[varname_pr] = ak[k, 0] + bk[k, 0]*ps[...].ravel()[flg]
# add top vertice in IODA file, here it is 0hPa but can be different
# for other obs stream
varname_pr = ('pressure_level_'+str(nlevs+1), 'RtrvlAncData')
self.outdata[varname_pr] = ak[nlevs-1, 1] + bk[nlevs-1, 1]*ps[...].ravel()

else:
self.outdata[('datetime', 'MetaData')] = np.concatenate((
self.outdata[('datetime', 'MetaData')], times))
self.outdata[('datetime', 'MetaData')], times[flg]))
self.outdata[('latitude', 'MetaData')] = np.concatenate((
self.outdata[('latitude', 'MetaData')], lats))
self.outdata[('latitude', 'MetaData')], lats[flg]))
self.outdata[('longitude', 'MetaData')] = np.concatenate((
self.outdata[('longitude', 'MetaData')], lons))
self.outdata[('longitude', 'MetaData')], lons[flg]))
self.outdata[('quality_assurance_value', 'MetaData')] = np.concatenate((
self.outdata[('quality_assurance_value', 'MetaData')], qa_value))
self.outdata[('troposphere_layer_index', 'RtrvlAncData')] = np.concatenate((
self.outdata[('troposphere_layer_index', 'RtrvlAncData')], trop_layer))
self.outdata[('air_mass_factor_total', 'RtrvlAncData')] = np.concatenate((
self.outdata[('air_mass_factor_total', 'RtrvlAncData')], total_airmass))
self.outdata[('air_mass_factor_troposphere', 'RtrvlAncData')] = np.concatenate((
self.outdata[('air_mass_factor_troposphere', 'RtrvlAncData')], trop_airmass))
self.outdata[('quality_assurance_value', 'MetaData')], qa_value[flg]))
for k in range(nlevs):
varname_ak = ('averaging_kernel_level_'+str(k+1), 'RtrvlAncData')
self.outdata[varname_ak] = np.concatenate(
(self.outdata[varname_ak], avg_kernel[..., k].ravel()))
(self.outdata[varname_ak], avg_kernel[..., k].ravel()[flg] * scaleAK[..., k]))
varname_pr = ('pressure_level_'+str(k+1), 'RtrvlAncData')
self.outdata[varname_pr] = np.concatenate(
(self.outdata[varname_pr], ak[k] + bk[k]*ps[...].ravel()))
for ncvar, iodavar in obsvars.items():
(self.outdata[varname_pr], ak[k] + bk[k]*ps[...].ravel()[flg]))
varname_pr = ('pressure_level_'+str(nlevs+1), 'RtrvlAncData')
self.outdata[varname_pr] = np.concatenate(
(self.outdata[varname_pr], ak[nlevs-1, 1] + bk[nlevs-1, 1]*ps[...].ravel()[flg]))

for ncvar, iodavar in self.obsVar.items():

if ncvar in ['nitrogendioxide_tropospheric_column']:
data = ncd.groups['PRODUCT'].variables[ncvar][:].ravel()
err = ncd.groups['PRODUCT'].variables[ncvar+'_precision'][:].ravel()
data = ncd.groups['PRODUCT'].variables[ncvar][:].ravel()[flg]
err = ncd.groups['PRODUCT'].variables[ncvar+'_precision'][:].ravel()[flg]
else:
data = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS'].variables[ncvar][:].ravel()
err = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS'].variables[ncvar+'_precision'][:].ravel()
data = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS'].variables[ncvar][:].ravel()[flg]
err = ncd.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS'].variables[ncvar+'_precision'][:].ravel()[flg]

if first:
self.outdata[self.varDict[iodavar]['valKey']] = data
self.outdata[self.varDict[iodavar]['errKey']] = err
self.outdata[self.varDict[iodavar]['qcKey']] = qc_flag
self.outdata[self.varDict[iodavar]['qcKey']] = qc_flag[flg]
else:
self.outdata[self.varDict[iodavar]['valKey']] = np.concatenate(
(self.outdata[self.varDict[iodavar]['valKey']], data))
self.outdata[self.varDict[iodavar]['errKey']] = np.concatenate(
(self.outdata[self.varDict[iodavar]['errKey']], err))
self.outdata[self.varDict[iodavar]['qcKey']] = np.concatenate(
(self.outdata[self.varDict[iodavar]['qcKey']], qc_flag))
(self.outdata[self.varDict[iodavar]['qcKey']], qc_flag[flg]))

first = False

DimDict['nlocs'] = len(self.outdata[('datetime', 'MetaData')])
AttrData['nlocs'] = np.int32(DimDict['nlocs'])

Expand All @@ -180,7 +192,7 @@ def main():
# get command line arguments
parser = argparse.ArgumentParser(
description=(
'Reads TROPOMI NO2 netCDF files provided by NESDIS'
'Reads TROPOMI NO2 netCDF files: official Copernicus product'
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch :-)

'and converts into IODA formatted output files. Multiple'
'files are able to be concatenated.')
)
Expand All @@ -194,17 +206,41 @@ def main():
'-o', '--output',
help="path of IODA output file",
type=str, required=True)
required.add_argument(
'-c', '--column',
help="type of column: total or tropo",
type=str, required=True)

args = parser.parse_args()

if args.column == "tropo":

obsVar = {
'nitrogendioxide_tropospheric_column': 'nitrogen_dioxide_in_tropospheric_column'
}

varDims = {
'nitrogen_dioxide_in_tropospheric_column': ['nlocs']
}

elif args.column == "total":

obsVar = {
'nitrogendioxide_total_column': 'nitrogen_dioxide_in_total_column'
}

varDims = {
'nitrogen_dioxide_in_total_column': ['nlocs']
}

# Read in the NO2 data
no2 = tropomi(args.input)
no2 = tropomi(args.input, args.column, obsVar)

# setup the IODA writer
writer = iconv.IodaWriter(args.output, locationKeyList, DimDict)

# write everything out
writer.BuildIoda(no2.outdata, VarDims, no2.varAttrs, AttrData)
writer.BuildIoda(no2.outdata, varDims, no2.varAttrs, AttrData)


if __name__ == '__main__':
Expand Down
22 changes: 18 additions & 4 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ list( APPEND test_output
testoutput/ioda.NC001007.2020031012.nc
testoutput/viirs_aod.nc
testoutput/viirs_bias.nc
testoutput/tropomi_no2.nc
testoutput/tropomi_no2_total.nc
testoutput/tropomi_no2_tropo.nc
testoutput/mopitt_co.nc
testoutput/modis_aod.nc
testoutput/imssnow_scf.nc
Expand Down Expand Up @@ -858,16 +859,29 @@ ecbuild_add_test( TARGET test_${PROJECT_NAME}_satbias_viirs_aod
-o testrun/viirs_bias.nc"
viirs_bias.nc ${IODA_CONV_COMP_TOL_ZERO})

ecbuild_add_test( TARGET test_${PROJECT_NAME}_tropomi_no2
ecbuild_add_test( TARGET test_${PROJECT_NAME}_tropomi_no2_total
TYPE SCRIPT
ENVIRONMENT "PYTHONPATH=${IODACONV_PYTHONPATH}"
COMMAND bash
ARGS ${CMAKE_BINARY_DIR}/bin/iodaconv_comp.sh
netcdf
"${Python3_EXECUTABLE} ${CMAKE_BINARY_DIR}/bin/tropomi_no2_nc2ioda.py
-i testinput/tropomi_no2.nc
-o testrun/tropomi_no2.nc"
tropomi_no2.nc ${IODA_CONV_COMP_TOL_ZERO})
-o testrun/tropomi_no2_total.nc
-c total"
tropomi_no2_total.nc ${IODA_CONV_COMP_TOL_ZERO})

ecbuild_add_test( TARGET test_${PROJECT_NAME}_tropomi_no2_tropo
TYPE SCRIPT
ENVIRONMENT "PYTHONPATH=${IODACONV_PYTHONPATH}"
COMMAND bash
ARGS ${CMAKE_BINARY_DIR}/bin/iodaconv_comp.sh
netcdf
"${Python3_EXECUTABLE} ${CMAKE_BINARY_DIR}/bin/tropomi_no2_nc2ioda.py
-i testinput/tropomi_no2.nc
-o testrun/tropomi_no2_tropo.nc
-c tropo"
tropomi_no2_tropo.nc ${IODA_CONV_COMP_TOL_ZERO})

ecbuild_add_test( TARGET test_${PROJECT_NAME}_mopitt_co
TYPE SCRIPT
Expand Down
4 changes: 2 additions & 2 deletions test/testoutput/mopitt_co.nc
Git LFS file not shown
3 changes: 0 additions & 3 deletions test/testoutput/tropomi_no2.nc

This file was deleted.

3 changes: 3 additions & 0 deletions test/testoutput/tropomi_no2_total.nc
Git LFS file not shown
3 changes: 3 additions & 0 deletions test/testoutput/tropomi_no2_tropo.nc
Git LFS file not shown