Skip to content

Commit 31d4ff6

Browse files
authored
Merge pull request #432 from ioos/update_qc_requirements
Update QC Requirements
2 parents ff9a837 + 896e98d commit 31d4ff6

File tree

3 files changed

+43
-50
lines changed

3 files changed

+43
-50
lines changed

glider_qc/glider_qc.py

+36-38
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from cf_units import Unit
77
from netCDF4 import num2date, Dataset
88
import datetime
9-
from ioos_qc.qartod import aggregate
9+
from ioos_qc.stores import PandasStore
1010
from ioos_qc.streams import PandasStream
1111
from ioos_qc.results import collect_results, CollectedResult
1212
from ioos_qc.config import Config
@@ -430,35 +430,29 @@ def apply_qc(self, df, varname, configset):
430430

431431
# Step 3: Run the QC tests
432432
try:
433-
runner = list(qc_x.run(c_x))
433+
results = qc_x.run(c_x)
434434
except Exception as e:
435435
log.error(f"Error running QC tests on {varname}: {e}")
436436
return []
437437

438-
# Step 4: Collect the results from the QC run
438+
# Step 4: Store the results in another DataFrame
439439
try:
440-
results = collect_results(runner, how='list')
440+
store = PandasStore(results)
441441
except Exception as e:
442442
log.error(f"Error collecting QC results for {varname}: {e}")
443443
return []
444444

445-
# Step 5: Add the qc_rollup results
445+
# Step 5: Compute any aggregations
446446
try:
447-
agg = CollectedResult(
448-
stream_id=varname,
449-
package='qartod',
450-
test='qc_rollup',
451-
function=aggregate,
452-
results=aggregate(results),
453-
tinp=qc_x.time(),
454-
data=qc_x.data(varname)
455-
)
456-
results.append(agg)
447+
store.compute_aggregate(name='rollup_qc') # Appends to the results internally
457448
except Exception as e:
458-
log.error(f"Error adding qc_rollup for {varname}: {e}")
449+
log.error(f"Error computing any aggregations for {varname}: {e}")
459450
return []
460451

461-
return results
452+
# Step 6: Save only the test results from the store (no data or axes columns)
453+
results_store = store.save(write_data=False, write_axes=False)
454+
455+
return results_store
462456

463457
def check_geophysical_variables(self, var_name):
464458
'''
@@ -647,16 +641,17 @@ def check_time(self, tnp, nc_path):
647641
return ' '.join(report_list)
648642

649643
# the main function
650-
def run_qc(config, ncfile, nc_path):
644+
def run_qc(config, ncfile, ncfile_path):
651645
'''
652646
Runs IOOS QARTOD tests on a netCDF file
653647
654648
:param config: string defining path to the configuration file
655-
:param nc_path: string defining path to the netCDF file
649+
:param ncfile_path: string defining path to the netCDF file
656650
:param ncfile: netCDF4._netCDF4.Dataset
657651
'''
658652
report_list = []
659653
xyz = GliderQC(ncfile, config)
654+
deployment_name = ncfile_path.split('/')[-2]
660655
file_name = ncfile_path.split('/')[-1]
661656

662657
times = ncfile.variables['time']
@@ -672,7 +667,7 @@ def run_qc(config, ncfile, nc_path):
672667
# log time array issues
673668
report = ' '.join(report_list).strip()
674669
if len(report.strip()) != 0:
675-
ncfile.dac_qc_comment = file_name + ': ' + report
670+
ncfile.dac_qc_comment = str(deployment_name) + ' (' + str(file_name) + ': ' + report + ')'
676671
else:
677672
log.info(" Running IOOS QARTOD tests on %s", file_name)
678673

@@ -686,7 +681,7 @@ def run_qc(config, ncfile, nc_path):
686681
report_list.append(f"{location_err}: {str(e)}")
687682

688683
# Find geophysical variables
689-
legacy_variables, note = xyz.find_geophysical_variables() #ncfile
684+
legacy_variables, note = xyz.find_geophysical_variables()
690685
if not legacy_variables:
691686
log.info("No variables found.")
692687
report_list.append("No variables found.")
@@ -706,7 +701,7 @@ def run_qc(config, ncfile, nc_path):
706701

707702
# Check the Data Array
708703
if xyz.check_geophysical_variables(var_name): #cfile,
709-
report_list.append(var_name + ': ' + xyz.check_geophysical_variables(var_name))
704+
report_list.append(xyz.check_geophysical_variables(var_name))
710705
continue
711706

712707
# Check the mapping of standard names with units
@@ -739,31 +734,23 @@ def run_qc(config, ncfile, nc_path):
739734
results = xyz.apply_qc(df,var_name, config_set)
740735
log.info("Generated QC test results for %s", var_name)
741736

742-
for testname in ['gross_range_test', 'spike_test', 'rate_of_change_test', 'flat_line_test',
743-
'qc_rollup']:
744-
try:
745-
qc_test = next(r for r in results if r.stream_id == var_name and r.test == testname)
746-
except Exception as e:
747-
test_err = "Unable to read qc test results"
748-
log.exception(f"{test_err}: {str(e)}")
749-
report_list.append(f"{test_err}: {str(e)}")
750-
continue
737+
for testname in results.columns:
751738

752739
# create the qartod variable name and get the config specs
753-
if testname == 'qc_rollup':
740+
if testname == 'qartod_rollup_qc':
754741
qartodname = 'qartod_'+ var_name + '_primary_flag'
755742
# Pass the config specs to a variable
756743
testconfig = config_set['contexts'][0]['streams'][var_name]['qartod']
757744
else:
758-
qartodname = 'qartod_'+ var_name + '_'+ testname.split('_test')[0]+'_flag'
745+
qartodname = 'qartod_'+ var_name + '_'+ testname.split('qartod_')[-1].split('_test')[0]+'_flag'
759746
# Pass the config specs to a variable
760-
testconfig = config_set['contexts'][0]['streams'][var_name]['qartod'][testname]
747+
testconfig = config_set['contexts'][0]['streams'][var_name]['qartod'][testname.split('qartod_')[-1]]
761748

762749
# Update the qartod variable
763750
log.info("Updating %s", qartodname)
764751
qartod_var = ncfile.variables[qartodname]
765-
qartod_var[:] = np.array(qc_test.results)
766-
qartod_var.qartod_test = f"{testname}"
752+
qartod_var[:] = np.array(results[testname].values)
753+
qartod_var.qartod_test = f"{testname.split('qartod_')[-1]}"
767754

768755
# Set the dictionary as a string attribute to the variable
769756
qartod_var.setncattr('qartod_config', json.dumps(testconfig))
@@ -772,10 +759,10 @@ def run_qc(config, ncfile, nc_path):
772759
apply_qc_err = "apply_qc failed: could not calculate QC flags."
773760
log.exception(f"{apply_qc_err}: ")
774761
report_list.append(f"{apply_qc_err}: {str(e)}")
775-
continu
762+
continue
776763
# log QC issues
777764
report = ' '.join(report_list).strip()
778-
ncfile.dac_qc_comment = '(' + file_name + ': ' + str(report) + ') '
765+
ncfile.dac_qc_comment = str(deployment_name) + ' (' + str(file_name) + ': ' + str(report) + ')'
779766

780767
def qc_task(nc_path, config):
781768
'''
@@ -856,6 +843,17 @@ def check_needs_qc(nc_path):
856843
# if this section was reached, QC has been run, but xattr remains unset
857844
try:
858845
os.setxattr(nc_path, "user.qc_run", b"true")
846+
847+
# TODO: Set time as the extended file attribute
848+
# Get the current date-time in ISO format
849+
#iso_date = datetime.datetime.utcnow().isoformat()
850+
851+
# Convert it to bytes
852+
#iso_date_bytes = iso_date.encode("utf-8")
853+
854+
# Set the extended attribute
855+
#os.setxattr(nc_path, "user.qc_run", iso_date_bytes)
856+
859857
except OSError:
860858
log.exception(f"Exception occurred trying to set xattr on already QCed file at {nc_path}:")
861859
return False

requirements.txt

+2-3
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,10 @@ thredds_crawler>=1.5.4
2626
pyyaml>=5.2
2727
rq==1.3.0
2828
rq-dashboard==0.3.4
29-
# pin IOOS QC to a particular commit on main until stable version with fixes is released
30-
-e git+https://github.com/ioos/ioos_qc@57813de111af201f8d7b68c7fcd6337bf37dd6ed#egg=ioos_qc
29+
ioos-qc==2.2.0
3130
netCDF4>=1.4.0
3231
cf-units>=2
33-
# cftime needs to be frozen for utime. Some scripts could be rewritten to use
32+
# cftime needs to be frozen for utime. Some scripts could be rewritten to use
3433
# newer functions
3534
cftime==1.4.1
3635
flask-cors==4.0.2

tests/test_glider_qc.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -78,23 +78,21 @@ def test_apply_qc(self):
7878

7979
results_raw = qc.apply_qc(df, 'temperature', qc_config)
8080

81-
results_dict = {r.test: r.results for r in results_raw if r.stream_id == 'temperature'}
82-
8381
np.testing.assert_equal(
8482
np.array([1, 1, 1, 1, 1, 1, 1, 1], dtype=np.int8),
85-
results_dict['gross_range_test'][:])
83+
np.array(results_raw['temperature_qartod_gross_range_test'].values))
8684

8785
np.testing.assert_equal(
8886
np.array([1, 1, 1, 1, 1, 1, 1, 1], dtype=np.int8),
89-
results_dict['flat_line_test'])
87+
np.array(results_raw['temperature_qartod_flat_line_test'].values))
9088

9189
np.testing.assert_equal(
9290
np.array([1, 1, 1, 1, 1, 1, 1, 1], dtype=np.int8),
93-
results_dict['rate_of_change_test'])
91+
np.array(results_raw['temperature_qartod_rate_of_change_test'].values))
9492

9593
np.testing.assert_equal(
9694
np.array([2, 1, 1, 1, 1, 1, 1, 2], dtype=np.int8),
97-
results_dict['spike_test'])
95+
np.array(results_raw['temperature_qartod_spike_test'].values))
9896

9997
def test_units_qc(self):
10098
fd, fake_file = tempfile.mkstemp()
@@ -131,9 +129,7 @@ def test_units_qc(self):
131129
df = pd.DataFrame({"time": times[:].astype('datetime64[s]'), "temp": values,},)
132130
results_raw = qc.apply_qc(df, 'temp', qc_config)
133131

134-
results_dict = {r.test: r.results for r in results_raw if r.stream_id == 'temp'}
135-
136-
np.testing.assert_equal(results_dict['flat_line_test'][:], np.array([1, 1, 1, 3, 4, 9, 4, 4, 1, 9], dtype=np.int8))
132+
np.testing.assert_equal(np.array(results_raw['temp_qartod_flat_line_test'].values), np.array([1, 1, 1, 3, 4, 9, 4, 4, 1, 9], dtype=np.int8))
137133

138134
def test_normalize_variable(self):
139135
values = np.array([32.0, 65.0, 100.0])

0 commit comments

Comments
 (0)