From 61653c4ebf17910fb9638251968b1de021c84951 Mon Sep 17 00:00:00 2001 From: Mindo Choi Date: Tue, 6 May 2025 15:34:31 -0500 Subject: [PATCH 01/10] adding database --- ush/python/pyobsforge/obsdb/nesdis_mirs_db.py | 188 +++++++++++ .../tests/test_nesdis_mirs_database.py | 303 ++++++++++++++++++ 2 files changed, 491 insertions(+) create mode 100644 ush/python/pyobsforge/obsdb/nesdis_mirs_db.py create mode 100644 ush/python/pyobsforge/tests/test_nesdis_mirs_database.py diff --git a/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py b/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py new file mode 100644 index 00000000..2a9c2b86 --- /dev/null +++ b/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py @@ -0,0 +1,188 @@ +import os +import glob +from datetime import datetime +from pyobsforge.obsdb import BaseDatabase + + +class NesdisMirsDatabase(BaseDatabase): + """Class to manage an observation file database for data assimilation.""" + + def __init__(self, obs_dirs, db_name="nesdis_mirs.db", dcom_dir="/lfs/h1/ops/prod/dcom/"): + base_dirs = [os.path.join(dcom_dir, '*', obs_dir) for obs_dir in obs_dirs] + super().__init__(db_name=db_name, base_dir=base_dirs) + + def create_database(self): + """ + Create the SQLite database and observation files table. + + This method initializes the database with a table named `obs_files` to store metadata + about observation files. The table contains the following columns: + + - `id`: A unique identifier for each record (auto-incremented primary key). + - `filename`: The full path to the observation file (must be unique). + - `obs_time`: The timestamp of the observation, extracted from the filename. + - `receipt_time`: The timestamp when the file was added to the `dcom` directory. + - `satellite`: The satellite from which the observation was collected (e.g., n21). + - `obs_type`: The type of observation (e.g., icec_atms_n21_l2) + + The table is created if it does not already exist.
+ """ + query = """ + CREATE TABLE IF NOT EXISTS obs_files ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + filename TEXT UNIQUE, + obs_time TIMESTAMP, + receipt_time TIMESTAMP, + instrument TEXT, + satellite TEXT, + obs_type TEXT + ) + """ + self.execute_query(query) + + def parse_filename(self, filename): + try: + fname = os.path.basename(filename) + parts = fname.split("_") + + if len(parts) < 6 or not parts[3].startswith("s") or not parts[5].startswith("c"): + print(f"[DEBUG] Unexpected filename format: {fname}") + return None + + instrument = parts[0].split("-")[0] # 'NPR' + satellite = parts[2] # e.g. 'n21' + obs_type = { + "ma1": "icec_amsu_ma1_l2", + "n20": "icec_atms_n20_l2", + "n21": "icec_atms_n21_l2", + "npp": "icec_atms_npp_l2", + "gpm": "icec_gmi_gpm_l2" + }.get(satellite.lower(), None) + if obs_type is None: + print(f"[DEBUG] Unrecognized satellite: {satellite}") + return None + +# obs_time_str = parts[3][1:15] +# receipt_time_str = parts[5].split(".")[0][1:15] + +# obs_time = datetime.strptime(obs_time_str, "%Y%m%d%H%M%S") +# receipt_time = datetime.strptime(receipt_time_str, "%Y%m%d%H%M%S") + obs_time = datetime.strptime(parts[3][1:15], "%Y%m%d%H%M%S") + receipt_time = datetime.fromtimestamp(os.path.getctime(filename)) + return filename, obs_time, receipt_time, instrument, satellite, obs_type + + except Exception as e: + print(f"[ERROR] Failed to parse {filename}: {e}") + return None + +# def parse_filename(self, filename): + """Extract metadata from filenames matching the MIRS-TYPE-SEAICE pattern + NPR-MIRS-IMG_v11r9_ma1_s202504300706550_e202504300756360_c202504300838450.nc + NPR-MIRS-IMG_v11r9_n20_s202504300858350_e202504300859066_c202504300933000.nc + NPR-MIRS-IMG_v11r9_n21_s202504300858324_e202504300859040_c202504300935130.nc + NPR-MIRS-IMG_v11r9_npp_s202504300858336_e202504300859053_c202504300916400.nc + NPR-MIRS-IMG_v11r9_gpm_s202504300848270_e202504300853250_c202504300912100.nc + """ +# parts = os.path.basename(filename).split('_') + + # 
Pre-check: Must be an MIRS SEAICE file +# if not parts[0].startswith("NPR-MIRS-IMG"): +# print(f"[DEBUG] Skipping non AMSR2-SEAICE file: {filename}") +# return None + +# try: + # Extract hemisphere from the first hyphen-separated segment +# name_parts = parts[0].split('-') +# instrument = name_parts[0] +# satellite = parts[2] + + # Determine obs_type based on satellite +# if satellite == "ma1": +# obs_type = "icec_amsu_ma1_l2" +# elif satellite == "n20": +# obs_type = "icec_atms_n20_l2" +# elif satellite == "n21": +# obs_type = "icec_atms_n21_l2" +# elif satellite == "npp": +# obs_type = "icec_atms_npp_l2" +# elif satellite == "gpm": +# obs_type = "icec_gmi_gpm_l2" +# else: +# print(f"[DEBUG] Unrecognized satellite in filename: {filename}") +# return None +# obs_time = datetime.strptime(parts[3][1:15], "%Y%m%d%H%M%S") +# receipt_time = datetime.fromtimestamp(os.path.getctime(filename)) +# return filename, obs_time, receipt_time, instrument, satellite, obs_type + + # except Exception as e: + # print(f"[DEBUG] Error parsing filename {filename}: {e}") + # return None + + def ingest_files(self): + obs_files = [] + for base in self.base_dir: + matched = glob.glob(os.path.join(base, "*.nc")) + obs_files.extend(matched) + + ingested_count = 0 + for file in obs_files: + parsed_data = self.parse_filename(file) + if not parsed_data: + print(f"[WARN] Skipped (unparseable): {os.path.basename(file)}") + continue + + query = """ + INSERT INTO obs_files (filename, obs_time, receipt_time, instrument, satellite, obs_type) + VALUES (?, ?, ?, ?, ?, ?) 
+ """ + try: + self.insert_record(query, parsed_data) + ingested_count += 1 + except Exception as e: + print(f"[ERROR] Failed to insert {file}: {e}") + + print(f"[INFO] Successfully ingested {ingested_count} files into the database.") + +# def ingest_files(self): # make sure this is indented inside the class +# obs_files = [] +# for base in self.base_dir: +# matched = glob.glob(os.path.join(base, "*.nc")) +# obs_files.extend(matched) + +# ingested_count = 0 +# for file in obs_files: +# parsed_data = self.parse_filename(file) +# if parsed_data: +# query = """ +# INSERT INTO obs_files (filename, obs_time, receipt_time, instrument, satellite, obs_type) +# VALUES (?, ?, ?, ?, ?, ?) +# """ +# try: +# self.insert_record(query, parsed_data) +# ingested_count += 1 +# except Exception as e: +# print(f"[DEBUG] Failed to insert record for {file}: {e}") +# +# print(f"[INFO] Successfully ingested {ingested_count} files.") + + +# def ingest_files(self): +# """Scan the directory for new observation files and insert them into the database.""" +# obs_files = glob.glob(os.path.join(self.base_dir, "*.nc")) + + # Counter for successful ingestions +# ingested_count = 0 + +# for file in obs_files: +# parsed_data = self.parse_filename(file) +# if parsed_data: +# query = """ +# INSERT INTO obs_files (filename, obs_time, receipt_time, instrument, satellite, obs_type) +# VALUES (?, ?, ?, ?, ?, ?) 
+# """ +# try: +# self.insert_record(query, parsed_data) +# ingested_count += 1 +# except Exception as e: +# print(f"[DEBUG] Failed to insert record for {file}: {e}") +# print(f"################################ Successfully ingested {ingested_count} files into the database.") diff --git a/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py b/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py new file mode 100644 index 00000000..24d10a01 --- /dev/null +++ b/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py @@ -0,0 +1,303 @@ +import os +import glob +import tempfile +import shutil +import sqlite3 +from datetime import datetime, timedelta + +import pytest + +from pyobsforge.obsdb.nesdis_mirs_db import NesdisMirsDatabase # Adjust as needed + + +@pytest.fixture +def temp_obs_dir(): + """Create a temp directory with mock NESDIS MIRS NetCDF files sorted by satellite.""" + base_dir = tempfile.mkdtemp() + + # Folder mapping from satellite name in filename + sat_folder_map = { + "ma1": "seaice_amsu", + "n20": "seaice_atms_j1", + "n21": "seaice_atms_j2", + "npp": "seaice_atms_snpp", + "gpm": "seaice_mirs" + } + + # Create all needed subdirectories + for folder in sat_folder_map.values(): + os.makedirs(os.path.join(base_dir, "some_subdir", folder), exist_ok=True) + + mock_time = datetime(2025, 4, 30, 6, 0, 0).timestamp() + + # List of mock files + filenames = [ + "NPR-MIRS-IMG_v11r9_ma1_s202504300706550_e202504300756360_c202504300838450.nc", + "NPR-MIRS-IMG_v11r9_ma1_s202504300752070_e202504300847560_c202504300922220.nc", + "NPR-MIRS-IMG_v11r9_ma1_s202504300847510_e202504300937400_c202504301019390.nc", + "NPR-MIRS-IMG_v11r9_ma1_s202504300933110_e202504301028440_c202504301105480.nc", + "NPR-MIRS-IMG_v11r9_ma1_s202504301028550_e202504301118440_c202504301201320.nc", + "NPR-MIRS-IMG_v11r9_ma1_s202504301114230_e202504301208280_c202504301243330.nc", + "NPR-MIRS-IMG_v11r9_n20_s202504300858350_e202504300859066_c202504300933000.nc", + 
"NPR-MIRS-IMG_v11r9_n20_s202504300859070_e202504300859386_c202504300931380.nc", + "NPR-MIRS-IMG_v11r9_n20_s202504300859390_e202504300900106_c202504300932300.nc", + "NPR-MIRS-IMG_v11r9_n20_s202504300900110_e202504300900426_c202504300931530.nc", + "NPR-MIRS-IMG_v11r9_n20_s202504300900430_e202504300901146_c202504300933000.nc", + "NPR-MIRS-IMG_v11r9_n20_s202504300901150_e202504300901466_c202504300932000.nc", + "NPR-MIRS-IMG_v11r9_n21_s202504300858324_e202504300859040_c202504300935130.nc", + "NPR-MIRS-IMG_v11r9_n21_s202504300859044_e202504300859360_c202504300934410.nc", + "NPR-MIRS-IMG_v11r9_n21_s202504300859364_e202504300900080_c202504300934330.nc", + "NPR-MIRS-IMG_v11r9_n21_s202504300900084_e202504300900400_c202504300933390.nc", + "NPR-MIRS-IMG_v11r9_n21_s202504300900404_e202504300901120_c202504300933450.nc", + "NPR-MIRS-IMG_v11r9_n21_s202504300901124_e202504300901440_c202504300934590.nc", + "NPR-MIRS-IMG_v11r9_npp_s202504300858336_e202504300859053_c202504300916400.nc", + "NPR-MIRS-IMG_v11r9_npp_s202504300859056_e202504300859373_c202504300916500.nc", + "NPR-MIRS-IMG_v11r9_npp_s202504300859376_e202504300900093_c202504300916510.nc", + "NPR-MIRS-IMG_v11r9_npp_s202504300900096_e202504300900413_c202504300917320.nc", + "NPR-MIRS-IMG_v11r9_npp_s202504300900416_e202504300901133_c202504300917350.nc", + "NPR-MIRS-IMG_v11r9_npp_s202504300901136_e202504300901453_c202504301103340.nc", + "NPR-MIRS-IMG_v11r9_gpm_s202504300848270_e202504300853250_c202504300912100.nc", + "NPR-MIRS-IMG_v11r9_gpm_s202504300853270_e202504300858250_c202504300918440.nc", + "NPR-MIRS-IMG_v11r9_gpm_s202504300858270_e202504300903250_c202504300924230.nc", + "NPR-MIRS-IMG_v11r9_gpm_s202504300903270_e202504300908250_c202504300935120.nc", + "NPR-MIRS-IMG_v11r9_gpm_s202504300908270_e202504300913250_c202504300936130.nc", + "NPR-MIRS-IMG_v11r9_gpm_s202504300913270_e202504300918250_c202504300940510.nc" +# "invalid_file.nc" + ] + + # Create valid files in correct subdirs + for fname in filenames: + try: + # Extract 
satellite identifier from filename + sat = fname.split("_")[2] # e.g., 'ma1', 'n20', 'npp' + folder = sat_folder_map.get(sat) + + if folder is None: + print(f"[WARNING] Skipping unrecognized satellite in: {fname}") + continue + + # Create full path + path = os.path.join(base_dir, "some_subdir", folder, fname) + with open(path, "w") as f: + f.write("fake content") + os.utime(path, (mock_time, mock_time)) + except IndexError: + print(f"[ERROR] Failed to parse satellite from filename: {fname}") + continue + + for folder in sat_folder_map.values(): + invalid_path = os.path.join(base_dir, "some_subdir", folder, "invalid_file.nc") + with open(invalid_path, "w") as f: + f.write("invalid content") + os.utime(invalid_path, (mock_time, mock_time)) + + yield base_dir + shutil.rmtree(base_dir) + + +@pytest.fixture +def db(temp_obs_dir): + """Initialize test database.""" + db_path = os.path.join(temp_obs_dir, "nesdis_mirs_test.db") + + # List of seaice-related subfolders to include + obs_dirs = [ + "seaice_amsu", + "seaice_atms_j1", + "seaice_atms_j2", + "seaice_atms_snpp", + "seaice_mirs" + ] + + database = NesdisMirsDatabase( + db_name=db_path, + dcom_dir=temp_obs_dir, + obs_dirs=obs_dirs # Pass list of directories + ) + return database + + +def test_create_database(db): + db.create_database() + conn = sqlite3.connect(db.db_name) + cursor = conn.cursor() + cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='obs_files'") + assert cursor.fetchone() is not None + conn.close() + + +def test_parse_valid_filename(db): + fname = "NPR-MIRS-IMG_v11r9_n21_s202504300858324_e202504300859040_c202504300935130.nc" + + # Search through all base_dir entries + found_files = [] + for base in db.base_dir: + matches = glob.glob(os.path.join(base, fname)) + if matches: + found_files.extend(matches) + + assert found_files, f"{target_name} not found in any db.base_dir paths" + fname = found_files[0] + + # Parse filename + parsed = db.parse_filename(fname) + creation_time = 
datetime.fromtimestamp(os.path.getctime(fname)) + + # Assertions + assert parsed is not None + assert parsed[0] == fname + assert parsed[1] == datetime(2025, 4, 30, 8, 58, 32) + assert parsed[2] == creation_time + assert parsed[3] == "NPR" + assert parsed[4] == "n21" + assert parsed[5] == "icec_atms_n21_l2" + + +def test_parse_invalid_filename(db): + assert db.parse_filename("junk.nc") is None + assert db.parse_filename("NPR-MIRS-IMG_v11r9_n21_invalid.nc") is None + + +def test_ingest_files(db): + db.ingest_files() + + # Debug: show number of files discovered for ingestion + total_files = 0 + for base in db.base_dir: + matched = glob.glob(os.path.join(base, "*.nc")) + print(f"[DEBUG] {len(matched)} files found in {base}") + total_files += len(matched) + print(f"[DEBUG] Total NetCDF files found for ingestion: {total_files}") + + # Validate records written to database + conn = sqlite3.connect(db.db_name) + cursor = conn.cursor() + cursor.execute("SELECT COUNT(*) FROM obs_files") + count = cursor.fetchone()[0] + conn.close() + + assert count == 30, "Should ingest 30 valid MIRS files" + + +def test_get_valid_files(db): + db.ingest_files() + da_cycle = "20250430060000" + window_begin = datetime.strptime(da_cycle, "%Y%m%d%H%M%S") - timedelta(hours=3) + window_end = datetime.strptime(da_cycle, "%Y%m%d%H%M%S") + timedelta(hours=3) + dst_dir = 'icec' + # Test for MIRS ICEC + valid_files_ma1 = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + instrument="NPR", + satellite="ma1", + obs_type="icec_amsu_ma1_l2") + + valid_files_n20 = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + instrument="NPR", + satellite="n20", + obs_type="icec_atms_n20_l2") + + valid_files_n21 = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + instrument="NPR", + satellite="n21", + obs_type="icec_atms_n21_l2") + + valid_files_npp = db.get_valid_files(window_begin=window_begin, 
+ window_end=window_end, + dst_dir=dst_dir, + instrument="NPR", + satellite="npp", + obs_type="icec_atms_npp_l2") + + valid_files_gpm = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + instrument="NPR", + satellite="gpm", + obs_type="icec_gmi_gpm_l2") + + valid_files = ( + valid_files_ma1 + valid_files_n20 + + valid_files_n21 + valid_files_npp + + valid_files_gpm + ) + + # Files with start times between 03:00 and 09:00 are within +/- 3h of the 06:00 cycle + assert any("202504300706" in f for f in valid_files) + assert any("202504300859" in f for f in valid_files) + assert any("202504300900" in f for f in valid_files) + assert any("202504300853" in f for f in valid_files) + assert any("202504300900" not in f for f in valid_files) + assert all("202504300901" not in f for f in valid_files) + + print("Valid files found:", len(valid_files)) + for f in valid_files: + print(" -", f) + assert len(valid_files) == 15 + + +def test_get_valid_files_receipt(db): + db.ingest_files() + da_cycle = "20250430060000" + window_begin = datetime.strptime(da_cycle, "%Y%m%d%H%M%S") - timedelta(hours=3) + window_end = datetime.strptime(da_cycle, "%Y%m%d%H%M%S") + timedelta(hours=3) + dst_dir = 'icec' + + # Test for MIRS ICEC + valid_files_ma1 = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + instrument="NPR", + satellite="ma1", + obs_type="icec_amsu_ma1_l2", + check_receipt="gfs") + + valid_files_n20 = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + instrument="NPR", + satellite="n20", + obs_type="icec_atms_n20_l2", + check_receipt="gfs") + + valid_files_n21 = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + instrument="NPR", + satellite="n21", + obs_type="icec_atms_n21_l2", + check_receipt="gfs") + + valid_files_npp = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + instrument="NPR", + satellite="npp", +
obs_type="icec_atms_npp_l2", + check_receipt="gfs") + + valid_files_gpm = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + instrument="NPR", + satellite="gpm", + obs_type="icec_gmi_gpm_l2", + check_receipt="gfs") + + valid_files = ( + valid_files_ma1 + valid_files_n20 + + valid_files_n21 + valid_files_npp + + valid_files_gpm + ) + + print("Valid files found:", len(valid_files)) + for f in valid_files: + print(" -", f) + + # TODO (G): Giving up for now on trying to mock the receipt time, will revisit later + assert len(valid_files) == 15 From 06a44b6f0df131207a9aa59f0a6e54d2cebd0c43 Mon Sep 17 00:00:00 2001 From: Mindo Choi Date: Tue, 6 May 2025 15:49:34 -0500 Subject: [PATCH 02/10] clean up --- ush/python/pyobsforge/obsdb/nesdis_mirs_db.py | 112 +++--------------- .../tests/test_nesdis_mirs_database.py | 4 +- 2 files changed, 17 insertions(+), 99 deletions(-) diff --git a/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py b/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py index 2a9c2b86..1a74f415 100644 --- a/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py +++ b/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py @@ -7,7 +7,9 @@ class NesdisMirsDatabase(BaseDatabase): """Class to manage an observation file database for data assimilation.""" - def __init__(self, obs_dirs, db_name="nesdis_mirs.db", dcom_dir="/lfs/h1/ops/prod/dcom/"): + def __init__(self, obs_dirs, + db_name="nesdis_mirs.db", + dcom_dir="/lfs/h1/ops/prod/dcom/"): base_dirs = [os.path.join(dcom_dir, '*', obs_dir) for obs_dir in obs_dirs] super().__init__(db_name=db_name, base_dir=base_dirs) @@ -41,6 +43,13 @@ def create_database(self): self.execute_query(query) def parse_filename(self, filename): + """Extract metadata from filenames matching the MIRS-TYPE-SEAICE pattern + NPR-MIRS-IMG_v11r9_ma1_s202504300706550_e202504300756360_c202504300838450.nc + NPR-MIRS-IMG_v11r9_n20_s202504300858350_e202504300859066_c202504300933000.nc + 
NPR-MIRS-IMG_v11r9_n21_s202504300858324_e202504300859040_c202504300935130.nc + NPR-MIRS-IMG_v11r9_npp_s202504300858336_e202504300859053_c202504300916400.nc + NPR-MIRS-IMG_v11r9_gpm_s202504300848270_e202504300853250_c202504300912100.nc + """ try: fname = os.path.basename(filename) parts = fname.split("_") @@ -49,8 +58,8 @@ def parse_filename(self, filename): print(f"[DEBUG] Unexpected filename format: {fname}") return None - instrument = parts[0].split("-")[0] # 'NPR' - satellite = parts[2] # e.g. 'n21' + instrument = parts[0].split("-")[0] + satellite = parts[2] obs_type = { "ma1": "icec_amsu_ma1_l2", "n20": "icec_atms_n20_l2", @@ -58,15 +67,11 @@ def parse_filename(self, filename): "npp": "icec_atms_npp_l2", "gpm": "icec_gmi_gpm_l2" }.get(satellite.lower(), None) + if obs_type is None: print(f"[DEBUG] Unrecognized satellite: {satellite}") return None -# obs_time_str = parts[3][1:15] -# receipt_time_str = parts[5].split(".")[0][1:15] - -# obs_time = datetime.strptime(obs_time_str, "%Y%m%d%H%M%S") -# receipt_time = datetime.strptime(receipt_time_str, "%Y%m%d%H%M%S") obs_time = datetime.strptime(parts[3][1:15], "%Y%m%d%H%M%S") receipt_time = datetime.fromtimestamp(os.path.getctime(filename)) return filename, obs_time, receipt_time, instrument, satellite, obs_type @@ -75,49 +80,6 @@ def parse_filename(self, filename): print(f"[ERROR] Failed to parse {filename}: {e}") return None -# def parse_filename(self, filename): - """Extract metadata from filenames matching the MIRS-TYPE-SEAICE pattern - NPR-MIRS-IMG_v11r9_ma1_s202504300706550_e202504300756360_c202504300838450.nc - NPR-MIRS-IMG_v11r9_n20_s202504300858350_e202504300859066_c202504300933000.nc - NPR-MIRS-IMG_v11r9_n21_s202504300858324_e202504300859040_c202504300935130.nc - NPR-MIRS-IMG_v11r9_npp_s202504300858336_e202504300859053_c202504300916400.nc - NPR-MIRS-IMG_v11r9_gpm_s202504300848270_e202504300853250_c202504300912100.nc - """ -# parts = os.path.basename(filename).split('_') - - # Pre-check: Must be an MIRS 
SEAICE file -# if not parts[0].startswith("NPR-MIRS-IMG"): -# print(f"[DEBUG] Skipping non AMSR2-SEAICE file: {filename}") -# return None - -# try: - # Extract hemisphere from the first hyphen-separated segment -# name_parts = parts[0].split('-') -# instrument = name_parts[0] -# satellite = parts[2] - - # Determine obs_type based on satellite -# if satellite == "ma1": -# obs_type = "icec_amsu_ma1_l2" -# elif satellite == "n20": -# obs_type = "icec_atms_n20_l2" -# elif satellite == "n21": -# obs_type = "icec_atms_n21_l2" -# elif satellite == "npp": -# obs_type = "icec_atms_npp_l2" -# elif satellite == "gpm": -# obs_type = "icec_gmi_gpm_l2" -# else: -# print(f"[DEBUG] Unrecognized satellite in filename: {filename}") -# return None -# obs_time = datetime.strptime(parts[3][1:15], "%Y%m%d%H%M%S") -# receipt_time = datetime.fromtimestamp(os.path.getctime(filename)) -# return filename, obs_time, receipt_time, instrument, satellite, obs_type - - # except Exception as e: - # print(f"[DEBUG] Error parsing filename {filename}: {e}") - # return None - def ingest_files(self): obs_files = [] for base in self.base_dir: @@ -130,7 +92,7 @@ def ingest_files(self): if not parsed_data: print(f"[WARN] Skipped (unparseable): {os.path.basename(file)}") continue - + query = """ INSERT INTO obs_files (filename, obs_time, receipt_time, instrument, satellite, obs_type) VALUES (?, ?, ?, ?, ?, ?) 
@@ -140,49 +102,5 @@ def ingest_files(self): ingested_count += 1 except Exception as e: print(f"[ERROR] Failed to insert {file}: {e}") - - print(f"[INFO] Successfully ingested {ingested_count} files into the database.") -# def ingest_files(self): # make sure this is indented inside the class -# obs_files = [] -# for base in self.base_dir: -# matched = glob.glob(os.path.join(base, "*.nc")) -# obs_files.extend(matched) - -# ingested_count = 0 -# for file in obs_files: -# parsed_data = self.parse_filename(file) -# if parsed_data: -# query = """ -# INSERT INTO obs_files (filename, obs_time, receipt_time, instrument, satellite, obs_type) -# VALUES (?, ?, ?, ?, ?, ?) -# """ -# try: -# self.insert_record(query, parsed_data) -# ingested_count += 1 -# except Exception as e: -# print(f"[DEBUG] Failed to insert record for {file}: {e}") -# -# print(f"[INFO] Successfully ingested {ingested_count} files.") - - -# def ingest_files(self): -# """Scan the directory for new observation files and insert them into the database.""" -# obs_files = glob.glob(os.path.join(self.base_dir, "*.nc")) - - # Counter for successful ingestions -# ingested_count = 0 - -# for file in obs_files: -# parsed_data = self.parse_filename(file) -# if parsed_data: -# query = """ -# INSERT INTO obs_files (filename, obs_time, receipt_time, instrument, satellite, obs_type) -# VALUES (?, ?, ?, ?, ?, ?) 
-# """ -# try: -# self.insert_record(query, parsed_data) -# ingested_count += 1 -# except Exception as e: -# print(f"[DEBUG] Failed to insert record for {file}: {e}") -# print(f"################################ Successfully ingested {ingested_count} files into the database.") + print(f"[INFO] Successfully ingested {ingested_count} files into the database.") diff --git a/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py b/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py index 24d10a01..62b2a878 100644 --- a/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py +++ b/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py @@ -62,14 +62,14 @@ def temp_obs_dir(): "NPR-MIRS-IMG_v11r9_gpm_s202504300903270_e202504300908250_c202504300935120.nc", "NPR-MIRS-IMG_v11r9_gpm_s202504300908270_e202504300913250_c202504300936130.nc", "NPR-MIRS-IMG_v11r9_gpm_s202504300913270_e202504300918250_c202504300940510.nc" -# "invalid_file.nc" + "invalid_file.nc" ] # Create valid files in correct subdirs for fname in filenames: try: # Extract satellite identifier from filename - sat = fname.split("_")[2] # e.g., 'ma1', 'n20', 'npp' + sat = fname.split("_")[2] folder = sat_folder_map.get(sat) if folder is None: From d89b3b2985bc8ecc708906e8483c6a7f12762f34 Mon Sep 17 00:00:00 2001 From: Mindo Choi Date: Tue, 6 May 2025 15:56:49 -0500 Subject: [PATCH 03/10] instrument --- ush/python/pyobsforge/obsdb/nesdis_mirs_db.py | 2 +- .../tests/test_nesdis_mirs_database.py | 22 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py b/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py index 1a74f415..0fd44ef4 100644 --- a/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py +++ b/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py @@ -58,7 +58,7 @@ def parse_filename(self, filename): print(f"[DEBUG] Unexpected filename format: {fname}") return None - instrument = parts[0].split("-")[0] + instrument = parts[0].split("-")[1] satellite 
= parts[2] obs_type = { "ma1": "icec_amsu_ma1_l2", diff --git a/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py b/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py index 62b2a878..9c24ac8f 100644 --- a/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py +++ b/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py @@ -148,7 +148,7 @@ def test_parse_valid_filename(db): assert parsed[0] == fname assert parsed[1] == datetime(2025, 4, 30, 8, 58, 32) assert parsed[2] == creation_time - assert parsed[3] == "NPR" + assert parsed[3] == "MIRS" assert parsed[4] == "n21" assert parsed[5] == "icec_atms_n21_l2" @@ -189,35 +189,35 @@ def test_get_valid_files(db): valid_files_ma1 = db.get_valid_files(window_begin=window_begin, window_end=window_end, dst_dir=dst_dir, - instrument="NPR", + instrument="MIRS", satellite="ma1", obs_type="icec_amsu_ma1_l2") valid_files_n20 = db.get_valid_files(window_begin=window_begin, window_end=window_end, dst_dir=dst_dir, - instrument="NPR", + instrument="MIRS", satellite="n20", obs_type="icec_atms_n20_l2") valid_files_n21 = db.get_valid_files(window_begin=window_begin, window_end=window_end, dst_dir=dst_dir, - instrument="NPR", + instrument="MIRS", satellite="n21", obs_type="icec_atms_n21_l2") valid_files_npp = db.get_valid_files(window_begin=window_begin, window_end=window_end, dst_dir=dst_dir, - instrument="NPR", + instrument="MIRS", satellite="npp", obs_type="icec_atms_npp_l2") valid_files_gpm = db.get_valid_files(window_begin=window_begin, window_end=window_end, dst_dir=dst_dir, - instrument="NPR", + instrument="MIRS", satellite="gpm", obs_type="icec_gmi_gpm_l2") @@ -252,7 +252,7 @@ def test_get_valid_files_receipt(db): valid_files_ma1 = db.get_valid_files(window_begin=window_begin, window_end=window_end, dst_dir=dst_dir, - instrument="NPR", + instrument="MIRS", satellite="ma1", obs_type="icec_amsu_ma1_l2", check_receipt="gfs") @@ -260,7 +260,7 @@ def test_get_valid_files_receipt(db): valid_files_n20 = 
db.get_valid_files(window_begin=window_begin, window_end=window_end, dst_dir=dst_dir, - instrument="NPR", + instrument="MIRS", satellite="n20", obs_type="icec_atms_n20_l2", check_receipt="gfs") @@ -268,7 +268,7 @@ def test_get_valid_files_receipt(db): valid_files_n21 = db.get_valid_files(window_begin=window_begin, window_end=window_end, dst_dir=dst_dir, - instrument="NPR", + instrument="MIRS", satellite="n21", obs_type="icec_atms_n21_l2", check_receipt="gfs") @@ -276,7 +276,7 @@ def test_get_valid_files_receipt(db): valid_files_npp = db.get_valid_files(window_begin=window_begin, window_end=window_end, dst_dir=dst_dir, - instrument="NPR", + instrument="MIRS", satellite="npp", obs_type="icec_atms_npp_l2", check_receipt="gfs") @@ -284,7 +284,7 @@ def test_get_valid_files_receipt(db): valid_files_gpm = db.get_valid_files(window_begin=window_begin, window_end=window_end, dst_dir=dst_dir, - instrument="NPR", + instrument="MIRS", satellite="gpm", obs_type="icec_gmi_gpm_l2", check_receipt="gfs") From 24590103c82d9fe2a05a9703f2134c064763053c Mon Sep 17 00:00:00 2001 From: Mindo Choi Date: Thu, 8 May 2025 09:06:44 -0500 Subject: [PATCH 04/10] config test case --- parm/config.hercules.yaml | 33 +++++++++----------- ush/python/pyobsforge/task/marine_prepobs.py | 23 ++++++++++++++ ush/python/pyobsforge/task/providers.py | 13 ++++++++ 3 files changed, 51 insertions(+), 18 deletions(-) diff --git a/parm/config.hercules.yaml b/parm/config.hercules.yaml index 379d057b..70a0ef17 100644 --- a/parm/config.hercules.yaml +++ b/parm/config.hercules.yaml @@ -1,15 +1,15 @@ obsforge: PSLOT: obsforge - HOMEobsforge: /scratch1/NCEPDEV/da/Mindo.Choi/test/obsForge + HOMEobsforge: /work2/noaa/da/mchoi3/temp/obsForge SDATE: 202503141800 EDATE: 202503150000 - COMROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/COMROOT - DCOMROOT: /scratch1/NCEPDEV/da/common/realtime_sample/lfs/h1/ops/prod/dcom - DATAROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/RUNDIRS + COMROOT: 
/work2/noaa/da/mchoi3/temp/test_obsForge/COMROOT + DCOMROOT: /work2/noaa/da/common/lfs/h1/ops/prod/dcom + DATAROOT: /work2/noaa/da/mchoi3/temp/test_obsForge/RUNDIRS SCHEDULER: slurm ACCOUNT: da-cpu QUEUE: debug - PARTITION: hera + PARTITION: hercules KEEPDATA: NO assim_freq: 6 @@ -27,15 +27,8 @@ marinedump: providers: ghrsst: list: - - sst_viirs_n21_l3u - - sst_viirs_n20_l3u - - sst_viirs_npp_l3u - sst_avhrrf_ma_l3u - sst_avhrrf_mb_l3u - - sst_avhrrf_mc_l3u - - sst_ahi_h08_l3c - - sst_abi_g17_l3c - - sst_abi_g16_l3c qc config: min: -2.0 max: 45.0 @@ -45,12 +38,6 @@ marinedump: list: - rads_adt_3a - rads_adt_3b - - rads_adt_6a - - rads_adt_c2 - - rads_adt_j2 - - rads_adt_j3 - - rads_adt_sa - - rads_adt_sw qc config: min: -2.0 max: 3.0 @@ -62,6 +49,16 @@ marinedump: qc config: min: 0.0 max: 1.0 + nesdis_mirs: + list: + - icec_amsu_ma1_l2 + - icec_atms_n20_l2 + - icec_atms_n21_l2 + - icec_atms_npp_l2 + - icec_gmi_gpm_l2 + qc config: + min: 0.0 + max: 1.0 smap: list: - sss_smap_l2 diff --git a/ush/python/pyobsforge/task/marine_prepobs.py b/ush/python/pyobsforge/task/marine_prepobs.py index 10bcd2d9..8304ab7a 100644 --- a/ush/python/pyobsforge/task/marine_prepobs.py +++ b/ush/python/pyobsforge/task/marine_prepobs.py @@ -38,6 +38,7 @@ def __init__(self, config: Dict[str, Any]) -> None: self.ghrsst = ProviderConfig.from_task_config("ghrsst", self.task_config) self.rads = ProviderConfig.from_task_config("rads", self.task_config) self.nesdis_amsr2 = ProviderConfig.from_task_config("nesdis_amsr2", self.task_config) + self.nesdis_mirs = ProviderConfig.from_task_config("nesdis_mirs", self.task_config.marinedump) self.smap = ProviderConfig.from_task_config("smap", self.task_config) self.smos = ProviderConfig.from_task_config("smos", self.task_config) @@ -54,6 +55,7 @@ def initialize(self) -> None: self.ghrsst.db.ingest_files() self.rads.db.ingest_files() self.nesdis_amsr2.db.ingest_files() + self.nesdis_mirs.db.ingest_files() self.smap.db.ingest_files() 
self.smos.db.ingest_files() @@ -155,6 +157,27 @@ def process_obs_space(self, result = self.nesdis_amsr2.process_obs_space(**kwargs) return result + # Process NESDIS_MIRS + if provider == "nesdis_mirs": + # Handling all mirs cases + platform = obs_space.split("_")[2] + instrument = "MIRS" + satellite = obs_space.split("_")[2] + kwargs = { + 'provider': "mirs", + 'obs_space': obs_space, + 'platform': platform, + 'instrument': instrument, + 'satellite': satellite, + 'obs_type': obs_space, + 'output_file': output_file, + 'window_begin': self.task_config.window_begin, + 'window_end': self.task_config.window_end, + 'task_config': self.task_config + } + result = self.nesdis_mirs.process_obs_space(**kwargs) + return result + # Process SMAP if provider == "smap": platform = None diff --git a/ush/python/pyobsforge/task/providers.py b/ush/python/pyobsforge/task/providers.py index a7c530cc..d2682d9d 100644 --- a/ush/python/pyobsforge/task/providers.py +++ b/ush/python/pyobsforge/task/providers.py @@ -2,6 +2,7 @@ from pyobsforge.obsdb.ghrsst_db import GhrSstDatabase from pyobsforge.obsdb.rads_db import RADSDatabase from pyobsforge.obsdb.nesdis_amsr2_db import NesdisAmsr2Database +from pyobsforge.obsdb.nesdis_mirs_db import NesdisMirsDatabase from pyobsforge.obsdb.smap_db import SmapDatabase from pyobsforge.obsdb.smos_db import SmosDatabase from typing import Any @@ -53,6 +54,9 @@ def __init__(self, qc_config: QCConfig, db: Any): # Replace `Any` with a more s @classmethod def from_task_config(cls, provider_name: str, task_config: AttrDict) -> "ProviderConfig": + + print(f"All provider keys: {list(task_config.providers.keys())}") + qc_raw = task_config.providers[provider_name]["qc config"] qc = QCConfig.from_dict(qc_raw) @@ -64,6 +68,15 @@ def from_task_config(cls, provider_name: str, task_config: AttrDict) -> "Provide db = RADSDatabase(db_name=f"{provider_name}.db", dcom_dir=task_config.DCOMROOT, obs_dir="wgrdbul/adt") elif provider_name == "nesdis_amsr2": db = 
NesdisAmsr2Database(db_name=f"{provider_name}.db", dcom_dir=task_config.DCOMROOT, obs_dir="seaice/pda") + elif provider_name == "nesdis_mirs": + obs_dirs = [ + "seaice_amsu", + "seaice_atms_j1", + "seaice_atms_j2", + "seaice_atms_snpp", + "seaice_mirs" + ] + db = NesdisMirsDatabase(db_name=f"{provider_name}.db", dcom_dir=task_config.DCOMROOT, obs_dir=obs_dirs) elif provider_name == "smap": db = SmapDatabase(db_name=f"{provider_name}.db", dcom_dir=task_config.DCOMROOT, obs_dir="wtxtbul/satSSS/SMAP") elif provider_name == "smos": From 74f86514d51c1acd4b6831cd9d219fc21863c07a Mon Sep 17 00:00:00 2001 From: Mindo Choi Date: Thu, 8 May 2025 09:44:02 -0500 Subject: [PATCH 05/10] draft pytest --- ush/python/pyobsforge/task/marine_prepobs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/python/pyobsforge/task/marine_prepobs.py b/ush/python/pyobsforge/task/marine_prepobs.py index 8304ab7a..c9fad4a3 100644 --- a/ush/python/pyobsforge/task/marine_prepobs.py +++ b/ush/python/pyobsforge/task/marine_prepobs.py @@ -38,7 +38,7 @@ def __init__(self, config: Dict[str, Any]) -> None: self.ghrsst = ProviderConfig.from_task_config("ghrsst", self.task_config) self.rads = ProviderConfig.from_task_config("rads", self.task_config) self.nesdis_amsr2 = ProviderConfig.from_task_config("nesdis_amsr2", self.task_config) - self.nesdis_mirs = ProviderConfig.from_task_config("nesdis_mirs", self.task_config.marinedump) + self.nesdis_mirs = ProviderConfig.from_task_config("nesdis_mirs", self.task_config) self.smap = ProviderConfig.from_task_config("smap", self.task_config) self.smos = ProviderConfig.from_task_config("smos", self.task_config) From e0a8858eb9724cf9263f3cfebfe5c617fb818798 Mon Sep 17 00:00:00 2001 From: Mindo Choi Date: Thu, 8 May 2025 13:12:44 -0500 Subject: [PATCH 06/10] fix code style --- ush/python/pyobsforge/obsdb/nesdis_mirs_db.py | 2 +- .../tests/test_nesdis_mirs_database.py | 112 +++++++++--------- 2 files changed, 57 insertions(+), 57 deletions(-) diff 
--git a/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py b/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py index 0fd44ef4..c261094a 100644 --- a/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py +++ b/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py @@ -85,7 +85,7 @@ def ingest_files(self): for base in self.base_dir: matched = glob.glob(os.path.join(base, "*.nc")) obs_files.extend(matched) - + ingested_count = 0 for file in obs_files: parsed_data = self.parse_filename(file) diff --git a/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py b/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py index 9c24ac8f..85613bac 100644 --- a/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py +++ b/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py @@ -136,7 +136,7 @@ def test_parse_valid_filename(db): if matches: found_files.extend(matches) - assert found_files, f"{target_name} not found in any db.base_dir paths" + assert found_files, f"{fname} not found in any db.base_dir paths" fname = found_files[0] # Parse filename @@ -187,39 +187,39 @@ def test_get_valid_files(db): dst_dir = 'icec' # Test for MIRS ICEC valid_files_ma1 = db.get_valid_files(window_begin=window_begin, - window_end=window_end, - dst_dir=dst_dir, - instrument="MIRS", - satellite="ma1", - obs_type="icec_amsu_ma1_l2") + window_end=window_end, + dst_dir=dst_dir, + instrument="MIRS", + satellite="ma1", + obs_type="icec_amsu_ma1_l2") valid_files_n20 = db.get_valid_files(window_begin=window_begin, - window_end=window_end, - dst_dir=dst_dir, - instrument="MIRS", - satellite="n20", - obs_type="icec_atms_n20_l2") + window_end=window_end, + dst_dir=dst_dir, + instrument="MIRS", + satellite="n20", + obs_type="icec_atms_n20_l2") valid_files_n21 = db.get_valid_files(window_begin=window_begin, - window_end=window_end, - dst_dir=dst_dir, - instrument="MIRS", - satellite="n21", - obs_type="icec_atms_n21_l2") + window_end=window_end, + dst_dir=dst_dir, + instrument="MIRS", + satellite="n21", + 
obs_type="icec_atms_n21_l2") valid_files_npp = db.get_valid_files(window_begin=window_begin, - window_end=window_end, - dst_dir=dst_dir, - instrument="MIRS", - satellite="npp", - obs_type="icec_atms_npp_l2") + window_end=window_end, + dst_dir=dst_dir, + instrument="MIRS", + satellite="npp", + obs_type="icec_atms_npp_l2") valid_files_gpm = db.get_valid_files(window_begin=window_begin, - window_end=window_end, - dst_dir=dst_dir, - instrument="MIRS", - satellite="gpm", - obs_type="icec_gmi_gpm_l2") + window_end=window_end, + dst_dir=dst_dir, + instrument="MIRS", + satellite="gpm", + obs_type="icec_gmi_gpm_l2") valid_files = ( valid_files_ma1 + valid_files_n20 + @@ -250,44 +250,44 @@ def test_get_valid_files_receipt(db): # Test for MIRS ICEC valid_files_ma1 = db.get_valid_files(window_begin=window_begin, - window_end=window_end, - dst_dir=dst_dir, - instrument="MIRS", - satellite="ma1", - obs_type="icec_amsu_ma1_l2", - check_receipt="gfs") + window_end=window_end, + dst_dir=dst_dir, + instrument="MIRS", + satellite="ma1", + obs_type="icec_amsu_ma1_l2", + check_receipt="gfs") valid_files_n20 = db.get_valid_files(window_begin=window_begin, - window_end=window_end, - dst_dir=dst_dir, - instrument="MIRS", - satellite="n20", - obs_type="icec_atms_n20_l2", - check_receipt="gfs") + window_end=window_end, + dst_dir=dst_dir, + instrument="MIRS", + satellite="n20", + obs_type="icec_atms_n20_l2", + check_receipt="gfs") valid_files_n21 = db.get_valid_files(window_begin=window_begin, - window_end=window_end, - dst_dir=dst_dir, - instrument="MIRS", - satellite="n21", - obs_type="icec_atms_n21_l2", - check_receipt="gfs") + window_end=window_end, + dst_dir=dst_dir, + instrument="MIRS", + satellite="n21", + obs_type="icec_atms_n21_l2", + check_receipt="gfs") valid_files_npp = db.get_valid_files(window_begin=window_begin, - window_end=window_end, - dst_dir=dst_dir, - instrument="MIRS", - satellite="npp", - obs_type="icec_atms_npp_l2", - check_receipt="gfs") + window_end=window_end, + 
dst_dir=dst_dir, + instrument="MIRS", + satellite="npp", + obs_type="icec_atms_npp_l2", + check_receipt="gfs") valid_files_gpm = db.get_valid_files(window_begin=window_begin, - window_end=window_end, - dst_dir=dst_dir, - instrument="MIRS", - satellite="gpm", - obs_type="icec_gmi_gpm_l2", - check_receipt="gfs") + window_end=window_end, + dst_dir=dst_dir, + instrument="MIRS", + satellite="gpm", + obs_type="icec_gmi_gpm_l2", + check_receipt="gfs") valid_files = ( valid_files_ma1 + valid_files_n20 + From 56679e33dcde8871f1b7ebbcd5d4bfbe2f2b37ec Mon Sep 17 00:00:00 2001 From: Mindo Choi Date: Thu, 8 May 2025 13:22:56 -0500 Subject: [PATCH 07/10] fix code style --- ush/python/pyobsforge/obsdb/nesdis_mirs_db.py | 4 +--- .../tests/test_nesdis_mirs_database.py | 16 ++++++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py b/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py index c261094a..0f984543 100644 --- a/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py +++ b/ush/python/pyobsforge/obsdb/nesdis_mirs_db.py @@ -7,9 +7,7 @@ class NesdisMirsDatabase(BaseDatabase): """Class to manage an observation file database for data assimilation.""" - def __init__(self, obs_dirs, - db_name="nesdis_mirs.db", - dcom_dir="/lfs/h1/ops/prod/dcom/"): + def __init__(self, obs_dirs, db_name="nesdis_mirs.db", dcom_dir="/lfs/h1/ops/prod/dcom/"): base_dirs = [os.path.join(dcom_dir, '*', obs_dir) for obs_dir in obs_dirs] super().__init__(db_name=db_name, base_dir=base_dirs) diff --git a/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py b/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py index 85613bac..6b8fbd86 100644 --- a/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py +++ b/ush/python/pyobsforge/tests/test_nesdis_mirs_database.py @@ -222,9 +222,11 @@ def test_get_valid_files(db): obs_type="icec_gmi_gpm_l2") valid_files = ( - valid_files_ma1 + valid_files_n20 + - valid_files_n21 + valid_files_npp + - 
valid_files_gpm + valid_files_ma1 + + valid_files_n20 + + valid_files_n21 + + valid_files_npp + + valid_files_gpm ) # Files at 10:00 and 12:00 are within +/- 3h of 00:00 @@ -290,9 +292,11 @@ def test_get_valid_files_receipt(db): check_receipt="gfs") valid_files = ( - valid_files_ma1 + valid_files_n20 + - valid_files_n21 + valid_files_npp + - valid_files_gpm + valid_files_ma1 + + valid_files_n20 + + valid_files_n21 + + valid_files_npp + + valid_files_gpm ) print("Valid files found:", len(valid_files)) From 9556029b41a343f328dca04a139244933cb8e6f4 Mon Sep 17 00:00:00 2001 From: Mindo Choi Date: Thu, 8 May 2025 16:02:10 -0500 Subject: [PATCH 08/10] tested version --- parm/config.hera.yaml | 20 ++++++++++++++---- parm/config.hercules.yaml | 19 +++++++++++++++-- parm/config.orion.yaml | 28 ++++++++++++++++++------- parm/config.yaml | 2 +- ush/python/pyobsforge/task/providers.py | 5 +---- 5 files changed, 55 insertions(+), 19 deletions(-) diff --git a/parm/config.hera.yaml b/parm/config.hera.yaml index 379d057b..823416b7 100644 --- a/parm/config.hera.yaml +++ b/parm/config.hera.yaml @@ -1,8 +1,8 @@ obsforge: PSLOT: obsforge - HOMEobsforge: /scratch1/NCEPDEV/da/Mindo.Choi/test/obsForge - SDATE: 202503141800 - EDATE: 202503150000 + HOMEobsforge: //scratch1/NCEPDEV/da/Mindo.Choi/test/obsForge + SDATE: 202504281800 + EDATE: 202504300000 COMROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/COMROOT DCOMROOT: /scratch1/NCEPDEV/da/common/realtime_sample/lfs/h1/ops/prod/dcom DATAROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/RUNDIRS @@ -50,7 +50,9 @@ marinedump: - rads_adt_j2 - rads_adt_j3 - rads_adt_sa - - rads_adt_sw + - rads_adt_sw + - rads_adt_3a + - rads_adt_3b qc config: min: -2.0 max: 3.0 @@ -62,6 +64,16 @@ marinedump: qc config: min: 0.0 max: 1.0 + nesdis_mirs: + list: + - icec_amsu_ma1_l2 + - icec_atms_n20_l2 + - icec_atms_n21_l2 + - icec_atms_npp_l2 + - icec_gmi_gpm_l2 + qc config: + min: 0.0 + max: 1.0 smap: list: - sss_smap_l2 diff --git 
a/parm/config.hercules.yaml b/parm/config.hercules.yaml index 70a0ef17..0e84dc59 100644 --- a/parm/config.hercules.yaml +++ b/parm/config.hercules.yaml @@ -1,8 +1,8 @@ obsforge: PSLOT: obsforge HOMEobsforge: /work2/noaa/da/mchoi3/temp/obsForge - SDATE: 202503141800 - EDATE: 202503150000 + SDATE: 202504281800 + EDATE: 202504300000 COMROOT: /work2/noaa/da/mchoi3/temp/test_obsForge/COMROOT DCOMROOT: /work2/noaa/da/common/lfs/h1/ops/prod/dcom DATAROOT: /work2/noaa/da/mchoi3/temp/test_obsForge/RUNDIRS @@ -27,8 +27,15 @@ marinedump: providers: ghrsst: list: + - sst_viirs_n21_l3u + - sst_viirs_n20_l3u + - sst_viirs_npp_l3u - sst_avhrrf_ma_l3u - sst_avhrrf_mb_l3u + - sst_avhrrf_mc_l3u + - sst_ahi_h08_l3c + - sst_abi_g17_l3c + - sst_abi_g16_l3c qc config: min: -2.0 max: 45.0 @@ -38,6 +45,14 @@ marinedump: list: - rads_adt_3a - rads_adt_3b + - rads_adt_6a + - rads_adt_c2 + - rads_adt_j2 + - rads_adt_j3 + - rads_adt_sa + - rads_adt_sw + - rads_adt_3a + - rads_adt_3b qc config: min: -2.0 max: 3.0 diff --git a/parm/config.orion.yaml b/parm/config.orion.yaml index 379d057b..f9cdcd1a 100644 --- a/parm/config.orion.yaml +++ b/parm/config.orion.yaml @@ -1,15 +1,15 @@ obsforge: PSLOT: obsforge - HOMEobsforge: /scratch1/NCEPDEV/da/Mindo.Choi/test/obsForge - SDATE: 202503141800 - EDATE: 202503150000 - COMROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/COMROOT - DCOMROOT: /scratch1/NCEPDEV/da/common/realtime_sample/lfs/h1/ops/prod/dcom - DATAROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/RUNDIRS + HOMEobsforge: /work2/noaa/da/mchoi3/temp/obsForge + SDATE: 202504281800 + EDATE: 202504300000 + COMROOT: /work2/noaa/da/mchoi3/temp/test_obsForge/COMROOT + DCOMROOT: /work2/noaa/da/common/lfs/h1/ops/prod/dcom + DATAROOT: /work2/noaa/da/mchoi3/temp/test_obsForge/RUNDIRS SCHEDULER: slurm ACCOUNT: da-cpu QUEUE: debug - PARTITION: hera + PARTITION: orion KEEPDATA: NO assim_freq: 6 @@ -50,7 +50,9 @@ marinedump: - rads_adt_j2 - rads_adt_j3 - rads_adt_sa - - rads_adt_sw + - 
rads_adt_sw + - rads_adt_3a + - rads_adt_3b qc config: min: -2.0 max: 3.0 @@ -62,6 +64,16 @@ marinedump: qc config: min: 0.0 max: 1.0 + nesdis_mirs: + list: + - icec_amsu_ma1_l2 + - icec_atms_n20_l2 + - icec_atms_n21_l2 + - icec_atms_npp_l2 + - icec_gmi_gpm_l2 + qc config: + min: 0.0 + max: 1.0 smap: list: - sss_smap_l2 diff --git a/parm/config.yaml b/parm/config.yaml index bcb9e07b..7d8d61b4 120000 --- a/parm/config.yaml +++ b/parm/config.yaml @@ -1 +1 @@ -config.hera.yaml \ No newline at end of file +config.hercules.yaml \ No newline at end of file diff --git a/ush/python/pyobsforge/task/providers.py b/ush/python/pyobsforge/task/providers.py index d2682d9d..68f141c3 100644 --- a/ush/python/pyobsforge/task/providers.py +++ b/ush/python/pyobsforge/task/providers.py @@ -54,9 +54,6 @@ def __init__(self, qc_config: QCConfig, db: Any): # Replace `Any` with a more s @classmethod def from_task_config(cls, provider_name: str, task_config: AttrDict) -> "ProviderConfig": - - print(f"All provider keys: {list(task_config.providers.keys())}") - qc_raw = task_config.providers[provider_name]["qc config"] qc = QCConfig.from_dict(qc_raw) @@ -76,7 +73,7 @@ def from_task_config(cls, provider_name: str, task_config: AttrDict) -> "Provide "seaice_atms_snpp", "seaice_mirs" ] - db = NesdisMirsDatabase(db_name=f"{provider_name}.db", dcom_dir=task_config.DCOMROOT, obs_dir=obs_dirs) + db = NesdisMirsDatabase(db_name=f"{provider_name}.db", dcom_dir=task_config.DCOMROOT, obs_dirs=obs_dirs) elif provider_name == "smap": db = SmapDatabase(db_name=f"{provider_name}.db", dcom_dir=task_config.DCOMROOT, obs_dir="wtxtbul/satSSS/SMAP") elif provider_name == "smos": From f6097190db28f60dc76fa935b837c8ed9623ad89 Mon Sep 17 00:00:00 2001 From: Mindo Choi Date: Thu, 8 May 2025 16:03:38 -0500 Subject: [PATCH 09/10] fix mistypo --- parm/config.hera.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/config.hera.yaml b/parm/config.hera.yaml index 823416b7..d15e978f 100644 --- 
a/parm/config.hera.yaml +++ b/parm/config.hera.yaml @@ -1,6 +1,6 @@ obsforge: PSLOT: obsforge - HOMEobsforge: //scratch1/NCEPDEV/da/Mindo.Choi/test/obsForge + HOMEobsforge: /scratch1/NCEPDEV/da/Mindo.Choi/test/obsForge SDATE: 202504281800 EDATE: 202504300000 COMROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/COMROOT From c565cc6940078e3c9a33b15efab353f44b2bdffe Mon Sep 17 00:00:00 2001 From: Mindo Choi Date: Fri, 9 May 2025 11:25:53 -0500 Subject: [PATCH 10/10] commented out gpm --- parm/config.hera.yaml | 2 +- parm/config.hercules.yaml | 2 +- parm/config.orion.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/parm/config.hera.yaml b/parm/config.hera.yaml index d15e978f..1dee5c59 100644 --- a/parm/config.hera.yaml +++ b/parm/config.hera.yaml @@ -70,7 +70,7 @@ marinedump: - icec_atms_n20_l2 - icec_atms_n21_l2 - icec_atms_npp_l2 - - icec_gmi_gpm_l2 +# - icec_gmi_gpm_l2 qc config: min: 0.0 max: 1.0 diff --git a/parm/config.hercules.yaml b/parm/config.hercules.yaml index 0e84dc59..be9ce00c 100644 --- a/parm/config.hercules.yaml +++ b/parm/config.hercules.yaml @@ -70,7 +70,7 @@ marinedump: - icec_atms_n20_l2 - icec_atms_n21_l2 - icec_atms_npp_l2 - - icec_gmi_gpm_l2 +# - icec_gmi_gpm_l2 qc config: min: 0.0 max: 1.0 diff --git a/parm/config.orion.yaml b/parm/config.orion.yaml index f9cdcd1a..4c5be6d9 100644 --- a/parm/config.orion.yaml +++ b/parm/config.orion.yaml @@ -70,7 +70,7 @@ marinedump: - icec_atms_n20_l2 - icec_atms_n21_l2 - icec_atms_npp_l2 - - icec_gmi_gpm_l2 +# - icec_gmi_gpm_l2 qc config: min: 0.0 max: 1.0