diff --git a/parm/config.hera.yaml b/parm/config.hera.yaml new file mode 100644 index 00000000..379d057b --- /dev/null +++ b/parm/config.hera.yaml @@ -0,0 +1,80 @@ +obsforge: + PSLOT: obsforge + HOMEobsforge: /scratch1/NCEPDEV/da/Mindo.Choi/test/obsForge + SDATE: 202503141800 + EDATE: 202503150000 + COMROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/COMROOT + DCOMROOT: /scratch1/NCEPDEV/da/common/realtime_sample/lfs/h1/ops/prod/dcom + DATAROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/RUNDIRS + SCHEDULER: slurm + ACCOUNT: da-cpu + QUEUE: debug + PARTITION: hera + KEEPDATA: NO + assim_freq: 6 + +aoddump: + provider: VIIRSAOD + platforms: ['npp', 'n20', 'n21'] + thinning_threshold: 0 + channel: 4 + preqc: 2 + WALLTIME_AOD_DUMP: '00:10:00' + TASK_GEOM_AOD_DUMP: '1:ppn=1:tpp=1' + MEMORY_AOD_DUMP: 96GB + +marinedump: + providers: + ghrsst: + list: + - sst_viirs_n21_l3u + - sst_viirs_n20_l3u + - sst_viirs_npp_l3u + - sst_avhrrf_ma_l3u + - sst_avhrrf_mb_l3u + - sst_avhrrf_mc_l3u + - sst_ahi_h08_l3c + - sst_abi_g17_l3c + - sst_abi_g16_l3c + qc config: + min: -2.0 + max: 45.0 + stride: 15 + min number of obs: 10 + rads: + list: + - rads_adt_3a + - rads_adt_3b + - rads_adt_6a + - rads_adt_c2 + - rads_adt_j2 + - rads_adt_j3 + - rads_adt_sa + - rads_adt_sw + qc config: + min: -2.0 + max: 3.0 + error ratio: 1.0 + nesdis_amsr2: + list: + - icec_amsr2_north + - icec_amsr2_south + qc config: + min: 0.0 + max: 1.0 + smap: + list: + - sss_smap_l2 + qc config: + min: 0.1 + max: 40.0 + smos: + list: + - sss_smos_l2 + qc config: + min: 0.1 + max: 40.0 + + WALLTIME_MARINE_DUMP: '00:10:00' + TASK_GEOM_MARINE_DUMP: '1:ppn=20:tpp=2' + MEMORY_MARINE_DUMP: 32GB diff --git a/parm/config.hercules.yaml b/parm/config.hercules.yaml new file mode 100644 index 00000000..379d057b --- /dev/null +++ b/parm/config.hercules.yaml @@ -0,0 +1,80 @@ +obsforge: + PSLOT: obsforge + HOMEobsforge: /scratch1/NCEPDEV/da/Mindo.Choi/test/obsForge + SDATE: 202503141800 + EDATE: 202503150000 + COMROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/COMROOT + DCOMROOT: /scratch1/NCEPDEV/da/common/realtime_sample/lfs/h1/ops/prod/dcom + DATAROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/RUNDIRS + SCHEDULER: slurm + ACCOUNT: da-cpu + QUEUE: debug + PARTITION: hera + KEEPDATA: NO + assim_freq: 6 + +aoddump: + provider: VIIRSAOD + platforms: ['npp', 'n20', 'n21'] + thinning_threshold: 0 + channel: 4 + preqc: 2 + WALLTIME_AOD_DUMP: '00:10:00' + TASK_GEOM_AOD_DUMP: '1:ppn=1:tpp=1' + MEMORY_AOD_DUMP: 96GB + +marinedump: + providers: + ghrsst: + list: + - sst_viirs_n21_l3u + - sst_viirs_n20_l3u + - sst_viirs_npp_l3u + - sst_avhrrf_ma_l3u + - sst_avhrrf_mb_l3u + - sst_avhrrf_mc_l3u + - sst_ahi_h08_l3c + - sst_abi_g17_l3c + - sst_abi_g16_l3c + qc config: + min: -2.0 + max: 45.0 + stride: 15 + min number of obs: 10 + rads: + list: + - rads_adt_3a + - rads_adt_3b + - rads_adt_6a + - rads_adt_c2 + - rads_adt_j2 + - rads_adt_j3 + - rads_adt_sa + - rads_adt_sw + qc config: + min: -2.0 + max: 3.0 + error ratio: 1.0 + nesdis_amsr2: + list: + - icec_amsr2_north + - icec_amsr2_south + qc config: + min: 0.0 + max: 1.0 + smap: + list: + - sss_smap_l2 + qc config: + min: 0.1 + max: 40.0 + smos: + list: + - sss_smos_l2 + qc config: + min: 0.1 + max: 40.0 + + WALLTIME_MARINE_DUMP: '00:10:00' + TASK_GEOM_MARINE_DUMP: '1:ppn=20:tpp=2' + MEMORY_MARINE_DUMP: 32GB diff --git a/parm/config.orion.yaml b/parm/config.orion.yaml index 98108e3b..379d057b 100644 --- a/parm/config.orion.yaml +++ b/parm/config.orion.yaml @@ -1,15 +1,15 @@ obsforge: PSLOT: obsforge - HOMEobsforge: /work/noaa/da/gvernier/runs/obsForge + HOMEobsforge: /scratch1/NCEPDEV/da/Mindo.Choi/test/obsForge SDATE: 202503141800 EDATE: 202503150000 - COMROOT: /work/noaa/da/gvernier/runs/test_obsforge/COMROOT - DCOMROOT: /work2/noaa/da/common/lfs/h1/ops/prod/dcom - DATAROOT: /work/noaa/da/gvernier/runs/test_obsforge/RUNDIRS + COMROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/COMROOT + DCOMROOT: /scratch1/NCEPDEV/da/common/realtime_sample/lfs/h1/ops/prod/dcom + DATAROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/RUNDIRS SCHEDULER: slurm ACCOUNT: da-cpu QUEUE: debug - PARTITION: orion + PARTITION: hera KEEPDATA: NO assim_freq: 6 @@ -55,6 +55,25 @@ marinedump: min: -2.0 max: 3.0 error ratio: 1.0 + nesdis_amsr2: + list: + - icec_amsr2_north + - icec_amsr2_south + qc config: + min: 0.0 + max: 1.0 + smap: + list: + - sss_smap_l2 + qc config: + min: 0.1 + max: 40.0 + smos: + list: + - sss_smos_l2 + qc config: + min: 0.1 + max: 40.0 WALLTIME_MARINE_DUMP: '00:10:00' TASK_GEOM_MARINE_DUMP: '1:ppn=20:tpp=2' diff --git a/parm/config.yaml b/parm/config.yaml index 1d8374ce..bcb9e07b 120000 --- a/parm/config.yaml +++ b/parm/config.yaml @@ -1 +1 @@ -config.orion.yaml \ No newline at end of file +config.hera.yaml \ No newline at end of file diff --git a/ush/python/pyobsforge/obsdb/nesdis_amsr2_db.py b/ush/python/pyobsforge/obsdb/nesdis_amsr2_db.py new file mode 100644 index 00000000..1debdd41 --- /dev/null +++ b/ush/python/pyobsforge/obsdb/nesdis_amsr2_db.py @@ -0,0 +1,100 @@ +import os +import glob +from datetime import datetime +from pyobsforge.obsdb import BaseDatabase + + +class NesdisAmsr2Database(BaseDatabase): + """Class to manage an observation file database for data assimilation.""" + + def __init__(self, db_name="nesdis_amsr2.db", + dcom_dir="/lfs/h1/ops/prod/dcom/", + obs_dir="seaice/pda"): + base_dir = os.path.join(dcom_dir, '*', obs_dir) + super().__init__(db_name, base_dir) + + def create_database(self): + """ + Create the SQLite database and observation files table. + + This method initializes the database with a table named `obs_files` to store metadata + about observation files. The table contains the following columns: + + - `id`: A unique identifier for each record (auto-incremented primary key). + - `filename`: The full path to the observation file (must be unique). + - `obs_time`: The timestamp of the observation, extracted from the filename. + - `receipt_time`: The timestamp when the file was added to the `dcom` directory. + - `instrument`: The instrument used to collect the observation (e.g., AMSR2). + - `satellite`: The satellite from which the observation was collected (e.g., GW1). + - `obs_type`: The type of observation (e.g., SEAICE) + + The table is created if it does not already exist. + """ + query = """ + CREATE TABLE IF NOT EXISTS obs_files ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + filename TEXT UNIQUE, + obs_time TIMESTAMP, + receipt_time TIMESTAMP, + instrument TEXT, + satellite TEXT, + obs_type TEXT + ) + """ + self.execute_query(query) + + def parse_filename(self, filename): + """Extract metadata from filenames matching the AMSR2-SEAICE pattern.""" + # Make sure the filename matches the expected pattern + # Pattern: AMSR2-SEAICE-NH_v2r2_GW1_s202503140032240_e202503140211220_c202503140245560.nc + parts = os.path.basename(filename).split('_') + + # Pre-check: Must be an AMSR2-SEAICE file + if not parts[0].startswith("AMSR2-SEAICE"): + print(f"[DEBUG] Skipping non AMSR2-SEAICE file: {filename}") + return None + + try: + # Extract hemisphere from the first hyphen-separated segment + name_parts = parts[0].split('-') + instrument = name_parts[0] + hemisphere = name_parts[2].lower() + + # Determine obs_type + if hemisphere == "nh": + obs_type = "icec_amsr2_north" + elif hemisphere == "sh": + obs_type = "icec_amsr2_south" + else: + print(f"[DEBUG] Unrecognized hemisphere in filename: {filename}") + return None + + satellite = parts[2] + obs_time = datetime.strptime(parts[3][1:16], "%Y%m%d%H%M%S%f") + receipt_time = datetime.fromtimestamp(os.path.getctime(filename)) + return filename, obs_time, receipt_time, instrument, satellite, obs_type + + except Exception as e: + print(f"[DEBUG] Error parsing filename {filename}: {e}") + return None + + def ingest_files(self): + """Scan the directory for new observation files and insert them into the database.""" + obs_files = glob.glob(os.path.join(self.base_dir, "*.nc")) + + # Counter for successful ingestions + ingested_count = 0 + + for file in obs_files: + parsed_data = self.parse_filename(file) + if parsed_data: + query = """ + INSERT INTO obs_files (filename, obs_time, receipt_time, instrument, satellite, obs_type) + VALUES (?, ?, ?, ?, ?, ?) + """ + try: + self.insert_record(query, parsed_data) + ingested_count += 1 + except Exception as e: + print(f"[DEBUG] Failed to insert record for {file}: {e}") + print(f"################################ Successfully ingested {ingested_count} files into the database.") diff --git a/ush/python/pyobsforge/obsdb/rads_db.py b/ush/python/pyobsforge/obsdb/rads_db.py index f4e97e96..9d33554a 100644 --- a/ush/python/pyobsforge/obsdb/rads_db.py +++ b/ush/python/pyobsforge/obsdb/rads_db.py @@ -9,7 +9,7 @@ class RADSDatabase(BaseDatabase): def __init__(self, db_name="rads.db", dcom_dir="/lfs/h1/ops/prod/dcom/", - obs_dir="sst"): + obs_dir="wgrdbul/adt"): base_dir = os.path.join(dcom_dir, '*', obs_dir) super().__init__(db_name, base_dir) diff --git a/ush/python/pyobsforge/obsdb/smap_db.py b/ush/python/pyobsforge/obsdb/smap_db.py new file mode 100644 index 00000000..fbe115e4 --- /dev/null +++ b/ush/python/pyobsforge/obsdb/smap_db.py @@ -0,0 +1,86 @@ +import os +import glob +from datetime import datetime +from pyobsforge.obsdb import BaseDatabase + + +class SmapDatabase(BaseDatabase): + """Class to manage an observation file database for data assimilation.""" + + def __init__(self, db_name="smap.db", + dcom_dir="/lfs/h1/ops/prod/dcom/", + obs_dir="wtxtbul/satSSS/SMAP"): + base_dir = os.path.join(dcom_dir, '*', obs_dir) + super().__init__(db_name, base_dir) + + def create_database(self): + """ + Create the SQLite database and observation files table. + + This method initializes the database with a table named `obs_files` to store metadata + about observation files. The table contains the following columns: + + - `id`: A unique identifier for each record (auto-incremented primary key). + - `filename`: The full path to the observation file (must be unique). + - `obs_time`: The timestamp of the observation, extracted from the filename. + - `receipt_time`: The timestamp when the file was added to the `dcom` directory. + - `satellite`: The satellite from which the observation was collected (e.g., GW1). + + The table is created if it does not already exist. + """ + query = """ + CREATE TABLE IF NOT EXISTS obs_files ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + filename TEXT UNIQUE, + obs_time TIMESTAMP, + receipt_time TIMESTAMP, + satellite TEXT, + obs_type TEXT + ) + """ + self.execute_query(query) + + def parse_filename(self, filename): + # Pattern: SMAP_L2B_SSS_NRT_54047_A_20250315T011742.h5 + basename = os.path.basename(filename) + parts = basename.split('_') + + # Pre-check: Must match SMAP_L2B_SSS_NRT structure + if not basename.startswith("SMAP_L2B_SSS_NRT") or len(parts) < 7: + print(f"[DEBUG] Skipping non-SMAP_L2B_SSS_NRT file: {filename}") + return None + + try: + satellite = "SMAP" + obs_type = "sss_smap_l2" + timestamp_with_ext = parts[6] + timestamp_str = os.path.splitext(timestamp_with_ext)[0] + obs_time = datetime.strptime(timestamp_str, "%Y%m%dT%H%M%S") + receipt_time = datetime.fromtimestamp(os.path.getctime(filename)) + return filename, obs_time, receipt_time, satellite, obs_type + + except Exception as e: + print(f"[DEBUG] Error parsing filename {filename}: {e}") + return None + + def ingest_files(self): + """Scan the directory for new observation files and insert them into the database.""" + obs_files = glob.glob(os.path.join(self.base_dir, "*.h5")) + print(f"Found {len(obs_files)} new files to ingest") + + # Counter for successful ingestions + ingested_count = 0 + + for file in obs_files: + parsed_data = self.parse_filename(file) + if parsed_data: + query = """ + INSERT INTO obs_files (filename, obs_time, receipt_time, satellite, obs_type) + VALUES (?, ?, ?, ?, ?) + """ + try: + self.insert_record(query, parsed_data) + ingested_count += 1 + except Exception as e: + print(f"Failed to insert record for {file}: {e}") + print(f"################################ Successfully ingested {ingested_count} files into the database.") diff --git a/ush/python/pyobsforge/obsdb/smos_db.py b/ush/python/pyobsforge/obsdb/smos_db.py new file mode 100644 index 00000000..3ea89705 --- /dev/null +++ b/ush/python/pyobsforge/obsdb/smos_db.py @@ -0,0 +1,86 @@ +import os +import glob +from datetime import datetime +from pyobsforge.obsdb import BaseDatabase + + +class SmosDatabase(BaseDatabase): + """Class to manage an observation file database for data assimilation.""" + + def __init__(self, db_name="smos.db", + dcom_dir="/lfs/h1/ops/prod/dcom/", + obs_dir="wtxtbul/satSSS/SMOS"): + base_dir = os.path.join(dcom_dir, '*', obs_dir) + super().__init__(db_name, base_dir) + + def create_database(self): + """ + Create the SQLite database and observation files table. + + This method initializes the database with a table named `obs_files` to store metadata + about observation files. The table contains the following columns: + + - `id`: A unique identifier for each record (auto-incremented primary key). + - `filename`: The full path to the observation file (must be unique). + - `obs_time`: The timestamp of the observation, extracted from the filename. + - `receipt_time`: The timestamp when the file was added to the `dcom` directory. + - `satellite`: The satellite from which the observation was collected (e.g., GW1). + + The table is created if it does not already exist. + """ + query = """ + CREATE TABLE IF NOT EXISTS obs_files ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + filename TEXT UNIQUE, + obs_time TIMESTAMP, + receipt_time TIMESTAMP, + satellite TEXT, + obs_type TEXT + ) + """ + self.execute_query(query) + + def parse_filename(self, filename): + # Extract metadata from filenames matching the SMOS OSUDP2 pattern. + # Pattern: SM_OPER_MIR_OSUDP2_20250315T001156_20250315T010515_700_001_1.nc + basename = os.path.basename(filename) + parts = basename.split('_') + + # Pre-check: Must match expected prefix and structure + if not basename.startswith("SM_OPER_MIR_OSUDP") or len(parts) < 6: + print(f"[DEBUG] Skipping non-SMOS OSUDP2 file: {filename}") + return None + + try: + satellite = "SMOS" + obs_type = "sss_smos_l2" + start_time_str = parts[4] + obs_time = datetime.strptime(start_time_str, "%Y%m%dT%H%M%S") + receipt_time = datetime.fromtimestamp(os.path.getctime(filename)) + return filename, obs_time, receipt_time, satellite, obs_type + + except Exception as e: + print(f"[DEBUG] Error parsing filename {filename}: {e}") + return None + + def ingest_files(self): + """Scan the directory for new observation files and insert them into the database.""" + obs_files = glob.glob(os.path.join(self.base_dir, "*.nc")) + print(f"Found {len(obs_files)} new files to ingest") + + # Counter for successful ingestions + ingested_count = 0 + + for file in obs_files: + parsed_data = self.parse_filename(file) + if parsed_data: + query = """ + INSERT INTO obs_files (filename, obs_time, receipt_time, satellite, obs_type) + VALUES (?, ?, ?, ?, ?) + """ + try: + self.insert_record(query, parsed_data) + ingested_count += 1 + except Exception as e: + print(f"Failed to insert record for {file}: {e}") + print(f"################################ Successfully ingested {ingested_count} files into the database.") diff --git a/ush/python/pyobsforge/task/marine_prepobs.py b/ush/python/pyobsforge/task/marine_prepobs.py index 7e184d14..10bcd2d9 100644 --- a/ush/python/pyobsforge/task/marine_prepobs.py +++ b/ush/python/pyobsforge/task/marine_prepobs.py @@ -37,6 +37,9 @@ def __init__(self, config: Dict[str, Any]) -> None: # Initialize the Providers self.ghrsst = ProviderConfig.from_task_config("ghrsst", self.task_config) self.rads = ProviderConfig.from_task_config("rads", self.task_config) + self.nesdis_amsr2 = ProviderConfig.from_task_config("nesdis_amsr2", self.task_config) + self.smap = ProviderConfig.from_task_config("smap", self.task_config) + self.smos = ProviderConfig.from_task_config("smos", self.task_config) # Initialize the list of processed ioda files # TODO: Does not work. This should be a list of gathered ioda files that are created @@ -50,6 +53,9 @@ def initialize(self) -> None: # Update the database with new files self.ghrsst.db.ingest_files() self.rads.db.ingest_files() + self.nesdis_amsr2.db.ingest_files() + self.smap.db.ingest_files() + self.smos.db.ingest_files() @logit(logger) def execute(self) -> None: @@ -127,6 +133,67 @@ def process_obs_space(self, } result = self.rads.process_obs_space(**kwargs) return result + + # Process NESDIS_AMSR2 + if provider == "nesdis_amsr2": + # Only handling "icec_amsr2_" cases + platform = "GW1" + instrument = "AMSR2" + satellite = "GW1" + kwargs = { + 'provider': "amsr2", + 'obs_space': obs_space, + 'platform': platform, + 'instrument': instrument, + 'satellite': satellite, + 'obs_type': obs_space, + 'output_file': output_file, + 'window_begin': self.task_config.window_begin, + 'window_end': self.task_config.window_end, + 'task_config': self.task_config + } + result = self.nesdis_amsr2.process_obs_space(**kwargs) + return result + + # Process SMAP + if provider == "smap": + platform = None + satellite = "SMAP" + instrument = None + kwargs = { + 'provider': provider, + 'obs_space': obs_space, + 'platform': platform, + 'instrument': instrument, + 'satellite': satellite, + 'obs_type': obs_space, + 'output_file': output_file, + 'window_begin': self.task_config.window_begin, + 'window_end': self.task_config.window_end, + 'task_config': self.task_config + } + result = self.smap.process_obs_space(**kwargs) + return result + + # Process SMOS SSS + if provider == "smos": + platform = None + satellite = "SMOS" + instrument = None + kwargs = { + 'provider': provider, + 'obs_space': obs_space, + 'platform': platform, + 'instrument': instrument, + 'satellite': satellite, + 'obs_type': obs_space, + 'output_file': output_file, + 'window_begin': self.task_config.window_begin, + 'window_end': self.task_config.window_end, + 'task_config': self.task_config + } + result = self.smos.process_obs_space(**kwargs) + return result else: logger.error(f"Provider {provider} not supported") diff --git a/ush/python/pyobsforge/task/providers.py b/ush/python/pyobsforge/task/providers.py index 2dd61077..a7c530cc 100644 --- a/ush/python/pyobsforge/task/providers.py +++ b/ush/python/pyobsforge/task/providers.py @@ -1,6 +1,9 @@ from logging import getLogger from pyobsforge.obsdb.ghrsst_db import GhrSstDatabase from pyobsforge.obsdb.rads_db import RADSDatabase +from pyobsforge.obsdb.nesdis_amsr2_db import NesdisAmsr2Database +from pyobsforge.obsdb.smap_db import SmapDatabase +from pyobsforge.obsdb.smos_db import SmosDatabase from typing import Any from dataclasses import dataclass from wxflow import AttrDict @@ -59,6 +62,12 @@ def from_task_config(cls, provider_name: str, task_config: AttrDict) -> "Provide db = GhrSstDatabase(db_name=f"{provider_name}.db", dcom_dir=task_config.DCOMROOT, obs_dir="sst") elif provider_name == "rads": db = RADSDatabase(db_name=f"{provider_name}.db", dcom_dir=task_config.DCOMROOT, obs_dir="wgrdbul/adt") + elif provider_name == "nesdis_amsr2": + db = NesdisAmsr2Database(db_name=f"{provider_name}.db", dcom_dir=task_config.DCOMROOT, obs_dir="seaice/pda") + elif provider_name == "smap": + db = SmapDatabase(db_name=f"{provider_name}.db", dcom_dir=task_config.DCOMROOT, obs_dir="wtxtbul/satSSS/SMAP") + elif provider_name == "smos": + db = SmosDatabase(db_name=f"{provider_name}.db", dcom_dir=task_config.DCOMROOT, obs_dir="wtxtbul/satSSS/SMOS") else: raise NotImplementedError(f"DB setup for provider {provider_name} not yet implemented") diff --git a/ush/python/pyobsforge/tests/test_nesdis_amsr2_database.py b/ush/python/pyobsforge/tests/test_nesdis_amsr2_database.py new file mode 100644 index 00000000..ef93e971 --- /dev/null +++ b/ush/python/pyobsforge/tests/test_nesdis_amsr2_database.py @@ -0,0 +1,185 @@ +import os +import glob +import tempfile +import shutil +import sqlite3 +from datetime import datetime, timedelta + +import pytest + +from pyobsforge.obsdb.nesdis_amsr2_db import NesdisAmsr2Database # Adjust as needed + + +@pytest.fixture +def temp_obs_dir(): + """Create a temp directory with mock NESDIS AMSR2 NetCDF files.""" + base_dir = tempfile.mkdtemp() + sub_dir = os.path.join(base_dir, "some_subdir", "seaice/pda") + os.makedirs(sub_dir) + + # Desired datetime for file timestamps + mock_time = datetime(2025, 3, 16, 0, 0, 0).timestamp() + + # Create mock NetCDF files + filenames = [ + "AMSR2-SEAICE-NH_v2r2_GW1_s202503160020240_e202503160159220_c202503160230450.nc", + "AMSR2-SEAICE-NH_v2r2_GW1_s202503160159240_e202503160338230_c202503160410050.nc", + "AMSR2-SEAICE-NH_v2r2_GW1_s202503160338250_e202503160514230_c202503160545510.nc", + "AMSR2-SEAICE-NH_v2r2_GW1_s202503160514240_e202503160653220_c202503160725420.nc", + "AMSR2-SEAICE-NH_v2r2_GW1_s202503160653240_e202503160829230_c202503160902250.nc", + "AMSR2-SEAICE-NH_v2r2_GW1_s202503160829250_e202503161008230_c202503161121060.nc", + "AMSR2-SEAICE-NH_v2r2_GW1_s202503161008240_e202503161147220_c202503161300120.nc", + "AMSR2-SEAICE-NH_v2r2_GW1_s202503161147230_e202503161326230_c202503161357200.nc", + "AMSR2-SEAICE-NH_v2r2_GW1_s202503161326240_e202503161502220_c202503161540340.nc", + "AMSR2-SEAICE-NH_v2r2_GW1_s202503161502240_e202503161641220_c202503161715510.nc", + "AMSR2-SEAICE-NH_v2r2_GW1_s202503161641230_e202503161820230_c202503161856520.nc", + "AMSR2-SEAICE-NH_v2r2_GW1_s202503161820240_e202503162002220_c202503162039030.nc", + "AMSR2-SEAICE-NH_v2r2_GW1_s202503162002240_e202503162144230_c202503162217280.nc", + "AMSR2-SEAICE-NH_v2r2_GW1_s202503162144250_e202503162323220_c202503162358480.nc", + "AMSR2-SEAICE-NH_v2r2_GW1_s202503162323240_e202503170102220_c202503170137120.nc", + "AMSR2-SEAICE-SH_v2r2_GW1_s202503160020240_e202503160159220_c202503160230450.nc", + "AMSR2-SEAICE-SH_v2r2_GW1_s202503160159240_e202503160338230_c202503160410050.nc", + "AMSR2-SEAICE-SH_v2r2_GW1_s202503160338250_e202503160514230_c202503160545510.nc", + "AMSR2-SEAICE-SH_v2r2_GW1_s202503160514240_e202503160653220_c202503160725420.nc", + "AMSR2-SEAICE-SH_v2r2_GW1_s202503160653240_e202503160829230_c202503160902250.nc", + "AMSR2-SEAICE-SH_v2r2_GW1_s202503160829250_e202503161008230_c202503161121060.nc", + "AMSR2-SEAICE-SH_v2r2_GW1_s202503161008240_e202503161147220_c202503161300120.nc", + "AMSR2-SEAICE-SH_v2r2_GW1_s202503161147230_e202503161326230_c202503161357200.nc", + "AMSR2-SEAICE-SH_v2r2_GW1_s202503161326240_e202503161502220_c202503161540340.nc", + "AMSR2-SEAICE-SH_v2r2_GW1_s202503161502240_e202503161641220_c202503161715510.nc", + "AMSR2-SEAICE-SH_v2r2_GW1_s202503161641230_e202503161820230_c202503161856520.nc", + "AMSR2-SEAICE-SH_v2r2_GW1_s202503161820240_e202503162002220_c202503162039030.nc", + "AMSR2-SEAICE-SH_v2r2_GW1_s202503162002240_e202503162144230_c202503162217280.nc", + "AMSR2-SEAICE-SH_v2r2_GW1_s202503162144250_e202503162323220_c202503162358480.nc", + "AMSR2-SEAICE-SH_v2r2_GW1_s202503162323240_e202503170102220_c202503170137120.nc", + "invalid_file.nc" + ] + for fname in filenames: + fname_tmp = os.path.join(sub_dir, fname) + with open(fname_tmp, "w") as f: + f.write("fake content") + os.utime(fname_tmp, (mock_time, mock_time)) # (access_time, modification_time) + + yield base_dir + shutil.rmtree(base_dir) + + +@pytest.fixture +def db(temp_obs_dir): + """Initialize test database.""" + db_path = os.path.join(temp_obs_dir, "nesdis_amsr2_test.db") + database = NesdisAmsr2Database( + db_name=db_path, + dcom_dir=temp_obs_dir, + obs_dir="seaice/pda" + ) + return database + + +def test_create_database(db): + db.create_database() + conn = sqlite3.connect(db.db_name) + cursor = conn.cursor() + cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='obs_files'") + assert cursor.fetchone() is not None + conn.close() + + +def test_parse_valid_filename(db): + print(glob.glob(os.path.join(db.base_dir, "*"))) + fname = "AMSR2-SEAICE-NH_v2r2_GW1_s202503160653240_e202503160829230_c202503160902250.nc" + fname = glob.glob(os.path.join(db.base_dir, fname))[0] + parsed = db.parse_filename(fname) + creation_time = datetime.fromtimestamp(os.path.getctime(fname)) + + assert parsed is not None + assert parsed[0] == fname + assert parsed[1] == datetime(2025, 3, 16, 6, 53, 24) + assert parsed[2] == creation_time + assert parsed[3] == "AMSR2" + assert parsed[4] == "GW1" + # assert parsed[5] == "SEAICE" + assert parsed[5] == "icec_amsr2_north" + + +def test_parse_invalid_filename(db): + assert db.parse_filename("junk.nc") is None + assert db.parse_filename("AMSR2-SEAICE-NH_v2r2_GW1_invalid.nc") is None + + +def test_ingest_files(db): + db.ingest_files() + conn = sqlite3.connect(db.db_name) + cursor = conn.cursor() + cursor.execute("SELECT COUNT(*) FROM obs_files") + count = cursor.fetchone()[0] + conn.close() + assert count == 30, "Should ingest 30 valid AMSR2 files" + + +def test_get_valid_files(db): + db.ingest_files() + da_cycle = "20250316060000" + window_begin = datetime.strptime(da_cycle, "%Y%m%d%H%M%S") - timedelta(hours=3) + window_end = datetime.strptime(da_cycle, "%Y%m%d%H%M%S") + timedelta(hours=3) + dst_dir = 'icec' + # Test for AMSR2 ICEC + valid_files_north = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + instrument="AMSR2", + satellite="GW1", + obs_type="icec_amsr2_north") + + valid_files_south = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + instrument="AMSR2", + satellite="GW1", + obs_type="icec_amsr2_south") + + valid_files = valid_files_north + valid_files_south + + # Files at 10:00 and 12:00 are within +/- 3h of 00:00 + assert any("202503160514" in f for f in valid_files) + assert any("202503160653" in f for f in valid_files) + assert all("202503161326" not in f for f in valid_files) + + print("Valid files found:", len(valid_files)) + for f in valid_files: + print(" -", f) + assert len(valid_files) == 8 + + +def test_get_valid_files_receipt(db): + db.ingest_files() + da_cycle = "20250316060000" + window_begin = datetime.strptime(da_cycle, "%Y%m%d%H%M%S") - timedelta(hours=3) + window_end = datetime.strptime(da_cycle, "%Y%m%d%H%M%S") + timedelta(hours=3) + dst_dir = 'icec' + + # Test for AMSR2 ICEC + valid_files_north = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + instrument="AMSR2", + satellite="GW1", + obs_type="icec_amsr2_north", + check_receipt="gfs") + + valid_files_south = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + instrument="AMSR2", + satellite="GW1", + obs_type="icec_amsr2_south", + check_receipt="gfs") + + valid_files = valid_files_north + valid_files_south + + print("Valid files found:", len(valid_files)) + for f in valid_files: + print(" -", f) + + # TODO (G): Giving up for now on trying to mock the receipt time, will revisit later + assert len(valid_files) == 8 diff --git a/ush/python/pyobsforge/tests/test_smap_database.py b/ush/python/pyobsforge/tests/test_smap_database.py new file mode 100644 index 00000000..94bec114 --- /dev/null +++ b/ush/python/pyobsforge/tests/test_smap_database.py @@ -0,0 +1,153 @@ +import os +import glob +import tempfile +import shutil +import sqlite3 +from datetime import datetime, timedelta + +import pytest + +from pyobsforge.obsdb.smap_db import SmapDatabase # Adjust as needed + + +@pytest.fixture +def temp_obs_dir(): + """Create a temp directory with mock SMAP_SSS h5 files.""" + base_dir = tempfile.mkdtemp() + sub_dir = os.path.join(base_dir, "some_subdir", "wtxtbul/satSSS/SMAP") + os.makedirs(sub_dir) + + # Desired datetime for file timestamps + mock_time = datetime(2025, 3, 16, 6, 0, 0).timestamp() + + # Create mock NetCDF files + filenames = [ + "SMAP_L2B_SSS_NRT_54061_A_20250316T001612.h5", + "SMAP_L2B_SSS_NRT_54061_D_20250316T001612.h5", + "SMAP_L2B_SSS_NRT_54062_A_20250316T015440.h5", + "SMAP_L2B_SSS_NRT_54062_D_20250316T015440.h5", + "SMAP_L2B_SSS_NRT_54063_A_20250316T033308.h5", + "SMAP_L2B_SSS_NRT_54063_D_20250316T033308.h5", + "SMAP_L2B_SSS_NRT_54064_A_20250316T051136.h5", + "SMAP_L2B_SSS_NRT_54064_D_20250316T051136.h5", + "SMAP_L2B_SSS_NRT_54065_A_20250316T065004.h5", + "SMAP_L2B_SSS_NRT_54065_D_20250316T065004.h5", + "SMAP_L2B_SSS_NRT_54066_A_20250316T082832.h5", + "SMAP_L2B_SSS_NRT_54066_D_20250316T082832.h5", + "SMAP_L2B_SSS_NRT_54067_A_20250316T100700.h5", + "SMAP_L2B_SSS_NRT_54067_D_20250316T100700.h5", + "SMAP_L2B_SSS_NRT_54068_D_20250316T114527.h5", + "SMAP_L2B_SSS_NRT_54069_A_20250316T132356.h5", + "SMAP_L2B_SSS_NRT_54069_D_20250316T132356.h5", + "SMAP_L2B_SSS_NRT_54070_A_20250316T150223.h5", + "SMAP_L2B_SSS_NRT_54070_D_20250316T150223.h5", + "SMAP_L2B_SSS_NRT_54071_A_20250316T164051.h5", + "SMAP_L2B_SSS_NRT_54071_D_20250316T164051.h5", + "SMAP_L2B_SSS_NRT_54072_A_20250316T181918.h5", + "SMAP_L2B_SSS_NRT_54072_D_20250316T181918.h5", + "SMAP_L2B_SSS_NRT_54073_A_20250316T195746.h5", + "SMAP_L2B_SSS_NRT_54073_D_20250316T195746.h5", + "SMAP_L2B_SSS_NRT_54074_A_20250316T213615.h5", + "SMAP_L2B_SSS_NRT_54074_D_20250316T213615.h5", + "SMAP_L2B_SSS_NRT_54075_A_20250316T231442.h5" + ] + for fname in filenames: + fname_tmp = os.path.join(sub_dir, fname) + with open(fname_tmp, "w") as f: + f.write("fake content") + os.utime(fname_tmp, (mock_time, mock_time)) # (access_time, modification_time) + + yield base_dir + shutil.rmtree(base_dir) + + +@pytest.fixture +def db(temp_obs_dir): + """Initialize test database.""" + db_path = os.path.join(temp_obs_dir, "smap_test.db") + database = SmapDatabase( + db_name=db_path, + dcom_dir=temp_obs_dir, + obs_dir="wtxtbul/satSSS/SMAP" + ) + return database + + +def test_create_database(db): + db.create_database() + conn = sqlite3.connect(db.db_name) + cursor = conn.cursor() + cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='obs_files'") + assert cursor.fetchone() is not None + conn.close() + + +def test_parse_valid_filename(db): + print(glob.glob(os.path.join(db.base_dir, "*"))) + fname = "SMAP_L2B_SSS_NRT_54065_A_20250316T065004.h5" + fname = glob.glob(os.path.join(db.base_dir, fname))[0] + parsed = db.parse_filename(fname) + creation_time = datetime.fromtimestamp(os.path.getctime(fname)) + + assert parsed is not None + assert parsed[0] == fname + assert parsed[1] == datetime(2025, 3, 16, 6, 50, 4) + assert parsed[2] == creation_time + assert parsed[3] == "SMAP" + assert parsed[4] == "sss_smap_l2" + + +def test_parse_invalid_filename(db): + assert db.parse_filename("junk.nc") is None + assert db.parse_filename("SMAP_L2B_SSS_NRT_invalid.nc") is None + + +def test_ingest_files(db): + db.ingest_files() + conn = sqlite3.connect(db.db_name) + cursor = conn.cursor() + cursor.execute("SELECT COUNT(*) FROM obs_files") + count = cursor.fetchone()[0] + conn.close() + assert count == 28, "Should ingest 28 valid SMAP files" + + +def test_get_valid_files(db): + db.ingest_files() + da_cycle = "20250316060000" + window_begin = datetime.strptime(da_cycle, "%Y%m%d%H%M%S") - timedelta(hours=3) + window_end = datetime.strptime(da_cycle, "%Y%m%d%H%M%S") + timedelta(hours=3) + dst_dir = 'sss' + # Test for SMAP SSS + valid_files = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + obs_type="sss_smap_l2", + satellite="SMAP") + + print("Valid files in window:", valid_files) + + # Files at 03:00 and 09:00 are within +/- 3h of 06:00 + assert any("20250316T0511" in f for f in valid_files) + assert any("20250316T0650" in f for f in valid_files) + assert all("20250316T1007" not in f for f in valid_files) + assert len(valid_files) == 8 + + +def test_get_valid_files_receipt(db): + db.ingest_files() + da_cycle = "20250316060000" + window_begin = datetime.strptime(da_cycle, "%Y%m%d%H%M%S") - timedelta(hours=3) + window_end = datetime.strptime(da_cycle, "%Y%m%d%H%M%S") + timedelta(hours=3) + dst_dir = 'sss' + + # Test for SMAP SSS + valid_files = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + satellite="SMAP", + obs_type="sss_smap_l2", + check_receipt='gfs') + + # TODO (G): Giving up for now on trying to mock the receipt time, will revisit later + assert len(valid_files) == 8 diff --git a/ush/python/pyobsforge/tests/test_smos_database.py b/ush/python/pyobsforge/tests/test_smos_database.py new file mode 100644 index 00000000..20701e03 --- /dev/null +++ b/ush/python/pyobsforge/tests/test_smos_database.py @@ -0,0 +1,153 @@ +import os +import glob +import tempfile +import shutil +import sqlite3 +from datetime import datetime, timedelta + +import pytest + +from pyobsforge.obsdb.smos_db import SmosDatabase # Adjust as needed + + +@pytest.fixture +def temp_obs_dir(): + """Create a temp directory with mock SMOS_SSS nc files.""" + base_dir = tempfile.mkdtemp() + sub_dir = os.path.join(base_dir, "some_subdir", "wtxtbul/satSSS/SMOS") + os.makedirs(sub_dir) + + # Desired datetime for file timestamps + mock_time = datetime(2025, 3, 16, 6, 0, 0).timestamp() + + # Create mock NetCDF files + filenames = [ + "SM_OPER_MIR_OSUDP2_20250316T002309_20250316T011621_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T011306_20250316T020624_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T020312_20250316T025626_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T025309_20250316T034629_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T034319_20250316T043631_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T043313_20250316T052634_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T052327_20250316T061636_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T061318_20250316T070637_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T070327_20250316T075640_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T075327_20250316T084642_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T084330_20250316T093645_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T093328_20250316T102647_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T102335_20250316T111649_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T111332_20250316T120652_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T120340_20250316T125654_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T125337_20250316T134656_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T134343_20250316T143658_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T143340_20250316T152700_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T152349_20250316T161703_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T161346_20250316T170705_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T170353_20250316T175708_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T175351_20250316T184710_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T184359_20250316T193713_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T193354_20250316T202714_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T202402_20250316T211716_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T211359_20250316T220719_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T220407_20250316T225721_700_001_1.nc", + "SM_OPER_MIR_OSUDP2_20250316T225404_20250316T234724_700_001_1.nc" + ] + for fname in filenames: + fname_tmp = os.path.join(sub_dir, fname) + with open(fname_tmp, "w") as f: + f.write("fake content") + os.utime(fname_tmp, (mock_time, mock_time)) # (access_time, modification_time) + + yield base_dir + shutil.rmtree(base_dir) + + +@pytest.fixture +def db(temp_obs_dir): + """Initialize test database.""" + db_path = os.path.join(temp_obs_dir, "smos_test.db") + database = SmosDatabase( + db_name=db_path, + dcom_dir=temp_obs_dir, + obs_dir="wtxtbul/satSSS/SMOS" + ) + return database + + +def test_create_database(db): + db.create_database() + conn = sqlite3.connect(db.db_name) + cursor = conn.cursor() + cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='obs_files'") + assert cursor.fetchone() is not None + conn.close() + + +def test_parse_valid_filename(db): + print(glob.glob(os.path.join(db.base_dir, "*"))) + fname = "SM_OPER_MIR_OSUDP2_20250316T061318_20250316T070637_700_001_1.nc" + fname = glob.glob(os.path.join(db.base_dir, fname))[0] + parsed = db.parse_filename(fname) + creation_time = datetime.fromtimestamp(os.path.getctime(fname)) + + assert parsed is not None + assert parsed[0] == fname + assert parsed[1] == datetime(2025, 3, 16, 6, 13, 18) # Start time + assert parsed[2] == creation_time + assert parsed[3] == "SMOS" + assert parsed[4] == "sss_smos_l2" + + +def test_parse_invalid_filename(db): + assert db.parse_filename("junk.nc") is None + assert db.parse_filename("SM_OPER_MIR_OSUDP2_invalid.nc") is None + + +def test_ingest_files(db): + db.ingest_files() + conn = sqlite3.connect(db.db_name) + cursor = conn.cursor() + cursor.execute("SELECT COUNT(*) FROM obs_files") + count = cursor.fetchone()[0] + conn.close() + assert count == 28, "Should ingest 28 valid SMAP files" + + +def test_get_valid_files(db): + db.ingest_files() + da_cycle = "20250316060000" + window_begin = datetime.strptime(da_cycle, "%Y%m%d%H%M%S") - timedelta(hours=3) + window_end = datetime.strptime(da_cycle, "%Y%m%d%H%M%S") + timedelta(hours=3) + dst_dir = 'sss' + # Test for SMOS SSS + valid_files = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + obs_type="sss_smos_l2", + satellite="SMOS") + + print("Valid files in window:", valid_files) + + # Files at 03:00 and 09:00 are within +/- 3h of 06:00 + assert any("20250316T0523" in f for f in valid_files) + assert any("20250316T0613" in f for f in valid_files) + assert all("20250316T1023" not in f for f in valid_files) + assert len(valid_files) == 7 + + +def test_get_valid_files_receipt(db): + db.ingest_files() + da_cycle = "20250316060000" + window_begin = datetime.strptime(da_cycle, "%Y%m%d%H%M%S") - timedelta(hours=3) + window_end = datetime.strptime(da_cycle, "%Y%m%d%H%M%S") + timedelta(hours=3) + dst_dir = 'sss' + + # Test for SMOS SSS + valid_files = db.get_valid_files(window_begin=window_begin, + window_end=window_end, + dst_dir=dst_dir, + satellite="SMOS", + obs_type="sss_smos_l2", + check_receipt='gfs') + + # TODO (G): Giving up for now on trying to mock the receipt time, will revisit later + assert len(valid_files) == 7