Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions parm/config.hera.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
obsforge:
PSLOT: obsforge
HOMEobsforge: /scratch1/NCEPDEV/da/Mindo.Choi/test/obsForge
SDATE: 202503141800
EDATE: 202503150000
SDATE: 202504281800
EDATE: 202504300000
COMROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/COMROOT
DCOMROOT: /scratch1/NCEPDEV/da/common/realtime_sample/lfs/h1/ops/prod/dcom
DATAROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/RUNDIRS
Expand Down Expand Up @@ -50,7 +50,9 @@ marinedump:
- rads_adt_j2
- rads_adt_j3
- rads_adt_sa
- rads_adt_sw
- rads_adt_sw
- rads_adt_3a
- rads_adt_3b
qc config:
min: -2.0
max: 3.0
Expand All @@ -62,6 +64,16 @@ marinedump:
qc config:
min: 0.0
max: 1.0
nesdis_mirs:
list:
- icec_amsu_ma1_l2
- icec_atms_n20_l2
- icec_atms_n21_l2
- icec_atms_npp_l2
# - icec_gmi_gpm_l2
qc config:
min: 0.0
max: 1.0
smap:
list:
- sss_smap_l2
Expand Down
28 changes: 20 additions & 8 deletions parm/config.hercules.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
obsforge:
PSLOT: obsforge
HOMEobsforge: /scratch1/NCEPDEV/da/Mindo.Choi/test/obsForge
SDATE: 202503141800
EDATE: 202503150000
COMROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/COMROOT
DCOMROOT: /scratch1/NCEPDEV/da/common/realtime_sample/lfs/h1/ops/prod/dcom
DATAROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/RUNDIRS
HOMEobsforge: /work2/noaa/da/mchoi3/temp/obsForge
SDATE: 202504281800
EDATE: 202504300000
COMROOT: /work2/noaa/da/mchoi3/temp/test_obsForge/COMROOT
DCOMROOT: /work2/noaa/da/common/lfs/h1/ops/prod/dcom
DATAROOT: /work2/noaa/da/mchoi3/temp/test_obsForge/RUNDIRS
SCHEDULER: slurm
ACCOUNT: da-cpu
QUEUE: debug
PARTITION: hera
PARTITION: hercules
KEEPDATA: NO
assim_freq: 6

Expand Down Expand Up @@ -50,7 +50,9 @@ marinedump:
- rads_adt_j2
- rads_adt_j3
- rads_adt_sa
- rads_adt_sw
- rads_adt_sw
- rads_adt_3a
- rads_adt_3b
qc config:
min: -2.0
max: 3.0
Expand All @@ -62,6 +64,16 @@ marinedump:
qc config:
min: 0.0
max: 1.0
nesdis_mirs:
list:
- icec_amsu_ma1_l2
- icec_atms_n20_l2
- icec_atms_n21_l2
- icec_atms_npp_l2
# - icec_gmi_gpm_l2
qc config:
min: 0.0
max: 1.0
smap:
list:
- sss_smap_l2
Expand Down
28 changes: 20 additions & 8 deletions parm/config.orion.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
obsforge:
PSLOT: obsforge
HOMEobsforge: /scratch1/NCEPDEV/da/Mindo.Choi/test/obsForge
SDATE: 202503141800
EDATE: 202503150000
COMROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/COMROOT
DCOMROOT: /scratch1/NCEPDEV/da/common/realtime_sample/lfs/h1/ops/prod/dcom
DATAROOT: /scratch1/NCEPDEV/da/Mindo.Choi/test/test_obsForge/RUNDIRS
HOMEobsforge: /work2/noaa/da/mchoi3/temp/obsForge
SDATE: 202504281800
EDATE: 202504300000
COMROOT: /work2/noaa/da/mchoi3/temp/test_obsForge/COMROOT
DCOMROOT: /work2/noaa/da/common/lfs/h1/ops/prod/dcom
DATAROOT: /work2/noaa/da/mchoi3/temp/test_obsForge/RUNDIRS
SCHEDULER: slurm
ACCOUNT: da-cpu
QUEUE: debug
PARTITION: hera
PARTITION: orion
KEEPDATA: NO
assim_freq: 6

Expand Down Expand Up @@ -50,7 +50,9 @@ marinedump:
- rads_adt_j2
- rads_adt_j3
- rads_adt_sa
- rads_adt_sw
- rads_adt_sw
- rads_adt_3a
- rads_adt_3b
qc config:
min: -2.0
max: 3.0
Expand All @@ -62,6 +64,16 @@ marinedump:
qc config:
min: 0.0
max: 1.0
nesdis_mirs:
list:
- icec_amsu_ma1_l2
- icec_atms_n20_l2
- icec_atms_n21_l2
- icec_atms_npp_l2
# - icec_gmi_gpm_l2
qc config:
min: 0.0
max: 1.0
smap:
list:
- sss_smap_l2
Expand Down
2 changes: 1 addition & 1 deletion parm/config.yaml
104 changes: 104 additions & 0 deletions ush/python/pyobsforge/obsdb/nesdis_mirs_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import os
import glob
from datetime import datetime
from pyobsforge.obsdb import BaseDatabase


class NesdisMirsDatabase(BaseDatabase):
"""Class to manage an observation file database for data assimilation."""

def __init__(self, obs_dirs, db_name="nesdis_mirs.db", dcom_dir="/lfs/h1/ops/prod/dcom/"):
base_dirs = [os.path.join(dcom_dir, '*', obs_dir) for obs_dir in obs_dirs]
super().__init__(db_name=db_name, base_dir=base_dirs)

def create_database(self):
"""
Create the SQLite database and observation files table.

This method initializes the database with a table named `obs_files` to store metadata
about observation files. The table contains the following columns:

- `id`: A unique identifier for each record (auto-incremented primary key).
- `filename`: The full path to the observation file (must be unique).
- `obs_time`: The timestamp of the observation, extracted from the filename.
- `receipt_time`: The timestamp when the file was added to the `dcom` directory.
- `satellite`: The satellite from which the observation was collected (e.g., GW1).
- `obs_type`: The type of observation (e.g., SEAICE)

The table is created if it does not already exist.
"""
query = """
CREATE TABLE IF NOT EXISTS obs_files (
id INTEGER PRIMARY KEY AUTOINCREMENT,
filename TEXT UNIQUE,
obs_time TIMESTAMP,
receipt_time TIMESTAMP,
instrument TEXT,
satellite TEXT,
obs_type TEXT
)
"""
self.execute_query(query)

def parse_filename(self, filename):
"""Extract metadata from filenames matching the MIRS-TYPE-SEAICE pattern
NPR-MIRS-IMG_v11r9_ma1_s202504300706550_e202504300756360_c202504300838450.nc
NPR-MIRS-IMG_v11r9_n20_s202504300858350_e202504300859066_c202504300933000.nc
NPR-MIRS-IMG_v11r9_n21_s202504300858324_e202504300859040_c202504300935130.nc
NPR-MIRS-IMG_v11r9_npp_s202504300858336_e202504300859053_c202504300916400.nc
NPR-MIRS-IMG_v11r9_gpm_s202504300848270_e202504300853250_c202504300912100.nc
"""
try:
fname = os.path.basename(filename)
parts = fname.split("_")

if len(parts) < 6 or not parts[3].startswith("s") or not parts[5].startswith("c"):
print(f"[DEBUG] Unexpected filename format: {fname}")
return None

instrument = parts[0].split("-")[1]
satellite = parts[2]
obs_type = {
"ma1": "icec_amsu_ma1_l2",
"n20": "icec_atms_n20_l2",
"n21": "icec_atms_n21_l2",
"npp": "icec_atms_npp_l2",
"gpm": "icec_gmi_gpm_l2"
}.get(satellite.lower(), None)

if obs_type is None:
print(f"[DEBUG] Unrecognized satellite: {satellite}")
return None

obs_time = datetime.strptime(parts[3][1:15], "%Y%m%d%H%M%S")
receipt_time = datetime.fromtimestamp(os.path.getctime(filename))
return filename, obs_time, receipt_time, instrument, satellite, obs_type

except Exception as e:
print(f"[ERROR] Failed to parse {filename}: {e}")
return None

def ingest_files(self):
obs_files = []
for base in self.base_dir:
matched = glob.glob(os.path.join(base, "*.nc"))
obs_files.extend(matched)

ingested_count = 0
for file in obs_files:
parsed_data = self.parse_filename(file)
if not parsed_data:
print(f"[WARN] Skipped (unparseable): {os.path.basename(file)}")
continue

query = """
INSERT INTO obs_files (filename, obs_time, receipt_time, instrument, satellite, obs_type)
VALUES (?, ?, ?, ?, ?, ?)
"""
try:
self.insert_record(query, parsed_data)
ingested_count += 1
except Exception as e:
print(f"[ERROR] Failed to insert {file}: {e}")

print(f"[INFO] Successfully ingested {ingested_count} files into the database.")
23 changes: 23 additions & 0 deletions ush/python/pyobsforge/task/marine_prepobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def __init__(self, config: Dict[str, Any]) -> None:
self.ghrsst = ProviderConfig.from_task_config("ghrsst", self.task_config)
self.rads = ProviderConfig.from_task_config("rads", self.task_config)
self.nesdis_amsr2 = ProviderConfig.from_task_config("nesdis_amsr2", self.task_config)
self.nesdis_mirs = ProviderConfig.from_task_config("nesdis_mirs", self.task_config)
self.smap = ProviderConfig.from_task_config("smap", self.task_config)
self.smos = ProviderConfig.from_task_config("smos", self.task_config)

Expand All @@ -54,6 +55,7 @@ def initialize(self) -> None:
self.ghrsst.db.ingest_files()
self.rads.db.ingest_files()
self.nesdis_amsr2.db.ingest_files()
self.nesdis_mirs.db.ingest_files()
self.smap.db.ingest_files()
self.smos.db.ingest_files()

Expand Down Expand Up @@ -155,6 +157,27 @@ def process_obs_space(self,
result = self.nesdis_amsr2.process_obs_space(**kwargs)
return result

# Process NESDIS_MIRS
if provider == "nesdis_mirs":
# Handling all mirs cases
platform = obs_space.split("_")[2]
instrument = "MIRS"
satellite = obs_space.split("_")[2]
kwargs = {
'provider': "mirs",
'obs_space': obs_space,
'platform': platform,
'instrument': instrument,
'satellite': satellite,
'obs_type': obs_space,
'output_file': output_file,
'window_begin': self.task_config.window_begin,
'window_end': self.task_config.window_end,
'task_config': self.task_config
}
result = self.nesdis_mirs.process_obs_space(**kwargs)
return result

# Process SMAP
if provider == "smap":
platform = None
Expand Down
10 changes: 10 additions & 0 deletions ush/python/pyobsforge/task/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pyobsforge.obsdb.ghrsst_db import GhrSstDatabase
from pyobsforge.obsdb.rads_db import RADSDatabase
from pyobsforge.obsdb.nesdis_amsr2_db import NesdisAmsr2Database
from pyobsforge.obsdb.nesdis_mirs_db import NesdisMirsDatabase
from pyobsforge.obsdb.smap_db import SmapDatabase
from pyobsforge.obsdb.smos_db import SmosDatabase
from typing import Any
Expand Down Expand Up @@ -64,6 +65,15 @@ def from_task_config(cls, provider_name: str, task_config: AttrDict) -> "Provide
db = RADSDatabase(db_name=f"{provider_name}.db", dcom_dir=task_config.DCOMROOT, obs_dir="wgrdbul/adt")
elif provider_name == "nesdis_amsr2":
db = NesdisAmsr2Database(db_name=f"{provider_name}.db", dcom_dir=task_config.DCOMROOT, obs_dir="seaice/pda")
elif provider_name == "nesdis_mirs":
obs_dirs = [
"seaice_amsu",
"seaice_atms_j1",
"seaice_atms_j2",
"seaice_atms_snpp",
"seaice_mirs"
]
db = NesdisMirsDatabase(db_name=f"{provider_name}.db", dcom_dir=task_config.DCOMROOT, obs_dirs=obs_dirs)
elif provider_name == "smap":
db = SmapDatabase(db_name=f"{provider_name}.db", dcom_dir=task_config.DCOMROOT, obs_dir="wtxtbul/satSSS/SMAP")
elif provider_name == "smos":
Expand Down
Loading