-
Notifications
You must be signed in to change notification settings - Fork 5
Added seaice and sea surface salinity database #57
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 15 commits
Commits
Show all changes
32 commits
Select commit
Hold shift + click to select a range
abd788a
initial
apchoiCMD ce7a139
initial config
apchoiCMD 00aa670
dcom folder
2dd51a6
add the sss database
c44112a
initial processing test
dc4393a
2nd checking
7fbfb18
add the database
apchoiCMD a0628e6
py-norm_1
apchoiCMD 0d6cc7d
py-test_2
apchoiCMD b7b215b
py-norm_3
apchoiCMD 547497f
py-norm_4
apchoiCMD 8af6dfa
py-norm_5
apchoiCMD ae9a031
py-norm_6
apchoiCMD dbae9f2
update globing amsr2
apchoiCMD d0373ba
remove variable
apchoiCMD ba96dfb
address comments
apchoiCMD 281a351
correct provider name
apchoiCMD ca36d10
add name -nesdis- to seaice product
apchoiCMD 383f63a
revise config
apchoiCMD 6cb2318
revise kwargs
apchoiCMD 4929d94
config, provider and obs_space
apchoiCMD da77a0c
clean up and obs_type convention
apchoiCMD b16a4ed
obs_space correction
apchoiCMD 223851d
fix pytest
apchoiCMD bc63ab1
marine icec kwargs
apchoiCMD a2cb073
address comments
apchoiCMD 4c10563
clean up
apchoiCMD c6a117b
config yaml for hercules
apchoiCMD 6050ede
smap success
apchoiCMD c8ceb9f
pytest
apchoiCMD 539a129
fix pytest
apchoiCMD 7f7c7fd
address copilot's review
apchoiCMD File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,86 @@ | ||
| import os | ||
| import glob | ||
| from datetime import datetime | ||
| from pyobsforge.obsdb import BaseDatabase | ||
|
|
||
|
|
||
class Amsr2Database(BaseDatabase):
    """Catalogue AMSR2 sea-ice observation files in an SQLite database.

    Files are discovered with a glob below ``<dcom_dir>/*/<obs_dir>`` and the
    metadata encoded in each file name is stored in the ``obs_files`` table.
    """

    def __init__(self, db_name="amsr2.db",
                 dcom_dir="/lfs/h1/ops/prod/dcom/",
                 obs_dir="seaice/pda"):
        """Build the glob pattern for the observation directories and open the database.

        Args:
            db_name: Name of the SQLite database file, passed to ``BaseDatabase``.
            dcom_dir: Root of the dcom tree.
            obs_dir: Sub-path, below each first-level dcom directory, holding the files.
        """
        # '*' matches any single directory level between dcom_dir and obs_dir.
        base_dir = os.path.join(dcom_dir, '*', obs_dir)
        # NOTE(review): BaseDatabase is presumed to keep this as self.base_dir,
        # which ingest_files() reads - confirm against pyobsforge.obsdb.
        super().__init__(db_name, base_dir)

    def create_database(self):
        """
        Create the SQLite database and observation files table.

        This method initializes the database with a table named `obs_files` to store metadata
        about observation files. The table contains the following columns:

        - `id`: A unique identifier for each record (auto-incremented primary key).
        - `filename`: The full path to the observation file (must be unique).
        - `obs_time`: The timestamp of the observation, parsed from the `s...` field
          of the filename.
        - `receipt_time`: The timestamp parsed from the `c...` field of the filename.
        - `instrument`: The instrument used to collect the observation (e.g., AMSR2).
        - `satellite`: The satellite from which the observation was collected (e.g., GW1).
        - `obs_type`: The type of observation (e.g., SEAICE)

        The table is created if it does not already exist.
        """
        query = """
            CREATE TABLE IF NOT EXISTS obs_files (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                filename TEXT UNIQUE,
                obs_time TIMESTAMP,
                receipt_time TIMESTAMP,
                instrument TEXT,
                satellite TEXT,
                obs_type TEXT
            )
        """
        self.execute_query(query)

    def parse_filename(self, filename):
        """Extract the metadata encoded in an AMSR2 file name.

        Example file name (fields are separated by '-' or '_')::

            AMSR2-SEAICE-NH_v2r2_GW1_s202503140032240_e202503140211220_c202503140245560.nc

        Args:
            filename: Path to the observation file; only its basename is parsed.

        Returns:
            The tuple ``(filename, obs_time, receipt_time, instrument, satellite,
            obs_type)``, or ``None`` when the name does not follow the AMSR2
            pattern or one of its timestamps cannot be parsed.
        """
        # Treat '_' and '-' as the same separator so one split yields every field.
        parts = os.path.basename(filename).replace('_', '-').split('-')
        if len(parts) < 8 or parts[0] != 'AMSR2':
            return None

        instrument, obs_type, satellite = parts[0], parts[1], parts[4]
        # Drop the one-letter prefix ('s' / 'c') and keep the 15 digits that
        # follow: YYYYMMDDHHMMSS plus one fractional-second digit (read by %f).
        obs_time_str = parts[5][1:16]
        receipt_time_str = parts[7].split('.')[0][1:16]

        try:
            obs_time = datetime.strptime(obs_time_str, "%Y%m%d%H%M%S%f")
            receipt_time = datetime.strptime(receipt_time_str, "%Y%m%d%H%M%S%f")
        except ValueError as e:
            # strptime is the only call that can fail here; report and skip the file.
            print(f"[DEBUG] Error parsing filename {filename}: {e}")
            return None

        return filename, obs_time, receipt_time, instrument, satellite, obs_type

    def ingest_files(self):
        """Scan the directory for observation files and insert them into the database."""
        obs_files = glob.glob(os.path.join(self.base_dir, "*.nc"))
        print(f"[INFO] Found {len(obs_files)} new files to ingest")
        print(f"[INFO] Files found: {obs_files}")

        # The statement is identical for every file, so build it once.
        query = """
            INSERT INTO obs_files (filename, obs_time, receipt_time, instrument, satellite, obs_type)
            VALUES (?, ?, ?, ?, ?, ?)
        """

        # Counter for successful ingestions
        ingested_count = 0

        for file in obs_files:
            parsed_data = self.parse_filename(file)
            if not parsed_data:
                continue
            try:
                self.insert_record(query, parsed_data)
                ingested_count += 1
            except Exception as e:
                # Best-effort boundary: insert_record lives in BaseDatabase (not
                # visible here), so any failure is reported and the file skipped.
                print(f"[DEBUG] Failed to insert record for {file}: {e}")
        print("################################ "
              f"Successfully ingested {ingested_count} files into the database.")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| import os | ||
| import glob | ||
| from datetime import datetime | ||
| from pyobsforge.obsdb import BaseDatabase | ||
|
|
||
|
|
||
class SmapDatabase(BaseDatabase):
    """Catalogue SMAP sea-surface-salinity observation files in an SQLite database.

    Files are discovered with a glob below ``<dcom_dir>/*/<obs_dir>``; the
    observation time comes from the file name and the receipt time from the
    file's status on disk.
    """

    def __init__(self, db_name="smap.db",
                 dcom_dir="/lfs/h1/ops/prod/dcom/",
                 obs_dir="wtxtbul/satSSS/SMAP"):
        """Build the glob pattern for the observation directories and open the database.

        Args:
            db_name: Name of the SQLite database file, passed to ``BaseDatabase``.
            dcom_dir: Root of the dcom tree.
            obs_dir: Sub-path, below each first-level dcom directory, holding the files.
        """
        # '*' matches any single directory level between dcom_dir and obs_dir.
        base_dir = os.path.join(dcom_dir, '*', obs_dir)
        # NOTE(review): BaseDatabase is presumed to keep this as self.base_dir,
        # which ingest_files() reads - confirm against pyobsforge.obsdb.
        super().__init__(db_name, base_dir)

    def create_database(self):
        """
        Create the SQLite database and observation files table.

        This method initializes the database with a table named `obs_files` to store metadata
        about observation files. The table contains the following columns:

        - `id`: A unique identifier for each record (auto-incremented primary key).
        - `filename`: The full path to the observation file (must be unique).
        - `obs_time`: The timestamp of the observation, extracted from the filename.
        - `receipt_time`: The file's `os.path.getctime` timestamp at ingest time.
        - `satellite`: The satellite from which the observation was collected (always SMAP).

        The table is created if it does not already exist.
        """
        query = """
            CREATE TABLE IF NOT EXISTS obs_files (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                filename TEXT UNIQUE,
                obs_time TIMESTAMP,
                receipt_time TIMESTAMP,
                satellite TEXT
            )
        """
        self.execute_query(query)

    def parse_filename(self, filename):
        """Extract the metadata for one SMAP file.

        Expected pattern: ``SMAP_L2B_SSS_NRT_54047_A_20250315T011742.h5``

        Args:
            filename: Path to the observation file. The basename supplies the
                observation time; the path itself is stat'ed for the receipt time.

        Returns:
            The tuple ``(filename, obs_time, receipt_time, satellite)``, or
            ``None`` when the name does not match the pattern, its timestamp
            cannot be parsed, or the file cannot be stat'ed.
        """
        basename = os.path.basename(filename)
        parts = basename.split('_')
        if not basename.startswith("SMAP_L2B_SSS_NRT") or len(parts) < 7:
            return None

        # The seventh field is "<timestamp>.h5"; strip the extension.
        timestamp_str = os.path.splitext(parts[6])[0]
        try:
            obs_time = datetime.strptime(timestamp_str, "%Y%m%dT%H%M%S")
            # getctime raises OSError if the file vanished or is unreadable
            # after the glob; treat that like any other unparsable file
            # instead of aborting the whole ingest.
            receipt_time = datetime.fromtimestamp(os.path.getctime(filename))
        except (ValueError, OSError) as e:
            print(f"[DEBUG] Error parsing filename {filename}: {e}")
            return None

        return filename, obs_time, receipt_time, "SMAP"

    def ingest_files(self):
        """Scan the directory for observation files and insert them into the database."""
        obs_files = glob.glob(os.path.join(self.base_dir, "*.h5"))
        print(f"Found {len(obs_files)} new files to ingest")

        # The statement is identical for every file, so build it once.
        query = """
            INSERT INTO obs_files (filename, obs_time, receipt_time, satellite)
            VALUES (?, ?, ?, ?)
        """

        # Counter for successful ingestions
        ingested_count = 0

        for file in obs_files:
            parsed_data = self.parse_filename(file)
            if not parsed_data:
                continue
            try:
                self.insert_record(query, parsed_data)
                ingested_count += 1
            except Exception as e:
                # Best-effort boundary: insert_record lives in BaseDatabase (not
                # visible here), so any failure is reported and the file skipped.
                print(f"Failed to insert record for {file}: {e}")
        print("################################ "
              f"Successfully ingested {ingested_count} files into the database.")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,78 @@ | ||
| import os | ||
| import glob | ||
| from datetime import datetime | ||
| from pyobsforge.obsdb import BaseDatabase | ||
|
|
||
|
|
||
class SmosDatabase(BaseDatabase):
    """Catalogue SMOS sea-surface-salinity observation files in an SQLite database.

    Files are discovered with a glob below ``<dcom_dir>/*/<obs_dir>``; the
    observation time comes from the file name and the receipt time from the
    file's status on disk.
    """

    def __init__(self, db_name="smos.db",
                 dcom_dir="/lfs/h1/ops/prod/dcom/",
                 obs_dir="wtxtbul/satSSS/SMOS"):
        """Build the glob pattern for the observation directories and open the database.

        Args:
            db_name: Name of the SQLite database file, passed to ``BaseDatabase``.
            dcom_dir: Root of the dcom tree.
            obs_dir: Sub-path, below each first-level dcom directory, holding the files.
        """
        # '*' matches any single directory level between dcom_dir and obs_dir.
        base_dir = os.path.join(dcom_dir, '*', obs_dir)
        # NOTE(review): BaseDatabase is presumed to keep this as self.base_dir,
        # which ingest_files() reads - confirm against pyobsforge.obsdb.
        super().__init__(db_name, base_dir)

    def create_database(self):
        """
        Create the SQLite database and observation files table.

        This method initializes the database with a table named `obs_files` to store metadata
        about observation files. The table contains the following columns:

        - `id`: A unique identifier for each record (auto-incremented primary key).
        - `filename`: The full path to the observation file (must be unique).
        - `obs_time`: The timestamp of the observation, taken from the first
          timestamp field of the filename.
        - `receipt_time`: The file's `os.path.getctime` timestamp at ingest time.
        - `satellite`: The satellite from which the observation was collected (always SMOS).

        The table is created if it does not already exist.
        """
        query = """
            CREATE TABLE IF NOT EXISTS obs_files (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                filename TEXT UNIQUE,
                obs_time TIMESTAMP,
                receipt_time TIMESTAMP,
                satellite TEXT
            )
        """
        self.execute_query(query)

    def parse_filename(self, filename):
        """Extract the metadata for one SMOS file.

        Expected pattern:
        ``SM_OPER_MIR_OSUDP2_20250315T001156_20250315T010515_700_001_1.nc``

        Args:
            filename: Path to the observation file. The basename supplies the
                observation time; the path itself is stat'ed for the receipt time.

        Returns:
            The tuple ``(filename, obs_time, receipt_time, satellite)``, or
            ``None`` when the name does not match the pattern, its timestamp
            cannot be parsed, or the file cannot be stat'ed.
        """
        basename = os.path.basename(filename)
        parts = basename.split('_')
        if not basename.startswith("SM_OPER_MIR_OSUDP") or len(parts) < 6:
            return None

        # The fifth field is the first of the two timestamps in the name.
        start_time_str = parts[4]
        try:
            obs_time = datetime.strptime(start_time_str, "%Y%m%dT%H%M%S")
            # getctime raises OSError if the file vanished or is unreadable
            # after the glob; treat that like any other unparsable file
            # instead of aborting the whole ingest.
            receipt_time = datetime.fromtimestamp(os.path.getctime(filename))
        except (ValueError, OSError) as e:
            print(f"[DEBUG] Error parsing filename {filename}: {e}")
            return None

        return filename, obs_time, receipt_time, "SMOS"

    def ingest_files(self):
        """Scan the directory for observation files and insert them into the database."""
        obs_files = glob.glob(os.path.join(self.base_dir, "*.nc"))
        print(f"Found {len(obs_files)} new files to ingest")

        # The statement is identical for every file, so build it once.
        query = """
            INSERT INTO obs_files (filename, obs_time, receipt_time, satellite)
            VALUES (?, ?, ?, ?)
        """

        # Counter for successful ingestions
        ingested_count = 0

        for file in obs_files:
            parsed_data = self.parse_filename(file)
            if not parsed_data:
                continue
            try:
                self.insert_record(query, parsed_data)
                ingested_count += 1
            except Exception as e:
                # Best-effort boundary: insert_record lives in BaseDatabase (not
                # visible here), so any failure is reported and the file skipped.
                print(f"Failed to insert record for {file}: {e}")
        print("################################ "
              f"Successfully ingested {ingested_count} files into the database.")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.