diff --git a/.gitignore b/.gitignore index 595f5168..af1f1e0a 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,7 @@ bin/ build*/ install*/ +__pycache__*/ # Ignore the following bundle repositories /ecbuild @@ -49,3 +50,6 @@ install*/ /sorc/obsforge-utils !/bundle/CMakeLists.txt /bundle/* + +# rocoto +*.db diff --git a/bundle/CMakeLists.txt b/bundle/CMakeLists.txt index 0cc0803c..4545a63e 100644 --- a/bundle/CMakeLists.txt +++ b/bundle/CMakeLists.txt @@ -44,6 +44,7 @@ option( ENABLE_LORENZ95_MODEL "Build LORENZ95 toy model" OFF ) option( ENABLE_QG_MODEL "Build QG toy model" OFF ) # EMC BUFR-query library +set( SKIP_DOWNLOAD_TEST_DATA OFF ) ecbuild_bundle( PROJECT bufr-query SOURCE "../sorc/bufr-query" ) # Core JEDI repositories diff --git a/jobs/JOBSFORGE_GLOBAL_AOD_DUMP b/jobs/JOBSFORGE_GLOBAL_AOD_DUMP new file mode 100755 index 00000000..05a20b12 --- /dev/null +++ b/jobs/JOBSFORGE_GLOBAL_AOD_DUMP @@ -0,0 +1,45 @@ +#! /usr/bin/env bash + +source "${HOMEobsforge}/ush/preamble.sh" +source "${HOMEobsforge}/ush/jjob_header.sh" + +############################################## +# Set variables used in the script +############################################## + + +############################################## +# Begin JOB SPECIFIC work +############################################## + +############################################################### +# Run relevant script + +EXSCRIPT=${DUMPAODPY:-${HOMEobsforge}/scripts/exobsforge_global_aod_dump.py} +${EXSCRIPT} +status=$? +if [[ ${status} -ne 0 ]]; then + exit "${status}" +fi + + +############################################## +# End JOB SPECIFIC work +############################################## + +############################################## +# Final processing +############################################## +if [[ -e "${pgmout}" ]] ; then + cat "${pgmout}" +fi + +########################################## +# Remove the Temporary working directory +########################################## +cd "${DATAROOT}" || exit +if [[ "${KEEPDATA}" == "NO" ]]; then + rm -rf "${DATA}" +fi + +exit 0 diff --git a/jobs/rocoto/aoddump.sh b/jobs/rocoto/aoddump.sh new file mode 100755 index 00000000..dd8bec25 --- /dev/null +++ b/jobs/rocoto/aoddump.sh @@ -0,0 +1,20 @@ +#! /usr/bin/env bash + +source "${HOMEobsforge}/ush/preamble.sh" + +############################################################### +# Source UFSDA workflow modules +. "${HOMEobsforge}/ush/load_obsforge_modules.sh" +status=$? +if [[ ${status} -ne 0 ]]; then + exit "${status}" +fi + +export job="aoddump" +export jobid="${job}.$$" + +############################################################### +# Execute the JJOB +"${HOMEobsforge}/jobs/JOBSFORGE_GLOBAL_AOD_DUMP" +status=$? +exit "${status}" diff --git a/parm/config.yaml b/parm/config.yaml new file mode 100644 index 00000000..e499688f --- /dev/null +++ b/parm/config.yaml @@ -0,0 +1,4 @@ +obsforge: + assim_freq: 6 +aoddump: + platforms: ['npp', 'n20', 'n21'] diff --git a/parm/obsforge_test_hera.xml b/parm/obsforge_test_hera.xml new file mode 100644 index 00000000..dc59281f --- /dev/null +++ b/parm/obsforge_test_hera.xml @@ -0,0 +1,91 @@ + + + + + + +]> + + + + /scratch2/NCEPDEV/stmp1/Cory.R.Martin/obsforge/COMROOT/obsforge/logs/@Y@m@d@H.log + + + 202503160000 202503170000 06:00:00 + 202503160000 202503170000 06:00:00 + + + + /scratch2/NCEPDEV/stmp1/Cory.R.Martin/mar2025/obsforge/jobs/rocoto/aoddump.sh + + obsforge_gfs_aod_dump_@H + da-cpu + batch + hera + 00:30:00 + 1:ppn=1:tpp=1 + 96GB + --export=NONE + + /scratch2/NCEPDEV/stmp1/Cory.R.Martin/obsforge/COMROOT/obsforge/logs/@Y@m@d@H/gfs_aod_dump_prep.log + + RUN_ENVIRemc + HOMEobsforge/scratch2/NCEPDEV/stmp1/Cory.R.Martin/mar2025/obsforge/ + NETgfs + RUNgfs + CDATE@Y@m@d@H + PDY@Y@m@d + cyc@H + KEEPDATANO + COMROOT/scratch2/NCEPDEV/stmp1/Cory.R.Martin/com + DCOMROOT/scratch2/NCEPDEV/stmp1/Cory.R.Martin/dcom + DATAROOT/scratch1/NCEPDEV/stmp2/Cory.R.Martin/RUNDIRS/obsforge/gfs.@Y@m@d@H + + + + + + + + /scratch2/NCEPDEV/stmp1/Cory.R.Martin/mar2025/obsforge/jobs/rocoto/aoddump.sh + + obsforge_gdas_aod_dump_@H + da-cpu + batch + hera + 00:30:00 + 1:ppn=1:tpp=1 + 96GB + --export=NONE + + /scratch2/NCEPDEV/stmp1/Cory.R.Martin/obsforge/COMROOT/obsforge/logs/@Y@m@d@H/gdas_aod_dump_prep.log + + RUN_ENVIRemc + HOMEobsforge/scratch2/NCEPDEV/stmp1/Cory.R.Martin/mar2025/obsforge/ + NETgfs + RUNgdas + CDATE@Y@m@d@H + PDY@Y@m@d + cyc@H + KEEPDATANO + COMROOT/scratch2/NCEPDEV/stmp1/Cory.R.Martin/com + DCOMROOT/scratch2/NCEPDEV/stmp1/Cory.R.Martin/dcom + DATAROOT/scratch1/NCEPDEV/stmp2/Cory.R.Martin/RUNDIRS/obsforge/gdas.@Y@m@d@H + + + + + + diff --git a/scripts/exobsforge_global_aod_dump.py b/scripts/exobsforge_global_aod_dump.py new file mode 100755 index 00000000..fa3d6582 --- /dev/null +++ b/scripts/exobsforge_global_aod_dump.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +# exobsforge_global_aod_dump.py +# This script will collect and preprocess +# aerosol optical depth observations for +# global aerosol assimilation +import os + +from wxflow import AttrDict, Logger, cast_strdict_as_dtypedict, parse_j2yaml +from pyobsforge.task.aero_prepobs import AerosolObsPrep + +# Initialize root logger +logger = Logger(level='DEBUG', colored_log=True) + + +if __name__ == '__main__': + + # Take configuration from environment and cast it as python dictionary + config_env = cast_strdict_as_dtypedict(os.environ) + # Take configuration from YAML file to augment/append config dict + config_yaml = parse_j2yaml(os.path.join(config_env['HOMEobsforge'], 'parm', 'config.yaml'), config_env) + # Combine configs together + config = AttrDict(**config_env, **config_yaml['obsforge']) + config = AttrDict(**config, **config_yaml['aoddump']) + + AeroObs = AerosolObsPrep(config) + AeroObs.initialize() + AeroObs.execute() + AeroObs.finalize() diff --git a/sorc/bufr-query b/sorc/bufr-query index 8bc28860..cdf5330f 160000 --- a/sorc/bufr-query +++ b/sorc/bufr-query @@ -1 +1 @@ -Subproject commit 8bc28860d562f9a53ffa86e7c5c5ae05c2c4e09f +Subproject commit cdf5330fcaa8a91be3b72749c7538aa984f7eba7 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 184c4499..8e14f347 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -301,44 +301,46 @@ foreach(file ${test_aux_files}) ${CMAKE_CURRENT_BINARY_DIR}/${file} ) endforeach(file) -# =================================================================== -# Download the test data ./build/obsForge/test/ if we don't have it -# =================================================================== - -set(DOWNLOAD_URL "https://ftp.emc.ncep.noaa.gov/static_files/public/obsforge") -set(FILE_COLLECTION "obsforge-0.0.0.tgz") -#set(FILE_COLLECTION "obsforge-0.0.1.tgz") - -# Get the test data if we don't have it. -if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/${FILE_COLLECTION}") - message(STATUS "Downloading: ${CMAKE_CURRENT_BINARY_DIR}/${FILE_COLLECTION}") - file(DOWNLOAD - ${DOWNLOAD_URL}/${FILE_COLLECTION} - ${CMAKE_CURRENT_BINARY_DIR}/${FILE_COLLECTION} - SHOW_PROGRESS - STATUS download_status - INACTIVITY_TIMEOUT 30 - ) - - list(GET download_status 0 download_status_num) +if(NOT SKIP_DOWNLOAD_TEST_DATA) + # =================================================================== + # Download the test data ./build/obsForge/test/ if we don't have it + # =================================================================== + + set(DOWNLOAD_URL "https://ftp.emc.ncep.noaa.gov/static_files/public/obsforge") + set(FILE_COLLECTION "obsforge-0.0.0.tgz") + #set(FILE_COLLECTION "obsforge-0.0.1.tgz") + + # Get the test data if we don't have it. + if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/${FILE_COLLECTION}") + message(STATUS "Downloading: ${CMAKE_CURRENT_BINARY_DIR}/${FILE_COLLECTION}") + file(DOWNLOAD + ${DOWNLOAD_URL}/${FILE_COLLECTION} + ${CMAKE_CURRENT_BINARY_DIR}/${FILE_COLLECTION} + SHOW_PROGRESS + STATUS download_status + INACTIVITY_TIMEOUT 30 + ) + + list(GET download_status 0 download_status_num) + + if(NOT download_status_num EQUAL 0 OR NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/${FILE_COLLECTION}) + # Remove empty file if download doesn't complete + file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/${BUFR_TAR}) + message(STATUS "Could not download test files, not building tests") + return() + endif() - if(NOT download_status_num EQUAL 0 OR NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/${FILE_COLLECTION}) - # Remove empty file if download doesn't complete - file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/${BUFR_TAR}) - message(STATUS "Could not download test files, not building tests") - return() + # file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/${FILE_COLLECTION} DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) + + add_custom_target(get_obsforge_test_data ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${FILE_COLLECTION}) + add_custom_command( + TARGET get_obsforge_test_data + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_CURRENT_BINARY_DIR} tar xzf ${FILE_COLLECTION} + COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_CURRENT_BINARY_DIR} rm -rf testdata + COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_CURRENT_BINARY_DIR} rm -rf testoutput + COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_CURRENT_BINARY_DIR} mv remote_data/testdata testdata + COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_CURRENT_BINARY_DIR} mv remote_data/testoutput testoutput + COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_CURRENT_BINARY_DIR} rm -rf remote_data) endif() - - # file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/${FILE_COLLECTION} DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) - - add_custom_target(get_obsforge_test_data ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${FILE_COLLECTION}) - add_custom_command( - TARGET get_obsforge_test_data - POST_BUILD - COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_CURRENT_BINARY_DIR} tar xzf ${FILE_COLLECTION} - COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_CURRENT_BINARY_DIR} rm -rf testdata - COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_CURRENT_BINARY_DIR} rm -rf testoutput - COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_CURRENT_BINARY_DIR} mv remote_data/testdata testdata - COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_CURRENT_BINARY_DIR} mv remote_data/testoutput testoutput - COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_CURRENT_BINARY_DIR} rm -rf remote_data) endif() diff --git a/ush/bash_utils.sh b/ush/bash_utils.sh new file mode 100755 index 00000000..b8ce729c --- /dev/null +++ b/ush/bash_utils.sh @@ -0,0 +1,126 @@ +#! /usr/bin/env bash + +function declare_from_tmpl() { + # + # Define variables from corresponding templates by substituting in env variables. + # + # Each template must already be defined. Any variables in the template are replaced + # with their values. Undefined variables are just removed WITHOUT raising an error. + # + # Accepts as options `-r` and `-x`, which do the same thing as the same options in + # `declare`. Variables are automatically marked as `-g` so the variable is visible + # in the calling script. + # + # Syntax: + # declare_from_tmpl [-rx] $var1[:$tmpl1] [$var2[:$tmpl2]] [...]] + # + # options: + # -r: Make variable read-only (same as `declare -r`) + # -x: Mark variable for export (same as `declare -x`) + # var1, var2, etc: Variable names whose values will be generated from a template + # and declared + # tmpl1, tmpl2, etc: Specify the template to use (default is "${var}_TMPL") + # + # Examples: + # # Current cycle and RUN, implicitly using template COM_ATMOS_ANALYSIS_TMPL + # YMD=${PDY} HH=${cyc} declare_from_tmpl -rx COM_ATMOS_ANALYSIS + # + # # Previous cycle and gdas using an explicit template + # RUN=${GDUMP} YMD=${gPDY} HH=${gcyc} declare_from_tmpl -rx \ + # COM_ATMOS_HISTORY_PREV:COM_ATMOS_HISTORY_TMPL + # + # # Current cycle and COM for first member + # MEMDIR='mem001' YMD=${PDY} HH=${cyc} declare_from_tmpl -rx COM_ATMOS_HISTORY + # + if [[ ${DEBUG_WORKFLOW:-"NO"} == "NO" ]]; then set +x; fi + local opts="-g" + local OPTIND=1 + while getopts "rx" option; do + opts="${opts}${option}" + done + shift $((OPTIND-1)) + + for input in "$@"; do + IFS=':' read -ra args <<< "${input}" + local com_var="${args[0]}" + local template + local value + if (( ${#args[@]} > 1 )); then + template="${args[1]}" + else + template="${com_var}_TMPL" + fi + if [[ ! -v "${template}" ]]; then + echo "FATAL ERROR in declare_from_tmpl: Requested template ${template} not defined!" + exit 2 + fi + value=$(echo "${!template}" | envsubst) + # shellcheck disable=SC2086 + declare ${opts} "${com_var}"="${value}" + # shellcheck disable= + echo "declare_from_tmpl :: ${com_var}=${value}" + done + set_trace +} + +function wait_for_file() { + # + # Wait for a file to exist and return the status. + # + # Checks if a file exists periodically up to a maximum number of attempts. When the file + # exists or the limit is reached, the status is returned (0 if the file exists,1 if it + # does not). This allows it to be used as a conditional to handle missing files. + # + # Syntax: + # wait_for_file file_name [sleep_interval [max_tries]] + # + # file_name: File to check the existence of (must be readable) + # sleep_interval: Time to wait between each check (in seconds) [default: 60] + # max_tries: The maximum number of checks to make [default: 100] + # + # Example: + # ``` + # file_name=/path/to/foo + # sleep_interval=60 + # max_tries=30 + # if wait_for_file; then + # echo "FATAL ERROR: ${file_name} still does not exist after waiting one-half hour." + # exit 1 + # fi + # # Code that depends on file existing + # ``` + # + set +x + local file_name=${1:?"wait_for_file() requires a file name"} + local sleep_interval=${2:-60} + local max_tries=${3:-100} + + for (( iter=0; iter 0 )); then + id="(${1})" +else + id="" +fi + +# Record the start time so we can calculate the elapsed time later +start_time=$(date +%s) + +# Get the base name of the calling script +_calling_script=$(basename "${BASH_SOURCE[1]}") + +# Announce the script has begun +start_time_human=$(date -d"@${start_time}" -u) +echo "Begin ${_calling_script} at ${start_time_human}" + +declare -rx PS4='+ $(basename ${BASH_SOURCE[0]:-${FUNCNAME[0]:-"Unknown"}})[${LINENO}]'"${id}: " + +set_strict() { + if [[ ${STRICT:-"YES"} == "YES" ]]; then + # Exit on error and undefined variable + set -eu + fi +} + +set_trace() { + # Print the script name and line number of each command as it is + # executed when using trace. + if [[ ${TRACE:-"YES"} == "YES" ]]; then + set -x + fi +} + +postamble() { + # + # Commands to execute when a script ends. + # + # Syntax: + # postamble script start_time rc + # + # Arguments: + # script: name of the script ending + # start_time: start time of script (in seconds) + # rc: the exit code of the script + # + + set +x + script="${1}" + start_time="${2}" + rc="${3}" + + # Execute postamble command + # + # Commands can be added to the postamble by appending them to $POSTAMBLE_CMD: + # POSTAMBLE_CMD="new_thing; ${POSTAMBLE_CMD:-}" # (before existing commands) + # POSTAMBLE_CMD="${POSTAMBLE_CMD:-}; new_thing" # (after existing commands) + # + # Always use this form so previous POSTAMBLE_CMD are not overwritten. This should + # only be used for commands that execute conditionally (i.e. on certain machines + # or jobs). Global changes should just be added to this function. + # These commands will be called when EACH SCRIPT terminates, so be mindful. Please + # consult with global-workflow CMs about permanent changes to $POSTAMBLE_CMD or + # this postamble function. + # + + if [[ -v 'POSTAMBLE_CMD' ]]; then + ${POSTAMBLE_CMD} + fi + + # Calculate the elapsed time + end_time=$(date +%s) + end_time_human=$(date -d@"${end_time}" -u +%H:%M:%S) + elapsed_sec=$((end_time - start_time)) + elapsed=$(date -d@"${elapsed_sec}" -u +%H:%M:%S) + + # Announce the script has ended, then pass the error code up + echo "End ${script} at ${end_time_human} with error code ${rc:-0} (time elapsed: ${elapsed})" + exit "${rc}" +} + +# Place the postamble in a trap so it is always called no matter how the script exits +# Shellcheck: Turn off warning about substitions at runtime instead of signal time +# shellcheck disable=SC2064 +trap "postamble ${_calling_script} ${start_time} \$?" EXIT +# shellcheck disable= + +source "${HOMEobsforge}/ush/bash_utils.sh" + +# Turn on our settings +set_strict +set_trace diff --git a/ush/python/pyobsforge/__init__.py b/ush/python/pyobsforge/__init__.py new file mode 100644 index 00000000..57f3d08e --- /dev/null +++ b/ush/python/pyobsforge/__init__.py @@ -0,0 +1,5 @@ +import os + +__docformat__ = "restructuredtext" +__version__ = "0.1.0" +pyobsforge_directory = os.path.dirname(__file__) diff --git a/ush/python/pyobsforge/task/__init__.py b/ush/python/pyobsforge/task/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ush/python/pyobsforge/task/aero_prepobs.py b/ush/python/pyobsforge/task/aero_prepobs.py new file mode 100644 index 00000000..7f7c5402 --- /dev/null +++ b/ush/python/pyobsforge/task/aero_prepobs.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 + +import os +import glob +import gzip +import tarfile +import re +from logging import getLogger +from typing import List, Dict, Any, Union + +from wxflow import (AttrDict, FileHandler, rm_p, rmdir, + Task, add_to_datetime, to_timedelta, to_datetime, + datetime_to_YMD, + chdir, Executable, WorkflowException, + parse_j2yaml, save_as_yaml, logit) + +logger = getLogger(__name__.split('.')[-1]) + + +class AerosolObsPrep(Task): + """ + Class for preparing and managing aerosol observations + """ + def __init__(self, config: Dict[str, Any]) -> None: + super().__init__(config) + + _window_begin = add_to_datetime(self.task_config.current_cycle, -to_timedelta(f"{self.task_config['assim_freq']}H") / 2) + _window_end = add_to_datetime(self.task_config.current_cycle, +to_timedelta(f"{self.task_config['assim_freq']}H") / 2) + + local_dict = AttrDict( + { + 'window_begin': _window_begin, + 'window_end': _window_end, + 'OPREFIX': f"{self.task_config.RUN}.t{self.task_config.cyc:02d}z.", + 'APREFIX': f"{self.task_config.RUN}.t{self.task_config.cyc:02d}z." + } + ) + + # task_config is everything that this task should need + self.task_config = AttrDict(**self.task_config, **local_dict) + + @logit(logger) + def initialize(self) -> None: + """ + """ + print("initialize") + + @logit(logger) + def execute(self) -> None: + """ + """ + print("execute") + + @logit(logger) + def finalize(self) -> None: + """ + """ + print("finalize") diff --git a/ush/python/wxflow b/ush/python/wxflow new file mode 120000 index 00000000..f2360088 --- /dev/null +++ b/ush/python/wxflow @@ -0,0 +1 @@ +../../sorc/wxflow/src/wxflow/ \ No newline at end of file