diff --git a/src/bufr/BufrParser/BufrParser.cpp b/src/bufr/BufrParser/BufrParser.cpp index 4686012ae..00160e0d7 100644 --- a/src/bufr/BufrParser/BufrParser.cpp +++ b/src/bufr/BufrParser/BufrParser.cpp @@ -48,7 +48,8 @@ namespace Ingester { { auto startTime = std::chrono::steady_clock::now(); - auto querySet = bufr::QuerySet(); + auto querySet = bufr::QuerySet(description_.getExport().getSubsets()); + for (const auto &var : description_.getExport().getVariables()) { for (const auto &queryPair : var->getQueryList()) diff --git a/src/bufr/BufrParser/Exports/Export.cpp b/src/bufr/BufrParser/Exports/Export.cpp index b421af3ad..7c3fafee8 100644 --- a/src/bufr/BufrParser/Exports/Export.cpp +++ b/src/bufr/BufrParser/Exports/Export.cpp @@ -28,6 +28,7 @@ namespace const char* Splits = "splits"; const char* Variables = "variables"; const char* GroupByVariable = "group_by_variable"; + const char* Subsets = "subsets"; namespace Variable { @@ -75,6 +76,11 @@ namespace Ingester groupByVariable = conf.getString(ConfKeys::GroupByVariable); } + if (conf.has(ConfKeys::Subsets)) + { + subsets_ = conf.getStringVector(ConfKeys::Subsets); + } + if (conf.has(ConfKeys::Variables)) { addVariables(conf.getSubConfiguration(ConfKeys::Variables), diff --git a/src/bufr/BufrParser/Exports/Export.h b/src/bufr/BufrParser/Exports/Export.h index 51a574ac7..baed395d5 100644 --- a/src/bufr/BufrParser/Exports/Export.h +++ b/src/bufr/BufrParser/Exports/Export.h @@ -35,11 +35,13 @@ namespace Ingester inline Splits getSplits() const { return splits_; } inline Variables getVariables() const { return variables_; } inline Filters getFilters() const { return filters_; } + inline std::vector getSubsets() const { return subsets_; } private: Splits splits_; Variables variables_; Filters filters_; + std::vector subsets_; /// \brief Create Variables exports from config. void addVariables(const eckit::Configuration &conf, diff --git a/src/bufr/BufrParser/Exports/Splits/CategorySplit.cpp b/src/bufr/BufrParser/Exports/Splits/CategorySplit.cpp index 77c51fba4..8d30aa319 100644 --- a/src/bufr/BufrParser/Exports/Splits/CategorySplit.cpp +++ b/src/bufr/BufrParser/Exports/Splits/CategorySplit.cpp @@ -83,11 +83,10 @@ namespace Ingester auto location = Location(dataObject->getDims().size(), 0); location[0] = rowIdx; - auto itemVal = dataObject->getAsFloat(location); - if (trunc(itemVal) == itemVal) + if (auto dat = std::dynamic_pointer_cast> (dataObject)) { - nameMap_.insert({static_cast (itemVal), - std::to_string(static_cast (itemVal))}); + auto itemVal = dat->get(location); + nameMap_.insert({itemVal, std::to_string(itemVal)}); } else { diff --git a/src/bufr/BufrParser/Exports/Variables/DatetimeVariable.cpp b/src/bufr/BufrParser/Exports/Variables/DatetimeVariable.cpp index d20bd149c..50116edbc 100644 --- a/src/bufr/BufrParser/Exports/Variables/DatetimeVariable.cpp +++ b/src/bufr/BufrParser/Exports/Variables/DatetimeVariable.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -71,8 +72,7 @@ namespace Ingester std::shared_ptr DatetimeVariable::exportData(const BufrDataMap& map) { checkKeys(map); - static const float missing = 1.e+11; - static const int64_t missing_int = INT_MIN; + static const int missingInt = DataObject::missingValue(); std::tm tm{}; // zero initialise tm.tm_year = 1970-1900; // 1970 @@ -83,26 +83,38 @@ namespace Ingester tm.tm_sec = 0; tm.tm_isdst = 0; // Not daylight saving std::time_t epochDt = std::mktime(&tm); - std::time_t this_time = std::mktime(&tm); - int64_t diff_time; std::vector timeOffsets; timeOffsets.reserve(map.at(getExportKey(ConfKeys::Year))->size()); + // Validation + if (map.at(getExportKey(ConfKeys::Year))->getDims().size() != 1 || + map.at(getExportKey(ConfKeys::Month))->getDims().size() != 1 || + map.at(getExportKey(ConfKeys::Day))->getDims().size() != 1 || + (!minuteQuery_.empty() && + map.at(getExportKey(ConfKeys::Minute))->getDims().size() != 1) || + (!secondQuery_.empty() && + map.at(getExportKey(ConfKeys::Second))->getDims().size() != 1)) + { + std::ostringstream errStr; + errStr << "Datetime variables must be 1 dimensional."; + throw eckit::BadParameter(errStr.str()); + } + for (unsigned int idx = 0; idx < map.at(getExportKey(ConfKeys::Year))->size(); idx++) { - int year = static_cast(map.at(getExportKey(ConfKeys::Year))->getAsFloat(idx)); - int month = static_cast(map.at(getExportKey(ConfKeys::Month))->getAsFloat(idx)); - int day = static_cast(map.at(getExportKey(ConfKeys::Day))->getAsFloat(idx)); - int hour = static_cast(map.at(getExportKey(ConfKeys::Hour))->getAsFloat(idx)); + int year = map.at(getExportKey(ConfKeys::Year))->getAsInt(idx); + int month = map.at(getExportKey(ConfKeys::Month))->getAsInt(idx); + int day = map.at(getExportKey(ConfKeys::Day))->getAsInt(idx); + int hour = map.at(getExportKey(ConfKeys::Hour))->getAsInt(idx); int minutes = 0; int seconds = 0; - diff_time = missing_int; - if (year != missing && - month != missing && - day != missing && - hour != missing) + auto diff_time = DataObject::missingValue(); + if (year != missingInt && + month != missingInt && + day != missingInt && + hour != missingInt) { tm.tm_year = year - 1900; tm.tm_mon = month - 1; @@ -114,8 +126,7 @@ namespace Ingester if (!minuteQuery_.empty()) { - minutes = - static_cast(map.at(getExportKey(ConfKeys::Minute))->getAsFloat(idx)); + minutes = map.at(getExportKey(ConfKeys::Minute))->getAsInt(idx); if (minutes >= 0 && minutes < 60) { @@ -125,8 +136,7 @@ namespace Ingester if (!secondQuery_.empty()) { - seconds = - static_cast(map.at(getExportKey(ConfKeys::Second))->getAsFloat(idx)); + seconds = map.at(getExportKey(ConfKeys::Second))->getAsInt(idx); if (seconds >= 0 && seconds < 60) { @@ -134,16 +144,18 @@ namespace Ingester } } - this_time = std::mktime(&tm); - if (this_time < 0) + // Be careful with mktime as it can be very slow. + auto thisTime = std::mktime(&tm); + if (thisTime < 0) { oops::Log::warning() << "Caution, date suspicious date (year, month, day): " << year << ", " << month << ", " << day << std::endl; } - diff_time = static_cast(difftime(this_time, epochDt) - + hoursFromUtc_*3600); + + diff_time = static_cast(difftime(thisTime, epochDt) + + hoursFromUtc_ * 3600); } timeOffsets.push_back(diff_time); diff --git a/src/bufr/BufrParser/Query/File.cpp b/src/bufr/BufrParser/Query/File.cpp index 2684baf30..d05aa582d 100644 --- a/src/bufr/BufrParser/Query/File.cpp +++ b/src/bufr/BufrParser/Query/File.cpp @@ -7,9 +7,11 @@ #include "File.h" +#include + #include "bufr_interface.h" -#include "Query.h" +#include "QueryRunner.h" #include "QuerySet.h" #include "DataProvider.h" @@ -61,7 +63,7 @@ namespace bufr { { static int SubsetLen = 9; unsigned int messageNum = 0; - char subset[SubsetLen]; + char subsetChars[SubsetLen]; int iddate; int bufrLoc; @@ -70,21 +72,27 @@ namespace bufr { auto dataProvider = DataProvider(fileUnit_); auto resultSet = ResultSet(querySet.names()); - auto query = Query(querySet, resultSet, dataProvider); + auto queryRunner = QueryRunner(querySet, resultSet, dataProvider); - while (ireadmg_f(fileUnit_, subset, &iddate, SubsetLen) == 0) + while (ireadmg_f(fileUnit_, subsetChars, &iddate, SubsetLen) == 0) { - while (ireadsb_f(fileUnit_) == 0) + auto subset = std::string(subsetChars); + subset.erase(std::remove_if(subset.begin(), subset.end(), isspace), subset.end()); + + if (querySet.includesSubset(subset)) { - status_f(fileUnit_, &bufrLoc, &il, &im); - dataProvider.updateData(bufrLoc); - query.query(); + while (ireadsb_f(fileUnit_) == 0) + { + status_f(fileUnit_, &bufrLoc, &il, &im); + dataProvider.updateData(bufrLoc); + queryRunner.accumulate(); + } + + if (next > 0 && ++messageNum >= next) break; } - - if (next > 0 && ++messageNum >= next) break; } - resultSet.setTargets(query.getTargets()); + resultSet.setTargets(queryRunner.getTargets()); dataProvider.deleteData(); diff --git a/src/bufr/BufrParser/Query/QueryParser.cpp b/src/bufr/BufrParser/Query/QueryParser.cpp index e2d91ebad..28ea941f5 100644 --- a/src/bufr/BufrParser/Query/QueryParser.cpp +++ b/src/bufr/BufrParser/Query/QueryParser.cpp @@ -14,7 +14,19 @@ namespace Ingester { namespace bufr { - std::vector QueryParser::splitMultiquery(const std::string &query) { + std::vector QueryParser::parse(const std::string& queryStr) + { + std::vector queries; + for (auto& subStr : QueryParser::splitMultiquery(queryStr)) + { + queries.emplace_back(QueryParser::splitQueryStr(subStr)); + } + + return queries; + } + + std::vector QueryParser::splitMultiquery(const std::string &query) + { std::vector subqueries; // Remove whitespace from query and assign to working_str @@ -66,11 +78,8 @@ namespace bufr { return subqueries; } - - void QueryParser::splitQueryStr(const std::string& query, - std::string& subset, - std::vector& mnemonics, - int& index) { + Query QueryParser::splitQueryStr(const std::string& query) + { // Find positions of slashes std::vector slashPositions; size_t slashIdx = 0; @@ -89,7 +98,7 @@ namespace bufr { } // Capture the subset string - subset = query.substr(0, slashPositions[0]); + auto subset = query.substr(0, slashPositions[0]); std::vector mnemonicStrings(slashPositions.size()); @@ -105,7 +114,7 @@ namespace bufr { std::string lastElement = query.substr(slashPositions[slashPositions.size() - 1] + 1); // Parse last element - index = -1; + int index = -1; size_t startSubscript = lastElement.find_first_of("["); size_t endSubscript = lastElement.find_first_of("]"); if (startSubscript != std::string::npos && endSubscript != std::string::npos) @@ -126,7 +135,13 @@ namespace bufr { mnemonicStrings.back() = lastElement; } - mnemonics = mnemonicStrings; + auto queryObj = Query(); + queryObj.queryStr = query; + queryObj.subset = subset; + queryObj.mnemonics = mnemonicStrings; + queryObj.index = index; + + return queryObj; } } // namespace bufr } // namespace Ingester diff --git a/src/bufr/BufrParser/Query/QueryParser.h b/src/bufr/BufrParser/Query/QueryParser.h index 1f860cea2..dd360e78e 100644 --- a/src/bufr/BufrParser/Query/QueryParser.h +++ b/src/bufr/BufrParser/Query/QueryParser.h @@ -13,24 +13,29 @@ namespace Ingester { namespace bufr { + struct Query + { + std::string queryStr; + std::string subset; + std::vector mnemonics; + int index; + }; + /// \brief Parses a user supplied query string into its component parts. /// \note Will be refactored to properly tokenize the query string. class QueryParser { public: + static std::vector parse(const std::string& queryStr); + + private: /// \brief Split a multi query (ex: ["*/CLONH", "*/CLON"]) into a vector of single queries. /// \param query The query to split. static std::vector splitMultiquery(const std::string& query); /// \brief Split a single query (ex: "*/ROSEQ1/ROSEQ2/PCCF[2]") into its component parts. /// \param query The query to split. - /// \param[out] subset The subset part of the query (ex: *). - /// \param[out] mnemonics Query path components (ex: ["ROSEQ1", "ROSEQ2", "PCCF"]). - /// \param[out] index The index associated with this query (ex: 2). - static void splitQueryStr(const std::string& query, - std::string& subset, - std::vector& mnemonics, - int& index); + static Query splitQueryStr(const std::string& query); private: /// \brief Private constructor. diff --git a/src/bufr/BufrParser/Query/Query.cpp b/src/bufr/BufrParser/Query/QueryRunner.cpp similarity index 85% rename from src/bufr/BufrParser/Query/Query.cpp rename to src/bufr/BufrParser/Query/QueryRunner.cpp index af1b685ac..fae955f90 100644 --- a/src/bufr/BufrParser/Query/Query.cpp +++ b/src/bufr/BufrParser/Query/QueryRunner.cpp @@ -4,7 +4,7 @@ * This software is licensed under the terms of the Apache Licence Version 2.0 * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. */ -#include "Query.h" +#include "QueryRunner.h" #include "eckit/exception/Exceptions.h" #include "oops/util/Logger.h" @@ -12,7 +12,6 @@ #include #include -#include "QueryParser.h" #include "Constants.h" namespace Ingester { @@ -23,7 +22,7 @@ namespace bufr { std::vector counts; }; - Query::Query(const QuerySet &querySet, + QueryRunner::QueryRunner(const QuerySet &querySet, ResultSet &resultSet, const DataProvider &dataProvider) : querySet_(querySet), @@ -32,7 +31,8 @@ namespace bufr { { } - void Query::query() { + void QueryRunner::accumulate() + { Targets targets; std::shared_ptr<__details::ProcessingMasks> masks; @@ -40,9 +40,12 @@ namespace bufr { collectData(targets, masks, resultSet_); } - void Query::findTargets(Targets &targets, std::shared_ptr<__details::ProcessingMasks> &masks) { + void QueryRunner::findTargets(Targets &targets, + std::shared_ptr<__details::ProcessingMasks> &masks) + { // Check if the target list for this subset is cached - if (targetCache_.find(dataProvider_.getSubset()) != targetCache_.end()) { + if (targetCache_.find(dataProvider_.getSubset()) != targetCache_.end()) + { targets = targetCache_.at(dataProvider_.getSubset()); masks = maskCache_.at(dataProvider_.getSubset()); return; @@ -55,20 +58,21 @@ namespace bufr { masks->valueNodeMask.resize(numNodes, false); masks->pathNodeMask.resize(numNodes, false); - for (size_t targetIdx = 0; targetIdx < querySet_.size(); ++targetIdx) { - auto queryName = querySet_.nameAt(targetIdx); - auto queryStr = querySet_.queryAt(targetIdx); - - auto subQueries = QueryParser::splitMultiquery(queryStr); + for (size_t targetIdx = 0; targetIdx < querySet_.size(); ++targetIdx) + { + auto queryName = querySet_.names()[targetIdx]; + auto subQueries = querySet_.queriesFor(queryName); bool foundTarget = false; std::shared_ptr target; - for (size_t subQueryIdx = 0; subQueryIdx < subQueries.size(); ++subQueryIdx) { - const std::string &subQuery = subQueries[subQueryIdx]; + for (size_t subQueryIdx = 0; subQueryIdx < subQueries.size(); ++subQueryIdx) + { + const Query& subQuery = subQueries[subQueryIdx]; target = findTarget(queryName, subQuery); - if (target->nodeIds.size() > 0) { + if (target->nodeIds.size() > 0) + { // Collect mask data masks->valueNodeMask[target->nodeIds[0]] = true; for (size_t pathIdx = 0; pathIdx < target->seqPath.size(); ++pathIdx) { @@ -81,14 +85,34 @@ namespace bufr { } } - if (!foundTarget) { + if (!foundTarget) + { // Add the last missing target to the list targets.push_back(target); - oops::Log::warning() << "Warning: Query String " - << queryStr - << " didn't apply to subset " - << dataProvider_.getSubset() - << std::endl; + oops::Log::warning() << "Warning: Query String "; + + auto queries = querySet_.queriesFor(queryName); + + if (queries.size() == 1) + { + oops::Log::warning() << queries[0].queryStr; + } + else + { + oops::Log::warning() << "["; + for (auto subQuery = queries.cbegin(); + subQuery < queries.cend(); + ++subQuery) + { + if (subQuery != queries.cbegin()) oops::Log::warning() << ", "; + oops::Log::warning() << subQuery->queryStr; + } + oops::Log::warning() << "]"; + } + + oops::Log::warning() << " didn't apply to subset "; + oops::Log::warning() << dataProvider_.getSubset(); + oops::Log::warning() << std::endl; } } @@ -96,23 +120,18 @@ namespace bufr { maskCache_.insert({dataProvider_.getSubset(), masks}); } - std::shared_ptr Query::findTarget(const std::string &targetName, - const std::string &query) const { - std::string querySubset; - std::vector mnemonics; - int index; - - QueryParser::splitQueryStr(query, querySubset, mnemonics, index); - + std::shared_ptr QueryRunner::findTarget(const std::string &targetName, + const Query& query) const + { std::vector branches; std::vector targetNodes; std::vector seqPath; std::vector dimPaths; std::vector dimIdxs; - bool targetMissing = !(querySubset == "*" || querySubset == dataProvider_.getSubset()); + bool targetMissing = !(query.subset == "*" || query.subset == dataProvider_.getSubset()); if (!targetMissing) { - branches.resize(mnemonics.size() - 1); + branches.resize(query.mnemonics.size() - 1); seqPath.push_back(dataProvider_.getInode()); @@ -126,7 +145,7 @@ namespace bufr { dataProvider_.getTyp(nodeIdx) == Typ::Repeat || dataProvider_.getTyp(nodeIdx) == Typ::StackedRepeat) { if (isQueryNode(nodeIdx - 1)) { - if (dataProvider_.getTag(nodeIdx) == mnemonics[mnemonicCursor + 1] && + if (dataProvider_.getTag(nodeIdx) == query.mnemonics[mnemonicCursor + 1] && tableCursor == mnemonicCursor) { mnemonicCursor++; branches[mnemonicCursor] = nodeIdx - 1; @@ -134,9 +153,9 @@ namespace bufr { tableCursor++; } seqPath.push_back(nodeIdx); - } else if (mnemonicCursor == static_cast(mnemonics.size()) - 2 && + } else if (mnemonicCursor == static_cast(query.mnemonics.size()) - 2 && tableCursor == mnemonicCursor && - dataProvider_.getTag(nodeIdx) == mnemonics.back()) { + dataProvider_.getTag(nodeIdx) == query.mnemonics.back()) { // We found a target targetNodes.push_back(nodeIdx); getDimInfo(branches, mnemonicCursor, dimPaths, dimIdxs); @@ -195,21 +214,21 @@ namespace bufr { } } - if (index > 0 && index <= gsl::narrow(targetNodes.size())) { - targetNodes = {targetNodes[index - 1]}; + if (query.index > 0 && query.index <= gsl::narrow(targetNodes.size())) { + targetNodes = {targetNodes[query.index - 1]}; } if (targetNodes.size() > 1) { std::ostringstream errMsg; errMsg << "Query string must return 1 target. Are you missing an index? "; - errMsg << query << "."; + errMsg << query.queryStr << "."; throw eckit::BadParameter(errMsg.str()); } } auto target = std::make_shared(); target->name = targetName; - target->queryStr = query; + target->queryStr = query.queryStr; target->seqPath = branches; target->nodeIds = targetNodes; @@ -226,14 +245,14 @@ namespace bufr { return target; } - bool Query::isQueryNode(int nodeIdx) const { + bool QueryRunner::isQueryNode(int nodeIdx) const { return (dataProvider_.getTyp(nodeIdx) == Typ::DelayedRep || dataProvider_.getTyp(nodeIdx) == Typ::FixedRep || dataProvider_.getTyp(nodeIdx) == Typ::DelayedRepStacked || dataProvider_.getTyp(nodeIdx) == Typ::DelayedBinary); } - void Query::getDimInfo(const std::vector &branches, + void QueryRunner::getDimInfo(const std::vector &branches, int mnemonicCursor, std::vector &dimPaths, std::vector &dimIdxs) const { @@ -274,7 +293,7 @@ namespace bufr { } } - void Query::collectData(Targets& targets, + void QueryRunner::collectData(Targets& targets, std::shared_ptr<__details::ProcessingMasks> masks, ResultSet &resultSet) const { std::vector currentPath; diff --git a/src/bufr/BufrParser/Query/Query.h b/src/bufr/BufrParser/Query/QueryRunner.h similarity index 93% rename from src/bufr/BufrParser/Query/Query.h rename to src/bufr/BufrParser/Query/QueryRunner.h index 2075466f2..8e9d0d731 100644 --- a/src/bufr/BufrParser/Query/Query.h +++ b/src/bufr/BufrParser/Query/QueryRunner.h @@ -51,15 +51,17 @@ namespace bufr { } // namespace __details /// \brief Manages the execution of queries against on a BUFR file. - class Query + class QueryRunner { public: /// \brief Constructor. /// \param[in] querySet The set of queries to execute against the BUFR file. /// \param[in, out] resultSet The object used to store the accumulated collected data. /// \param[in] dataProvider The BUFR data provider to use. - Query(const QuerySet& querySet, ResultSet& resultSet, const DataProvider& dataProvider); - void query(); + QueryRunner(const QuerySet& querySet, + ResultSet& resultSet, + const DataProvider& dataProvider); + void accumulate(); Targets getTargets() { @@ -96,8 +98,8 @@ namespace bufr { /// \brief Find the target associated with a specific user provided query string. /// \param[in] targetName The name specified for the target. /// \param[in] query The query string to use. - std::shared_ptr findTarget(const std::string& targetName, - const std::string& query) const; + std::shared_ptr findTarget(const std::string &targetName, + const Query& query) const; /// \brief Does the node idx correspond to an element you'd find in a query string (repeat diff --git a/src/bufr/BufrParser/Query/QuerySet.cpp b/src/bufr/BufrParser/Query/QuerySet.cpp index cd44f8a98..5587136de 100644 --- a/src/bufr/BufrParser/Query/QuerySet.cpp +++ b/src/bufr/BufrParser/Query/QuerySet.cpp @@ -7,20 +7,82 @@ #include "QuerySet.h" +#include +#include namespace Ingester { namespace bufr { + QuerySet::QuerySet(const std::vector& subsets) : + includesAllSubsets_(false), + limitSubsets_(std::set(subsets.begin(), + subsets.end())), + presentSubsets_({}) + { + } + + void QuerySet::add(const std::string& name, const std::string& queryStr) + { + std::vector queries; + for (const auto &query : QueryParser::parse(queryStr)) + { + if (limitSubsets_.empty()) + { + if (query.subset == "*") + { + includesAllSubsets_ = true; + } + + presentSubsets_.insert(query.subset); + } + else + { + if (query.subset == "*") + { + presentSubsets_ = limitSubsets_; + } + else + { + presentSubsets_.insert(query.subset); + + std::vector newSubsets; + std::set_intersection(limitSubsets_.begin(), + limitSubsets_.end(), + presentSubsets_.begin(), + presentSubsets_.end(), + std::back_inserter(newSubsets)); + + presentSubsets_ = std::set(newSubsets.begin(), + newSubsets.end()); + } + } + + queries.emplace_back(query); + } + + queryMap_[name] = queries; + } + + bool QuerySet::includesSubset(const std::string& subset) const + { + bool includesSubset = true; + if (!includesAllSubsets_) + { + includesSubset = (presentSubsets_.find(subset) != presentSubsets_.end()); + } + + return includesSubset; + } + std::vector QuerySet::names() const { std::vector names; - for (auto const& query : queryList_) + for (auto const& query : queryMap_) { names.push_back(query.first); } return names; } - } // namespace bufr } // namespace Ingester diff --git a/src/bufr/BufrParser/Query/QuerySet.h b/src/bufr/BufrParser/Query/QuerySet.h index adcf26622..54864418f 100644 --- a/src/bufr/BufrParser/Query/QuerySet.h +++ b/src/bufr/BufrParser/Query/QuerySet.h @@ -7,46 +7,48 @@ #pragma once +#include #include +#include #include +#include "QueryParser.h" + namespace Ingester { namespace bufr { + typedef std::set Subsets; + /// \brief Manages a collection of queries. class QuerySet { public: - QuerySet() = default; + explicit QuerySet(const std::vector& subsets); ~QuerySet() = default; /// \brief Add a new query to the collection. /// \param[in] name The name of the query. /// \param[in] query The query string. - void add(const std::string& name, const std::string& query) - { - queryList_.push_back({name, query}); - } + void add(const std::string& name, const std::string& query); /// \brief Returns the size of the collection. - size_t size() const { return queryList_.size(); } - - /// \brief Returns the name of the query at the specified index. - /// \param[in] idx The index of the query.. - /// \return The name of the query. - std::string nameAt(size_t idx) const { return queryList_.at(idx).first; } - - /// \brief Returns the query string at the specified index. - /// \param[in] idx The index of the query. - /// \return The query string. - std::string queryAt(size_t idx) const { return queryList_.at(idx).second; } + size_t size() const { return queryMap_.size(); } /// \brief Returns the names of all the queries. /// \return A vector of the names of all the queries. std::vector names() const; + /// \brief Returns a list of subsets. + /// \return A vector of the names of all the queries. + bool includesSubset(const std::string& subset) const; + + std::vector queriesFor(const std::string& name) const { return queryMap_.at(name); } + private: - std::vector> queryList_; + std::unordered_map> queryMap_; + bool includesAllSubsets_; + Subsets limitSubsets_; + Subsets presentSubsets_; }; } // namespace bufr } // namespace Ingester diff --git a/src/bufr/CMakeLists.txt b/src/bufr/CMakeLists.txt index 8d1f27042..a6cdb10aa 100644 --- a/src/bufr/CMakeLists.txt +++ b/src/bufr/CMakeLists.txt @@ -43,8 +43,8 @@ list(APPEND _ingester_srcs BufrParser/Query/VectorMath.h BufrParser/Query/QuerySet.h BufrParser/Query/QuerySet.cpp - BufrParser/Query/Query.h - BufrParser/Query/Query.cpp + BufrParser/Query/QueryRunner.h + BufrParser/Query/QueryRunner.cpp BufrParser/Query/QueryParser.h BufrParser/Query/QueryParser.cpp BufrParser/Query/ResultSet.h diff --git a/src/bufr/DataObject.h b/src/bufr/DataObject.h index 57aff3158..83ac1ca5a 100644 --- a/src/bufr/DataObject.h +++ b/src/bufr/DataObject.h @@ -14,6 +14,7 @@ #include #include +#include "eckit/exception/Exceptions.h" #include "ioda/ObsGroup.h" #include "ioda/defs.h" @@ -24,6 +25,44 @@ namespace Ingester typedef std::vector Dimensions; typedef Dimensions Location; + struct DimensionDataBase + { + std::shared_ptr dimScale; + + virtual void write(ioda::Variable& var) = 0; + }; + + template + struct DimensionData : public DimensionDataBase + { + std::vector data; + + DimensionData() = delete; + + explicit DimensionData(size_t size) : + data(std::vector(size, _default())) + { + } + + void write(ioda::Variable& var) + { + var.write(data); + } + + private: + template + T _default(typename std::enable_if::value, U>::type* = nullptr) + { + return static_cast(0); + } + + template + T _default(typename std::enable_if::value, U>::type* = nullptr) + { + return std::string(""); + } + }; + /// \brief Abstract base class for intermediate data object that bridges the Parsers with the /// IodaEncoder. class DataObjectBase @@ -71,6 +110,10 @@ namespace Ingester /// \return Float data. virtual float getAsFloat(const Location& loc) const = 0; + /// \brief Get the data at the index as an int. + /// \return Int data. + virtual int getAsInt(size_t idx) const = 0; + /// \brief Get the data at the index as an float. /// \return Float data. virtual float getAsFloat(size_t idx) const = 0; @@ -96,6 +139,20 @@ namespace Ingester const std::vector& chunks, int compressionLevel) const = 0; + /// \brief Makes a new dimension scale using this data object as the source + /// \param name The name of the dimension variable. + /// \param dimIdx The idx of the data dimension to use. + virtual std::shared_ptr createDimensionFromData( + const std::string& name, + std::size_t dimIdx) const = 0; + + /// \brief Makes a new blank dimension scale with default type. + /// \param name The name of the dimension variable. + /// \param dimIdx The idx of the data dimension to use. + virtual std::shared_ptr createEmptyDimension( + const std::string& name, + std::size_t dimIdx) const = 0; + /// \brief Slice the data object given a vector of row indices. /// \param slice The indices to slice. /// \return Slice of the data object. @@ -124,7 +181,7 @@ namespace Ingester { public: typedef T value_type; - constexpr T missingValue() const { return std::numeric_limits::max(); } + static constexpr T missingValue() { return std::numeric_limits::max(); } /// \brief Constructor. /// \param dimensions The dimensions of the data object. @@ -172,6 +229,48 @@ namespace Ingester return var; }; + /// \brief Makes a new dimension scale using this data object as the source + /// \param name The name of the dimension variable. + /// \param dimIdx The idx of the data dimension to use. + std::shared_ptr createDimensionFromData(const std::string& name, + std::size_t dimIdx) const final + { + auto dimData = std::make_shared>(getDims()[dimIdx]); + dimData->dimScale = ioda::NewDimensionScale(name, getDims()[dimIdx]); + + std::copy(data_.begin(), + data_.begin() + dimData->data.size(), + dimData->data.begin()); + + // Validate this data object is a valid (has values that repeat for each frame + for (size_t idx = 0; idx < data_.size(); idx += dimData->data.size()) + { + if (!std::equal(data_.begin(), + data_.begin() + dimData->data.size(), + data_.begin() + idx, + data_.begin() + idx + dimData->data.size())) + { + std::stringstream errStr; + errStr << "Dimension " << name << " has an invalid source field. "; + errStr << "The values dont repeat in each sequence."; + throw eckit::BadParameter(errStr.str()); + } + } + + return dimData; + } + + /// \brief Makes a new blank dimension scale with default type. + /// \param name The name of the dimension variable. + /// \param dimIdx The idx of the data dimension to use. + std::shared_ptr createEmptyDimension(const std::string& name, + std::size_t dimIdx) const final + { + auto dimData = std::make_shared>(getDims()[dimIdx]); + dimData->dimScale = ioda::NewDimensionScale(name, getDims()[dimIdx]); + return dimData; + } + /// \brief Print the data object to a output stream. void print(std::ostream &out) const final { @@ -220,7 +319,7 @@ namespace Ingester /// \brief Get the data at the location as an integer. /// \param loc The coordinate for the data point (ex: if data 2d then loc {2,4} gets data /// at that coordinate). - /// \return Integer data. + /// \return Int data. int getAsInt(const Location& loc) const final { return _getAsInt(loc); } /// \brief Get the data at the location as a float. @@ -235,12 +334,22 @@ namespace Ingester /// \return String data. std::string getAsString(const Location& loc) const final { return _getAsString(loc); } - /// \brief Get the data at the index into the internal 1d array as a float. This function + + /// \brief Get the data at the index into the internal 1d array as a int. This function /// gives you direct access to the internal data and doesn't account for dimensional - /// information (its up to the user). Note: getAsFloat(const Location&) is safer. + /// information (its up to the user). Note: getAsInt(const Location&) is safer. + /// \param idx The idx into the internal 1d array. + /// \return Int data. + int getAsInt(size_t idx) const final { return _getAsInt(idx); } + + + /// \brief idx Get the data at the index into the internal 1d array as a float. This + /// function gives you direct access to the internal data and doesn't account for + /// dimensional information (its up to the user). Note: getAsInt(const Location&) + /// is safer. /// \param idx The idx into the internal 1d array. /// \return Float data. - float getAsFloat(size_t idx) const final { return _getAsFloat(idx); } + float getAsFloat(const size_t idx) const final { return _getAsFloat(idx); } /// \brief Slice the dta object according to a list of indices. /// \param rows The indices to slice the data object by. @@ -328,7 +437,6 @@ namespace Ingester return params; } - /// \brief Get the data at the location as a float for numeric data. /// \return Float data. template @@ -383,6 +491,24 @@ namespace Ingester return get(loc); } + /// \brief Get the data at the index as a int for numeric data. + /// \return Int data. + template + int _getAsInt(size_t idx, + typename std::enable_if::value, U>::type* = nullptr) const + { + return static_cast(data_[idx]); + } + + /// \brief Get the data at the index as a int for non-numeric data. + /// \return Int data. + template + int _getAsInt(size_t idx, + typename std::enable_if::value, U>::type* = nullptr) const + { + throw std::runtime_error("The stored value is not a number"); + } + /// \brief Get the data at the index as a float for numeric data. /// \return Float data. template diff --git a/src/bufr/IodaEncoder/IodaDescription.cpp b/src/bufr/IodaEncoder/IodaDescription.cpp index 92975a82f..8493913ae 100644 --- a/src/bufr/IodaEncoder/IodaDescription.cpp +++ b/src/bufr/IodaEncoder/IodaDescription.cpp @@ -26,6 +26,7 @@ namespace const char* Name = "name"; const char* Path = "path"; const char* Paths = "paths"; + const char* Source = "source"; } // Dimension namespace Variable @@ -109,6 +110,11 @@ namespace Ingester throw eckit::BadParameter(R"(ioda::dimensions section must have either "path" or "paths".)"); } + if (dimConf.has(ConfKeys::Dimension::Source)) + { + dim.source = {dimConf.getString(ConfKeys::Dimension::Source)}; + } + addDimension(dim); } } diff --git a/src/bufr/IodaEncoder/IodaDescription.h b/src/bufr/IodaEncoder/IodaDescription.h index f943aebe3..d5813b79f 100644 --- a/src/bufr/IodaEncoder/IodaDescription.h +++ b/src/bufr/IodaEncoder/IodaDescription.h @@ -27,6 +27,7 @@ namespace Ingester { std::string name; std::vector paths; + std::string source; }; struct VariableDescription diff --git a/src/bufr/IodaEncoder/IodaEncoder.cpp b/src/bufr/IodaEncoder/IodaEncoder.cpp index 97b6d34ae..206779bdf 100644 --- a/src/bufr/IodaEncoder/IodaEncoder.cpp +++ b/src/bufr/IodaEncoder/IodaEncoder.cpp @@ -18,8 +18,6 @@ #include "ioda/Layout.h" #include "ioda/Misc/DimensionScales.h" -#include - namespace Ingester { @@ -41,6 +39,7 @@ namespace Ingester NamedPathDims namedLocDims; NamedPathDims namedExtraDims; + // Get a list of all the named dimensions { std::set dimNames; std::set dimPaths; @@ -75,23 +74,69 @@ namespace Ingester dimPaths.insert(path); } - namedExtraDims.insert({dim.paths, dim.name}); + namedExtraDims.insert({dim.paths, dim}); } } + // Got through each unique category for (const auto& categories : dataContainer->allSubCategories()) { // Create the dimensions variables - std::map> dimMap; + std::map> dimMap; auto dataObjectGroupBy = dataContainer->getGroupByObject( description_.getVariables()[0].source, categories); - dimMap[LocationName] = ioda::NewDimensionScale( - LocationName, dataObjectGroupBy->getDims()[0]); + // When we find that the primary index is zero we need to skip this category + if (dataObjectGroupBy->getDims()[0] == 0) + { + for (auto category : categories) + { + oops::Log::warning() << " Skipped category " << category << std::endl; + } + + continue; + } + + // Create the root Location dimension for this category + auto rootDim = std::make_shared>(dataObjectGroupBy->getDims()[0]); + rootDim->dimScale = + ioda::NewDimensionScale(LocationName, dataObjectGroupBy->getDims()[0]); + dimMap[LocationName] = rootDim; - namedLocDims[{dataObjectGroupBy->getDimPaths()[0]}] = LocationName; + // Add the root Location dimension as a named dimension + auto rootLocation = DimensionDescription(); + rootLocation.name = LocationName; + rootLocation.source = ""; + namedLocDims[{dataObjectGroupBy->getDimPaths()[0]}] = rootLocation; + // Create the dimension data for dimensions which include source data + for (const auto& dimDesc : description_.getDims()) + { + if (!dimDesc.source.empty()) + { + auto dataObject = dataContainer->get(dimDesc.source, categories); + + // Validate the path for the source field makes sense for the dimension + if (std::find(dimDesc.paths.begin(), + dimDesc.paths.end(), + dataObject->getDimPaths().back()) == dimDesc.paths.end()) + { + std::stringstream errStr; + errStr << "ioda::dimensions: Source field " << dimDesc.source << " in "; + errStr << dimDesc.name << " is not in the correct path."; + throw eckit::BadParameter(errStr.str()); + } + + // Create the dimension data + dimMap[dimDesc.name] = dataObject->createDimensionFromData( + dimDesc.name, + dataObject->getDimPaths().size() - 1); + } + } + + // Discover and create the dimension data for dimensions with no source field. If + // dim is un-named (not listed) then call it dim_ int autoGenDimNumber = 2; for (const auto& varDesc : description_.getVariables()) { @@ -104,7 +149,7 @@ namespace Ingester if (existsInNamedPath(dimPath, namedExtraDims)) { - dimName = nameForDimPath(dimPath, namedExtraDims); + dimName = dimForDimPath(dimPath, namedExtraDims).name; } else { @@ -112,29 +157,22 @@ namespace Ingester newDimStr << DefualtDimName << "_" << autoGenDimNumber; dimName = newDimStr.str(); - namedExtraDims[{dimPath}] = dimName; + + auto dimDesc = DimensionDescription(); + dimDesc.name = dimName; + dimDesc.source = ""; + + namedExtraDims[{dimPath}] = dimDesc; autoGenDimNumber++; } if (dimMap.find(dimName) == dimMap.end()) { - dimMap[dimName] = ioda::NewDimensionScale(dimName, - dataObject->getDims()[dimIdx]); + dimMap[dimName] = dataObject->createEmptyDimension(dimName, dimIdx); } } } - // When we find that the primary index is zero we need to skip this category - if (dataObjectGroupBy->getDims()[0] == 0) - { - for (auto category : categories) - { - oops::Log::warning() << " Skipped category " << category << std::endl; - } - - continue; - } - // Make the filename string if (description_.getBackend() == ioda::Engines::BackendNames::Hdf5File) { @@ -164,7 +202,7 @@ namespace Ingester ioda::NewDimensionScales_t allDims; for (auto dimPair : dimMap) { - allDims.push_back(dimPair.second); + allDims.push_back(dimPair.second->dimScale); } auto policy = ioda::detail::DataLayoutPolicy::Policies::ObsGroup; @@ -177,15 +215,42 @@ namespace Ingester global->addTo(rootGroup); } - // Create Variables + // Write the Dimension Variables + for (const auto& dimDesc : description_.getDims()) + { + if (!dimDesc.source.empty()) + { + auto dataObject = dataContainer->get(dimDesc.source, categories); + for (size_t dimIdx = 0; dimIdx < dataObject->getDims().size(); dimIdx++) + { + auto dimPath = dataObject->getDimPaths()[dimIdx]; + + NamedPathDims namedPathDims; + if (dimIdx == 0) + { + namedPathDims = namedLocDims; + } + else + { + namedPathDims = namedExtraDims; + } + + auto dimName = dimForDimPath(dimPath, namedPathDims).name; + auto dimVar = obsGroup.vars[dimName]; + dimMap[dimName]->write(dimVar); + } + } + } + + // Write all the other Variables for (const auto& varDesc : description_.getVariables()) { std::vector chunks; auto dimensions = std::vector(); - auto data = dataContainer->get(varDesc.source, categories); - for (size_t dimIdx = 0; dimIdx < data->getDims().size(); dimIdx++) + auto dataObject = dataContainer->get(varDesc.source, categories); + for (size_t dimIdx = 0; dimIdx < dataObject->getDims().size(); dimIdx++) { - auto dimPath = data->getDimPaths()[dimIdx]; + auto dimPath = dataObject->getDimPaths()[dimIdx]; NamedPathDims namedPathDims; if (dimIdx == 0) @@ -197,7 +262,7 @@ namespace Ingester namedPathDims = namedExtraDims; } - auto dimVar = obsGroup.vars[nameForDimPath(dimPath, namedPathDims)]; + auto dimVar = obsGroup.vars[dimForDimPath(dimPath, namedPathDims).name]; dimensions.push_back(dimVar); if (dimIdx < varDesc.chunks.size()) @@ -211,11 +276,11 @@ namespace Ingester } } - auto var = data->createVariable(obsGroup, - varDesc.name, - dimensions, - chunks, - varDesc.compressionLevel); + auto var = dataObject->createVariable(obsGroup, + varDesc.name, + dimensions, + chunks, + varDesc.compressionLevel); var.atts.add("long_name", { varDesc.longName }, {1}); @@ -305,7 +370,7 @@ namespace Ingester bool IodaEncoder::existsInNamedPath(const std::string& path, const NamedPathDims& pathMap) const { - for (auto paths : pathMap) + for (auto& paths : pathMap) { if (std::find(paths.first.begin(), paths.first.end(), path) != paths.first.end()) { @@ -316,20 +381,20 @@ namespace Ingester return false; } - std::string IodaEncoder::nameForDimPath(const std::string& path, - const NamedPathDims& pathMap) const + DimensionDescription IodaEncoder::dimForDimPath(const std::string& path, + const NamedPathDims& pathMap) const { - std::string name; + DimensionDescription dimDesc; for (auto paths : pathMap) { if (std::find(paths.first.begin(), paths.first.end(), path) != paths.first.end()) { - name = paths.second; + dimDesc = paths.second; break; } } - return name; + return dimDesc; } } // namespace Ingester diff --git a/src/bufr/IodaEncoder/IodaEncoder.h b/src/bufr/IodaEncoder/IodaEncoder.h index 0eb447bd2..0153eb620 100644 --- a/src/bufr/IodaEncoder/IodaEncoder.h +++ b/src/bufr/IodaEncoder/IodaEncoder.h @@ -33,7 +33,7 @@ namespace Ingester bool append = false); private: - typedef std::map, std::string> NamedPathDims; + typedef std::map, DimensionDescription> NamedPathDims; /// \brief The description const IodaDescription description_; @@ -55,10 +55,11 @@ namespace Ingester /// \return True if the subquery string is a named dimension. bool existsInNamedPath(const std::string& path, const NamedPathDims& pathMap) const; - /// \brief Get the name associated with the named dimension. + /// \brief Get the description associated with the named dimension. /// \param path The subquery string for the dimension. /// \param pathMap The map of named dimensions. - /// \return The name associated with the named dimension. - std::string nameForDimPath(const std::string& path, const NamedPathDims& pathMap) const; + /// \return The dimension description associated with the named dimension. + DimensionDescription dimForDimPath(const std::string& path, + const NamedPathDims& pathMap) const; }; } // namespace Ingester diff --git a/src/bufr/README.md b/src/bufr/README.md index 7fbff8c0a..55f49e8ed 100644 --- a/src/bufr/README.md +++ b/src/bufr/README.md @@ -55,6 +55,10 @@ Defines how to read data from the input BUFR file. Its sections are as follows: ```yaml exports: group_by_variable: longitude # Optional + subsets: + - NC004001 + - NC004002 + - NC004003 variables: timestamp: datetime: @@ -74,6 +78,8 @@ Defines how to read data from the input BUFR file. Its sections are as follows: - offset: -180 latitude: query: "*/CLAT" + channels: + query: "[*/BRITCSTC/CHNM, */BRIT/CHNM]" radiance: query: "[*/BRITCSTC/TMBR, */BRIT/TMBR]" @@ -97,7 +103,8 @@ ioda encoder. It has the following sections: * `group_by_variable` _(optional)_ String value that defines the name of the variable to group observations by. If this field is missing then observations will not be grouped. - +* `subsets` _(optional)_ List of subsets that you want to process. If the field is not present then + all subsets will be processed in accordance with the query definitions. * `variables` * **keys** are arbitrary strings (anything you want). They can be referenced in the ioda section. * **values** (One of these types): @@ -147,6 +154,7 @@ ioda encoder. It has the following sections: paths: - "*/BRIT" - "*/BRITCSTC" + source: variables/channels variables: - name: "MetaData/dateTime" @@ -184,8 +192,13 @@ The `ioda` section defines the ObsGroup objects that will be created. replaced with the relevant split category ID for that file to form a unique name for every file. * `dimensions` used to define dimension information in variables * `name` arbitrary name for the dimension - * `paths` - list of subqueries for that dimension (different paths for different BUFR subsets - only) **or** `path` Single subquery for that dimension ex: **\*/BRITCSTC** + * `paths` list of subqueries for that dimension (different paths for different BUFR subsets + only) **or** `path` Single subquery for that dimension ex: **\*/BRITCSTC** + * `source` (optional) The exported data that acts as the source field for this dimension. + The data dimension values (labels) will reflect this field. The source is validated + to make sure it makes sense for the dimension and that it is made up of repeated + values for each occurrence of the sequence. The source field must be inside the + dimension and be 1:1 with it. * `variables` List of output variable objects to create. * `name` standardized pathname **group**/**var_name**. * **var_name** name for the variable diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0aed22493..d8004920a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -222,6 +222,9 @@ if( iodaconv_bufr_ENABLED ) testinput/satwind_Insat_wmo.bufr testinput/vadwinds_wmoBUFR2ioda.yaml testinput/vadwinds_wmo_multi.bufr + testinput/gdas.t12z.aircft.tm00.bufr_d + testinput/bufr_specific_subsets_by_query.yaml + testinput/bufr_specifying_subsets.yaml ) list( APPEND test_output @@ -265,6 +268,7 @@ if( iodaconv_bufr_ENABLED ) testoutput/satwind_Himawari.nc testoutput/satwind_Insat.nc testoutput/vadwinds_wmo_multi.nc + testoutput/bufr_specifying_subsets.nc ) endif() @@ -1272,6 +1276,24 @@ if(iodaconv_bufr_ENABLED) gdas.t00z.sevcsr.tm00.nc ${IODA_CONV_COMP_TOL_ZERO} DEPENDS bufr2ioda.x ) + ecbuild_add_test( TARGET test_iodaconv_bufr_specific_subsets_by_query + TYPE SCRIPT + COMMAND bash + ARGS ${CMAKE_BINARY_DIR}/bin/iodaconv_comp.sh + netcdf + "${CMAKE_BINARY_DIR}/bin/bufr2ioda.x testinput/bufr_specific_subsets_by_query.yaml" + bufr_specifying_subsets.nc ${IODA_CONV_COMP_TOL_ZERO} + DEPENDS bufr2ioda.x ) + + ecbuild_add_test( TARGET test_iodaconv_bufr_specifying_subsets + TYPE SCRIPT + COMMAND bash + ARGS ${CMAKE_BINARY_DIR}/bin/iodaconv_comp.sh + netcdf + "${CMAKE_BINARY_DIR}/bin/bufr2ioda.x testinput/bufr_specifying_subsets.yaml" + bufr_specifying_subsets.nc ${IODA_CONV_COMP_TOL_ZERO} + DEPENDS bufr2ioda.x ) + # FIXME: Greg Thompson # ecbuild_add_test( TARGET test_iodaconv_bufr_aircar # TYPE SCRIPT diff --git a/test/testinput/bufr_hrs.yaml b/test/testinput/bufr_hrs.yaml index 830125744..1c99f8988 100644 --- a/test/testinput/bufr_hrs.yaml +++ b/test/testinput/bufr_hrs.yaml @@ -23,6 +23,8 @@ observations: query: "*/CLON" latitude: query: "*/CLAT" + channel: + query: "[*/BRITCSTC/CHNM, */BRIT/CHNM]" radiance: query: "[*/BRITCSTC/TMBR, */BRIT/TMBR]" @@ -31,10 +33,11 @@ observations: obsdataout: "./testrun/gdas.t00z.1bhrs4.tm00.nc" dimensions: - - name: Channel + - name: Channel paths: - "*/BRIT" - "*/BRITCSTC" +# source: variables/channel variables: - name: "dateTime@MetaData" diff --git a/test/testinput/bufr_mhs.yaml b/test/testinput/bufr_mhs.yaml index aad5fc47a..279526979 100644 --- a/test/testinput/bufr_mhs.yaml +++ b/test/testinput/bufr_mhs.yaml @@ -40,6 +40,8 @@ observations: query: "*/SAZA" vaz: query: "*/BEARAZ" + channels: + query: "[*/BRITCSTC/CHNM, */BRIT/CHNM]" radiance: query: "[*/BRITCSTC/TMBR, */BRIT/TMBR]" @@ -52,6 +54,7 @@ observations: paths: - "*/BRIT" - "*/BRITCSTC" + source: variables/channels globals: - name: "sensorCentralFrequency@MetaData" diff --git a/test/testinput/bufr_specific_subsets_by_query.yaml b/test/testinput/bufr_specific_subsets_by_query.yaml new file mode 100644 index 000000000..a2023f3d1 --- /dev/null +++ b/test/testinput/bufr_specific_subsets_by_query.yaml @@ -0,0 +1,48 @@ +# (C) Copyright 2020 NOAA/NWS/NCEP/EMC +# # # +# # # This software is licensed under the terms of the Apache Licence Version 2.0 +# # # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# +observations: + - obs space: + name: bufr + + obsdatain: "./testinput/gdas.t12z.aircft.tm00.bufr_d" + + exports: + #MetaData + variables: + timestamp: + datetime: + year: "[NC004001/YEAR, NC004002/YEAR, NC004003/YEAR, NC004006/YEAR, NC004009/YEAR, NC004010/YEAR, NC004011/YEAR]" + month: "[NC004001/MNTH, NC004002/MNTH, NC004003/MNTH, NC004006/MNTH, NC004009/MNTH, NC004010/MNTH, NC004011/MNTH]" + day: "[NC004001/DAYS, NC004002/DAYS, NC004003/DAYS, NC004006/DAYS, NC004009/DAYS, NC004010/DAYS, NC004011/DAYS]" + hour: "[NC004001/HOUR, NC004002/HOUR, NC004003/HOUR, NC004006/HOUR, NC004009/HOUR, NC004010/HOUR, NC004011/HOUR]" + minute: "[NC004001/MINU, NC004002/MINU, NC004003/MINU, NC004006/MINU, NC004009/MINU, NC004010/MINU, NC004011/MINU]" + latitude: + query: "[NC004001/CLAT, NC004002/CLAT, NC004003/CLAT, NC004006/CLATH, NC004009/CLATH, NC004010/CLATH, NC004011/CLATH]" + longitude: + query: "[NC004001/CLON, NC004002/CLON, NC004003/CLON, NC004006/CLONH, NC004009/CLONH, NC004010/CLONH, NC004011/CLONH]" + + ioda: + backend: netcdf + obsdataout: "./testrun/bufr_specifying_subsets.nc" + + #MetaData + variables: + - name: "MetaData/dateTime" + source: variables/timestamp + longName: "Datetime" + units: "seconds since 1970-01-01T00:00:00Z" + + - name: "MetaData/latitude" + source: variables/latitude + longName: "Latitude" + units: "degree_north" + range: [-90, 90] + + - name: "MetaData/longitude" + source: variables/longitude + longName: "Longitude" + units: "degree_east" + range: [-180, 180] diff --git a/test/testinput/bufr_specifying_subsets.yaml b/test/testinput/bufr_specifying_subsets.yaml new file mode 100644 index 000000000..3e45a1acc --- /dev/null +++ b/test/testinput/bufr_specifying_subsets.yaml @@ -0,0 +1,57 @@ +# (C) Copyright 2020 NOAA/NWS/NCEP/EMC +# # # +# # # This software is licensed under the terms of the Apache Licence Version 2.0 +# # # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# +observations: + - obs space: + name: bufr + + obsdatain: "./testinput/gdas.t12z.aircft.tm00.bufr_d" + + exports: + subsets: + - NC004001 + - NC004002 + - NC004003 + - NC004006 + - NC004009 + - NC004010 + - NC004011 + + #MetaData + variables: + timestamp: + datetime: + year: "*/YEAR" + month: "*/MNTH" + day: "*/DAYS" + hour: "*/HOUR" + minute: "*/MINU" + latitude: + query: "[*/CLATH, */CLAT]" + longitude: + query: "[*/CLONH, */CLON]" + + ioda: + backend: netcdf + obsdataout: "./testrun/bufr_specifying_subsets.nc" + + #MetaData + variables: + - name: "MetaData/dateTime" + source: variables/timestamp + longName: "Datetime" + units: "seconds since 1970-01-01T00:00:00Z" + + - name: "MetaData/latitude" + source: variables/latitude + longName: "Latitude" + units: "degree_north" + range: [-90, 90] + + - name: "MetaData/longitude" + source: variables/longitude + longName: "Longitude" + units: "degree_east" + range: [-180, 180] diff --git a/test/testinput/gdas.t12z.aircft.tm00.bufr_d b/test/testinput/gdas.t12z.aircft.tm00.bufr_d new file mode 100644 index 000000000..704f6fa51 --- /dev/null +++ b/test/testinput/gdas.t12z.aircft.tm00.bufr_d @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41b1ee8552c02f8015a32fe3eea1708fa3fd39c8bf02bd5cd4a7a42d50c2adf9 +size 123088 diff --git a/test/testoutput/bufr_specifying_subsets.nc b/test/testoutput/bufr_specifying_subsets.nc new file mode 100644 index 000000000..ecb61585f --- /dev/null +++ b/test/testoutput/bufr_specifying_subsets.nc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f3b86786b014bf39de2abaa815688ee982a32aadebe50149ea016acadf5646f +size 24062 diff --git a/test/testoutput/gdas.t18z.1bmhs.tm00.nc b/test/testoutput/gdas.t18z.1bmhs.tm00.nc index db596cd3a..41e8fa886 100644 --- a/test/testoutput/gdas.t18z.1bmhs.tm00.nc +++ b/test/testoutput/gdas.t18z.1bmhs.tm00.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ca1088d8d7f6185c3313486cf2924a1f9f0aa25a8a733fb23ece7112eca0b60 +oid sha256:b8dde16bfd27b73a4777f48ddbaa76b86c32d82994c57091bf2f6eb4269192a1 size 798784