From e65cc3ed6a411fa7fc1e60aeeb25ddf7297379aa Mon Sep 17 00:00:00 2001 From: Ron McLaren Date: Tue, 20 Sep 2022 11:52:41 -0400 Subject: [PATCH 01/10] Fixed some bugs with the datetime object. Added some validation. --- .../Exports/Splits/CategorySplit.cpp | 7 ++- .../Exports/Variables/DatetimeVariable.cpp | 54 +++++++++++-------- src/bufr/DataObject.h | 31 ++++++----- 3 files changed, 51 insertions(+), 41 deletions(-) diff --git a/src/bufr/BufrParser/Exports/Splits/CategorySplit.cpp b/src/bufr/BufrParser/Exports/Splits/CategorySplit.cpp index 77c51fba4..8d30aa319 100644 --- a/src/bufr/BufrParser/Exports/Splits/CategorySplit.cpp +++ b/src/bufr/BufrParser/Exports/Splits/CategorySplit.cpp @@ -83,11 +83,10 @@ namespace Ingester auto location = Location(dataObject->getDims().size(), 0); location[0] = rowIdx; - auto itemVal = dataObject->getAsFloat(location); - if (trunc(itemVal) == itemVal) + if (auto dat = std::dynamic_pointer_cast> (dataObject)) { - nameMap_.insert({static_cast (itemVal), - std::to_string(static_cast (itemVal))}); + auto itemVal = dat->get(location); + nameMap_.insert({itemVal, std::to_string(itemVal)}); } else { diff --git a/src/bufr/BufrParser/Exports/Variables/DatetimeVariable.cpp b/src/bufr/BufrParser/Exports/Variables/DatetimeVariable.cpp index d20bd149c..50116edbc 100644 --- a/src/bufr/BufrParser/Exports/Variables/DatetimeVariable.cpp +++ b/src/bufr/BufrParser/Exports/Variables/DatetimeVariable.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -71,8 +72,7 @@ namespace Ingester std::shared_ptr DatetimeVariable::exportData(const BufrDataMap& map) { checkKeys(map); - static const float missing = 1.e+11; - static const int64_t missing_int = INT_MIN; + static const int missingInt = DataObject::missingValue(); std::tm tm{}; // zero initialise tm.tm_year = 1970-1900; // 1970 @@ -83,26 +83,38 @@ namespace Ingester tm.tm_sec = 0; tm.tm_isdst = 0; // Not daylight saving std::time_t epochDt = std::mktime(&tm); - std::time_t 
this_time = std::mktime(&tm); - int64_t diff_time; std::vector timeOffsets; timeOffsets.reserve(map.at(getExportKey(ConfKeys::Year))->size()); + // Validation + if (map.at(getExportKey(ConfKeys::Year))->getDims().size() != 1 || + map.at(getExportKey(ConfKeys::Month))->getDims().size() != 1 || + map.at(getExportKey(ConfKeys::Day))->getDims().size() != 1 || + (!minuteQuery_.empty() && + map.at(getExportKey(ConfKeys::Minute))->getDims().size() != 1) || + (!secondQuery_.empty() && + map.at(getExportKey(ConfKeys::Second))->getDims().size() != 1)) + { + std::ostringstream errStr; + errStr << "Datetime variables must be 1 dimensional."; + throw eckit::BadParameter(errStr.str()); + } + for (unsigned int idx = 0; idx < map.at(getExportKey(ConfKeys::Year))->size(); idx++) { - int year = static_cast(map.at(getExportKey(ConfKeys::Year))->getAsFloat(idx)); - int month = static_cast(map.at(getExportKey(ConfKeys::Month))->getAsFloat(idx)); - int day = static_cast(map.at(getExportKey(ConfKeys::Day))->getAsFloat(idx)); - int hour = static_cast(map.at(getExportKey(ConfKeys::Hour))->getAsFloat(idx)); + int year = map.at(getExportKey(ConfKeys::Year))->getAsInt(idx); + int month = map.at(getExportKey(ConfKeys::Month))->getAsInt(idx); + int day = map.at(getExportKey(ConfKeys::Day))->getAsInt(idx); + int hour = map.at(getExportKey(ConfKeys::Hour))->getAsInt(idx); int minutes = 0; int seconds = 0; - diff_time = missing_int; - if (year != missing && - month != missing && - day != missing && - hour != missing) + auto diff_time = DataObject::missingValue(); + if (year != missingInt && + month != missingInt && + day != missingInt && + hour != missingInt) { tm.tm_year = year - 1900; tm.tm_mon = month - 1; @@ -114,8 +126,7 @@ namespace Ingester if (!minuteQuery_.empty()) { - minutes = - static_cast(map.at(getExportKey(ConfKeys::Minute))->getAsFloat(idx)); + minutes = map.at(getExportKey(ConfKeys::Minute))->getAsInt(idx); if (minutes >= 0 && minutes < 60) { @@ -125,8 +136,7 @@ namespace Ingester 
if (!secondQuery_.empty()) { - seconds = - static_cast(map.at(getExportKey(ConfKeys::Second))->getAsFloat(idx)); + seconds = map.at(getExportKey(ConfKeys::Second))->getAsInt(idx); if (seconds >= 0 && seconds < 60) { @@ -134,16 +144,18 @@ namespace Ingester } } - this_time = std::mktime(&tm); - if (this_time < 0) + // Be careful with mktime as it can be very slow. + auto thisTime = std::mktime(&tm); + if (thisTime < 0) { oops::Log::warning() << "Caution, date suspicious date (year, month, day): " << year << ", " << month << ", " << day << std::endl; } - diff_time = static_cast(difftime(this_time, epochDt) - + hoursFromUtc_*3600); + + diff_time = static_cast(difftime(thisTime, epochDt) + + hoursFromUtc_ * 3600); } timeOffsets.push_back(diff_time); diff --git a/src/bufr/DataObject.h b/src/bufr/DataObject.h index 5d3766579..8b144778c 100644 --- a/src/bufr/DataObject.h +++ b/src/bufr/DataObject.h @@ -110,9 +110,9 @@ namespace Ingester /// \return Float data. virtual float getAsFloat(const Location& loc) const = 0; - /// \brief Get the data at the index as an float. + /// \brief Get the data at the index as an int. /// \return Float data. - virtual float getAsFloat(size_t idx) const = 0; + virtual int getAsInt(size_t idx) const = 0; /// \brief Get the data at the Location as an string. /// \return String data. @@ -177,7 +177,7 @@ namespace Ingester { public: typedef T value_type; - constexpr T missingValue() const { return std::numeric_limits::max(); } + static constexpr T missingValue() { return std::numeric_limits::max(); } /// \brief Constructor. /// \param dimensions The dimensions of the data object. @@ -315,7 +315,7 @@ namespace Ingester /// \brief Get the data at the location as an integer. /// \param loc The coordinate for the data point (ex: if data 2d then loc {2,4} gets data /// at that coordinate). - /// \return Integer data. + /// \return Int data. 
int getAsInt(const Location& loc) const final { return _getAsInt(loc); } /// \brief Get the data at the location as a float. @@ -330,12 +330,12 @@ namespace Ingester /// \return String data. std::string getAsString(const Location& loc) const final { return _getAsString(loc); } - /// \brief Get the data at the index into the internal 1d array as a float. This function + /// \brief Get the data at the index into the internal 1d array as a int. This function /// gives you direct access to the internal data and doesn't account for dimensional - /// information (its up to the user). Note: getAsFloat(const Location&) is safer. + /// information (its up to the user). Note: getAsInt(const Location&) is safer. /// \param idx The idx into the internal 1d array. - /// \return Float data. - float getAsFloat(size_t idx) const final { return _getAsFloat(idx); } + /// \return Int data. + int getAsInt(size_t idx) const final { return _getAsInt(idx); } /// \brief Slice the dta object according to a list of indices. /// \param rows The indices to slice the data object by. @@ -477,23 +477,22 @@ namespace Ingester return get(loc); } - /// \brief Get the data at the index as a float for numeric data. - /// \return Float data. + /// \brief Get the data at the index as a int for numeric data. + /// \return Int data. template - float _getAsFloat(size_t idx, + int _getAsInt(size_t idx, typename std::enable_if::value, U>::type* = nullptr) const { - return static_cast(data_[idx]); + return static_cast(data_[idx]); } - /// \brief Get the data at the index as a float for non-numeric data. - /// \return Float data. + /// \brief Get the data at the index as a int for non-numeric data. + /// \return Int data. template - float _getAsFloat(size_t idx, + int _getAsInt(size_t idx, typename std::enable_if::value, U>::type* = nullptr) const { throw std::runtime_error("The stored value was is not a number"); - return 0.0f; } /// \brief Set the data associated with this data object (numeric DataObject). 
From c183def78e6bf9bf2d24eb646f1310a18e9446a6 Mon Sep 17 00:00:00 2001 From: Ron McLaren Date: Tue, 20 Sep 2022 16:45:19 -0400 Subject: [PATCH 02/10] Made modifications so that the the messages would be filtered based on the query subset defenition. --- src/bufr/BufrParser/Query/File.cpp | 28 ++++-- src/bufr/BufrParser/Query/QueryParser.cpp | 33 +++++-- src/bufr/BufrParser/Query/QueryParser.h | 19 ++-- .../Query/{Query.cpp => QueryRunner.cpp} | 93 +++++++++++-------- .../Query/{Query.h => QueryRunner.h} | 8 +- src/bufr/BufrParser/Query/QuerySet.cpp | 30 +++++- src/bufr/BufrParser/Query/QuerySet.h | 31 +++---- src/bufr/CMakeLists.txt | 4 +- src/bufr/DataObject.h | 2 +- 9 files changed, 159 insertions(+), 89 deletions(-) rename src/bufr/BufrParser/Query/{Query.cpp => QueryRunner.cpp} (85%) rename src/bufr/BufrParser/Query/{Query.h => QueryRunner.h} (94%) diff --git a/src/bufr/BufrParser/Query/File.cpp b/src/bufr/BufrParser/Query/File.cpp index 2684baf30..fae7447bf 100644 --- a/src/bufr/BufrParser/Query/File.cpp +++ b/src/bufr/BufrParser/Query/File.cpp @@ -7,9 +7,11 @@ #include "File.h" +#include + #include "bufr_interface.h" -#include "Query.h" +#include "QueryRunner.h" #include "QuerySet.h" #include "DataProvider.h" @@ -61,7 +63,7 @@ namespace bufr { { static int SubsetLen = 9; unsigned int messageNum = 0; - char subset[SubsetLen]; + char subsetChars[SubsetLen]; int iddate; int bufrLoc; @@ -70,18 +72,24 @@ namespace bufr { auto dataProvider = DataProvider(fileUnit_); auto resultSet = ResultSet(querySet.names()); - auto query = Query(querySet, resultSet, dataProvider); + auto query = QueryRunner(querySet, resultSet, dataProvider); - while (ireadmg_f(fileUnit_, subset, &iddate, SubsetLen) == 0) + while (ireadmg_f(fileUnit_, subsetChars, &iddate, SubsetLen) == 0) { - while (ireadsb_f(fileUnit_) == 0) + auto subset = std::string(subsetChars); + subset.erase(std::remove_if(subset.begin(), subset.end(), isspace), subset.end()); + + if (querySet.includesSubset(subset)) { - 
status_f(fileUnit_, &bufrLoc, &il, &im); - dataProvider.updateData(bufrLoc); - query.query(); + while (ireadsb_f(fileUnit_) == 0) + { + status_f(fileUnit_, &bufrLoc, &il, &im); + dataProvider.updateData(bufrLoc); + query.query(); + } + + if (next > 0 && ++messageNum >= next) break; } - - if (next > 0 && ++messageNum >= next) break; } resultSet.setTargets(query.getTargets()); diff --git a/src/bufr/BufrParser/Query/QueryParser.cpp b/src/bufr/BufrParser/Query/QueryParser.cpp index e2d91ebad..28ea941f5 100644 --- a/src/bufr/BufrParser/Query/QueryParser.cpp +++ b/src/bufr/BufrParser/Query/QueryParser.cpp @@ -14,7 +14,19 @@ namespace Ingester { namespace bufr { - std::vector QueryParser::splitMultiquery(const std::string &query) { + std::vector QueryParser::parse(const std::string& queryStr) + { + std::vector queries; + for (auto& subStr : QueryParser::splitMultiquery(queryStr)) + { + queries.emplace_back(QueryParser::splitQueryStr(subStr)); + } + + return queries; + } + + std::vector QueryParser::splitMultiquery(const std::string &query) + { std::vector subqueries; // Remove whitespace from query and assign to working_str @@ -66,11 +78,8 @@ namespace bufr { return subqueries; } - - void QueryParser::splitQueryStr(const std::string& query, - std::string& subset, - std::vector& mnemonics, - int& index) { + Query QueryParser::splitQueryStr(const std::string& query) + { // Find positions of slashes std::vector slashPositions; size_t slashIdx = 0; @@ -89,7 +98,7 @@ namespace bufr { } // Capture the subset string - subset = query.substr(0, slashPositions[0]); + auto subset = query.substr(0, slashPositions[0]); std::vector mnemonicStrings(slashPositions.size()); @@ -105,7 +114,7 @@ namespace bufr { std::string lastElement = query.substr(slashPositions[slashPositions.size() - 1] + 1); // Parse last element - index = -1; + int index = -1; size_t startSubscript = lastElement.find_first_of("["); size_t endSubscript = lastElement.find_first_of("]"); if (startSubscript != 
std::string::npos && endSubscript != std::string::npos) @@ -126,7 +135,13 @@ namespace bufr { mnemonicStrings.back() = lastElement; } - mnemonics = mnemonicStrings; + auto queryObj = Query(); + queryObj.queryStr = query; + queryObj.subset = subset; + queryObj.mnemonics = mnemonicStrings; + queryObj.index = index; + + return queryObj; } } // namespace bufr } // namespace Ingester diff --git a/src/bufr/BufrParser/Query/QueryParser.h b/src/bufr/BufrParser/Query/QueryParser.h index 1f860cea2..dd360e78e 100644 --- a/src/bufr/BufrParser/Query/QueryParser.h +++ b/src/bufr/BufrParser/Query/QueryParser.h @@ -13,24 +13,29 @@ namespace Ingester { namespace bufr { + struct Query + { + std::string queryStr; + std::string subset; + std::vector mnemonics; + int index; + }; + /// \brief Parses a user supplied query string into its component parts. /// \note Will be refactored to properly tokenize the query string. class QueryParser { public: + static std::vector parse(const std::string& queryStr); + + private: /// \brief Split a multi query (ex: ["*/CLONH", "*/CLON"]) into a vector of single queries. /// \param query The query to split. static std::vector splitMultiquery(const std::string& query); /// \brief Split a single query (ex: "*/ROSEQ1/ROSEQ2/PCCF[2]") into its component parts. /// \param query The query to split. - /// \param[out] subset The subset part of the query (ex: *). - /// \param[out] mnemonics Query path components (ex: ["ROSEQ1", "ROSEQ2", "PCCF"]). - /// \param[out] index The index associated with this query (ex: 2). - static void splitQueryStr(const std::string& query, - std::string& subset, - std::vector& mnemonics, - int& index); + static Query splitQueryStr(const std::string& query); private: /// \brief Private constructor. 
diff --git a/src/bufr/BufrParser/Query/Query.cpp b/src/bufr/BufrParser/Query/QueryRunner.cpp similarity index 85% rename from src/bufr/BufrParser/Query/Query.cpp rename to src/bufr/BufrParser/Query/QueryRunner.cpp index af1b685ac..c07ae84de 100644 --- a/src/bufr/BufrParser/Query/Query.cpp +++ b/src/bufr/BufrParser/Query/QueryRunner.cpp @@ -4,7 +4,7 @@ * This software is licensed under the terms of the Apache Licence Version 2.0 * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. */ -#include "Query.h" +#include "QueryRunner.h" #include "eckit/exception/Exceptions.h" #include "oops/util/Logger.h" @@ -12,7 +12,6 @@ #include #include -#include "QueryParser.h" #include "Constants.h" namespace Ingester { @@ -23,7 +22,7 @@ namespace bufr { std::vector counts; }; - Query::Query(const QuerySet &querySet, + QueryRunner::QueryRunner(const QuerySet &querySet, ResultSet &resultSet, const DataProvider &dataProvider) : querySet_(querySet), @@ -32,7 +31,7 @@ namespace bufr { { } - void Query::query() { + void QueryRunner::query() { Targets targets; std::shared_ptr<__details::ProcessingMasks> masks; @@ -40,7 +39,7 @@ namespace bufr { collectData(targets, masks, resultSet_); } - void Query::findTargets(Targets &targets, std::shared_ptr<__details::ProcessingMasks> &masks) { + void QueryRunner::findTargets(Targets &targets, std::shared_ptr<__details::ProcessingMasks> &masks) { // Check if the target list for this subset is cached if (targetCache_.find(dataProvider_.getSubset()) != targetCache_.end()) { targets = targetCache_.at(dataProvider_.getSubset()); @@ -55,20 +54,21 @@ namespace bufr { masks->valueNodeMask.resize(numNodes, false); masks->pathNodeMask.resize(numNodes, false); - for (size_t targetIdx = 0; targetIdx < querySet_.size(); ++targetIdx) { - auto queryName = querySet_.nameAt(targetIdx); - auto queryStr = querySet_.queryAt(targetIdx); - - auto subQueries = QueryParser::splitMultiquery(queryStr); + for (size_t targetIdx = 0; targetIdx < 
querySet_.size(); ++targetIdx) + { + auto queryName = querySet_.names()[targetIdx]; + auto subQueries = querySet_.queriesFor(queryName); bool foundTarget = false; std::shared_ptr target; - for (size_t subQueryIdx = 0; subQueryIdx < subQueries.size(); ++subQueryIdx) { - const std::string &subQuery = subQueries[subQueryIdx]; + for (size_t subQueryIdx = 0; subQueryIdx < subQueries.size(); ++subQueryIdx) + { + const Query& subQuery = subQueries[subQueryIdx]; target = findTarget(queryName, subQuery); - if (target->nodeIds.size() > 0) { + if (target->nodeIds.size() > 0) + { // Collect mask data masks->valueNodeMask[target->nodeIds[0]] = true; for (size_t pathIdx = 0; pathIdx < target->seqPath.size(); ++pathIdx) { @@ -81,14 +81,34 @@ namespace bufr { } } - if (!foundTarget) { + if (!foundTarget) + { // Add the last missing target to the list targets.push_back(target); - oops::Log::warning() << "Warning: Query String " - << queryStr - << " didn't apply to subset " - << dataProvider_.getSubset() - << std::endl; + oops::Log::warning() << "Warning: Query String "; + + auto queries = querySet_.queriesFor(queryName); + + if (queries.size() == 1) + { + oops::Log::warning() << queries[0].queryStr; + } + else + { + oops::Log::warning() << "["; + for (auto subQuery = queries.cbegin(); + subQuery < queries.cend(); + ++subQuery) + { + if (subQuery != queries.cbegin()) oops::Log::warning() << ", "; + oops::Log::warning() << subQuery->queryStr; + } + oops::Log::warning() << "]"; + } + + oops::Log::warning() << " didn't apply to subset "; + oops::Log::warning() << dataProvider_.getSubset(); + oops::Log::warning() << std::endl; } } @@ -96,23 +116,18 @@ namespace bufr { maskCache_.insert({dataProvider_.getSubset(), masks}); } - std::shared_ptr Query::findTarget(const std::string &targetName, - const std::string &query) const { - std::string querySubset; - std::vector mnemonics; - int index; - - QueryParser::splitQueryStr(query, querySubset, mnemonics, index); - + std::shared_ptr 
QueryRunner::findTarget(const std::string &targetName, + const Query& query) const + { std::vector branches; std::vector targetNodes; std::vector seqPath; std::vector dimPaths; std::vector dimIdxs; - bool targetMissing = !(querySubset == "*" || querySubset == dataProvider_.getSubset()); + bool targetMissing = !(query.subset == "*" || query.subset == dataProvider_.getSubset()); if (!targetMissing) { - branches.resize(mnemonics.size() - 1); + branches.resize(query.mnemonics.size() - 1); seqPath.push_back(dataProvider_.getInode()); @@ -126,7 +141,7 @@ namespace bufr { dataProvider_.getTyp(nodeIdx) == Typ::Repeat || dataProvider_.getTyp(nodeIdx) == Typ::StackedRepeat) { if (isQueryNode(nodeIdx - 1)) { - if (dataProvider_.getTag(nodeIdx) == mnemonics[mnemonicCursor + 1] && + if (dataProvider_.getTag(nodeIdx) == query.mnemonics[mnemonicCursor + 1] && tableCursor == mnemonicCursor) { mnemonicCursor++; branches[mnemonicCursor] = nodeIdx - 1; @@ -134,9 +149,9 @@ namespace bufr { tableCursor++; } seqPath.push_back(nodeIdx); - } else if (mnemonicCursor == static_cast(mnemonics.size()) - 2 && + } else if (mnemonicCursor == static_cast(query.mnemonics.size()) - 2 && tableCursor == mnemonicCursor && - dataProvider_.getTag(nodeIdx) == mnemonics.back()) { + dataProvider_.getTag(nodeIdx) == query.mnemonics.back()) { // We found a target targetNodes.push_back(nodeIdx); getDimInfo(branches, mnemonicCursor, dimPaths, dimIdxs); @@ -195,21 +210,21 @@ namespace bufr { } } - if (index > 0 && index <= gsl::narrow(targetNodes.size())) { - targetNodes = {targetNodes[index - 1]}; + if (query.index > 0 && query.index <= gsl::narrow(targetNodes.size())) { + targetNodes = {targetNodes[query.index - 1]}; } if (targetNodes.size() > 1) { std::ostringstream errMsg; errMsg << "Query string must return 1 target. Are you missing an index? 
"; - errMsg << query << "."; + errMsg << query.queryStr << "."; throw eckit::BadParameter(errMsg.str()); } } auto target = std::make_shared(); target->name = targetName; - target->queryStr = query; + target->queryStr = query.queryStr; target->seqPath = branches; target->nodeIds = targetNodes; @@ -226,14 +241,14 @@ namespace bufr { return target; } - bool Query::isQueryNode(int nodeIdx) const { + bool QueryRunner::isQueryNode(int nodeIdx) const { return (dataProvider_.getTyp(nodeIdx) == Typ::DelayedRep || dataProvider_.getTyp(nodeIdx) == Typ::FixedRep || dataProvider_.getTyp(nodeIdx) == Typ::DelayedRepStacked || dataProvider_.getTyp(nodeIdx) == Typ::DelayedBinary); } - void Query::getDimInfo(const std::vector &branches, + void QueryRunner::getDimInfo(const std::vector &branches, int mnemonicCursor, std::vector &dimPaths, std::vector &dimIdxs) const { @@ -274,7 +289,7 @@ namespace bufr { } } - void Query::collectData(Targets& targets, + void QueryRunner::collectData(Targets& targets, std::shared_ptr<__details::ProcessingMasks> masks, ResultSet &resultSet) const { std::vector currentPath; diff --git a/src/bufr/BufrParser/Query/Query.h b/src/bufr/BufrParser/Query/QueryRunner.h similarity index 94% rename from src/bufr/BufrParser/Query/Query.h rename to src/bufr/BufrParser/Query/QueryRunner.h index 2075466f2..ba0c4bf04 100644 --- a/src/bufr/BufrParser/Query/Query.h +++ b/src/bufr/BufrParser/Query/QueryRunner.h @@ -51,14 +51,14 @@ namespace bufr { } // namespace __details /// \brief Manages the execution of queries against on a BUFR file. - class Query + class QueryRunner { public: /// \brief Constructor. /// \param[in] querySet The set of queries to execute against the BUFR file. /// \param[in, out] resultSet The object used to store the accumulated collected data. /// \param[in] dataProvider The BUFR data provider to use. 
- Query(const QuerySet& querySet, ResultSet& resultSet, const DataProvider& dataProvider); + QueryRunner(const QuerySet& querySet, ResultSet& resultSet, const DataProvider& dataProvider); void query(); Targets getTargets() @@ -96,8 +96,8 @@ namespace bufr { /// \brief Find the target associated with a specific user provided query string. /// \param[in] targetName The name specified for the target. /// \param[in] query The query string to use. - std::shared_ptr findTarget(const std::string& targetName, - const std::string& query) const; + std::shared_ptr findTarget(const std::string &targetName, + const Query& query) const; /// \brief Does the node idx correspond to an element you'd find in a query string (repeat diff --git a/src/bufr/BufrParser/Query/QuerySet.cpp b/src/bufr/BufrParser/Query/QuerySet.cpp index cd44f8a98..b53b08ba0 100644 --- a/src/bufr/BufrParser/Query/QuerySet.cpp +++ b/src/bufr/BufrParser/Query/QuerySet.cpp @@ -11,10 +11,38 @@ namespace Ingester { namespace bufr { + void QuerySet::add(const std::string& name, const std::string& queryStr) + { + std::vector queries; + for (const auto& query : QueryParser::parse(queryStr)) + { + if (query.subset == "*") + { + includesAllSubsets_ = true; + } + + includedSubsets_.emplace(query.subset); + queries.emplace_back(query); + } + + queryMap_[name] = queries; + } + + bool QuerySet::includesSubset(const std::string& subset) const + { + bool includesSubset = true; + if (!includesAllSubsets_) + { + includesSubset = (includedSubsets_.find(subset) != includedSubsets_.end()); + } + + return includesSubset; + } + std::vector QuerySet::names() const { std::vector names; - for (auto const& query : queryList_) + for (auto const& query : queryMap_) { names.push_back(query.first); } diff --git a/src/bufr/BufrParser/Query/QuerySet.h b/src/bufr/BufrParser/Query/QuerySet.h index adcf26622..b7bfb8565 100644 --- a/src/bufr/BufrParser/Query/QuerySet.h +++ b/src/bufr/BufrParser/Query/QuerySet.h @@ -7,9 +7,13 @@ #pragma once 
+#include #include +#include #include +#include "QueryParser.h" + namespace Ingester { namespace bufr { @@ -23,30 +27,25 @@ namespace bufr /// \brief Add a new query to the collection. /// \param[in] name The name of the query. /// \param[in] query The query string. - void add(const std::string& name, const std::string& query) - { - queryList_.push_back({name, query}); - } + void add(const std::string& name, const std::string& query); /// \brief Returns the size of the collection. - size_t size() const { return queryList_.size(); } - - /// \brief Returns the name of the query at the specified index. - /// \param[in] idx The index of the query.. - /// \return The name of the query. - std::string nameAt(size_t idx) const { return queryList_.at(idx).first; } - - /// \brief Returns the query string at the specified index. - /// \param[in] idx The index of the query. - /// \return The query string. - std::string queryAt(size_t idx) const { return queryList_.at(idx).second; } + size_t size() const { return queryMap_.size(); } /// \brief Returns the names of all the queries. /// \return A vector of the names of all the queries. std::vector names() const; + /// \brief Returns a list of subsets. + /// \return A vector of the names of all the queries. 
+ bool includesSubset(const std::string& subset) const; + + std::vector queriesFor(const std::string& name) const { return queryMap_.at(name); } + private: - std::vector> queryList_; + std::unordered_map> queryMap_; + bool includesAllSubsets_; + std::set includedSubsets_; }; } // namespace bufr } // namespace Ingester diff --git a/src/bufr/CMakeLists.txt b/src/bufr/CMakeLists.txt index 8d1f27042..824359be0 100644 --- a/src/bufr/CMakeLists.txt +++ b/src/bufr/CMakeLists.txt @@ -43,8 +43,8 @@ list(APPEND _ingester_srcs BufrParser/Query/VectorMath.h BufrParser/Query/QuerySet.h BufrParser/Query/QuerySet.cpp - BufrParser/Query/Query.h - BufrParser/Query/Query.cpp + BufrParser/Query/QueryRunner.h + BufrParser/Query/QueryRunner.cpp BufrParser/Query/QueryParser.h BufrParser/Query/QueryParser.cpp BufrParser/Query/ResultSet.h diff --git a/src/bufr/DataObject.h b/src/bufr/DataObject.h index 8b144778c..75a090d91 100644 --- a/src/bufr/DataObject.h +++ b/src/bufr/DataObject.h @@ -492,7 +492,7 @@ namespace Ingester int _getAsInt(size_t idx, typename std::enable_if::value, U>::type* = nullptr) const { - throw std::runtime_error("The stored value was is not a number"); + throw std::runtime_error("The stored value is not a number"); } /// \brief Set the data associated with this data object (numeric DataObject). From eb3c877bef99a51e3e37c745e1555b784c18cbc0 Mon Sep 17 00:00:00 2001 From: Ron McLaren Date: Wed, 21 Sep 2022 10:09:08 -0400 Subject: [PATCH 03/10] Added configuration parameter called subsets to explicitly limit the valid subsets. 
--- src/bufr/BufrParser/BufrParser.cpp | 7 ++++++ src/bufr/BufrParser/Exports/Export.cpp | 6 +++++ src/bufr/BufrParser/Exports/Export.h | 2 ++ src/bufr/BufrParser/Query/QuerySet.cpp | 32 ++++++++++++++++++++++++++ src/bufr/BufrParser/Query/QuerySet.h | 4 ++++ 5 files changed, 51 insertions(+) diff --git a/src/bufr/BufrParser/BufrParser.cpp b/src/bufr/BufrParser/BufrParser.cpp index 4686012ae..be1f4d996 100644 --- a/src/bufr/BufrParser/BufrParser.cpp +++ b/src/bufr/BufrParser/BufrParser.cpp @@ -49,6 +49,7 @@ namespace Ingester { auto startTime = std::chrono::steady_clock::now(); auto querySet = bufr::QuerySet(); + for (const auto &var : description_.getExport().getVariables()) { for (const auto &queryPair : var->getQueryList()) @@ -57,6 +58,12 @@ namespace Ingester { } } + auto subsets = description_.getExport().getSubsets(); + if (!subsets.empty()) + { + querySet.limitSubsets(subsets); + } + oops::Log::info() << "Executing Queries" << std::endl; const auto resultSet = file_.execute(querySet, maxMsgsToParse); diff --git a/src/bufr/BufrParser/Exports/Export.cpp b/src/bufr/BufrParser/Exports/Export.cpp index b421af3ad..7c3fafee8 100644 --- a/src/bufr/BufrParser/Exports/Export.cpp +++ b/src/bufr/BufrParser/Exports/Export.cpp @@ -28,6 +28,7 @@ namespace const char* Splits = "splits"; const char* Variables = "variables"; const char* GroupByVariable = "group_by_variable"; + const char* Subsets = "subsets"; namespace Variable { @@ -75,6 +76,11 @@ namespace Ingester groupByVariable = conf.getString(ConfKeys::GroupByVariable); } + if (conf.has(ConfKeys::Subsets)) + { + subsets_ = conf.getStringVector(ConfKeys::Subsets); + } + if (conf.has(ConfKeys::Variables)) { addVariables(conf.getSubConfiguration(ConfKeys::Variables), diff --git a/src/bufr/BufrParser/Exports/Export.h b/src/bufr/BufrParser/Exports/Export.h index 51a574ac7..f1d940e64 100644 --- a/src/bufr/BufrParser/Exports/Export.h +++ b/src/bufr/BufrParser/Exports/Export.h @@ -35,11 +35,13 @@ namespace Ingester inline 
Splits getSplits() const { return splits_; } inline Variables getVariables() const { return variables_; } inline Filters getFilters() const { return filters_; } + inline std::vector getSubsets() const { return subsets_; }; private: Splits splits_; Variables variables_; Filters filters_; + std::vector subsets_; /// \brief Create Variables exports from config. void addVariables(const eckit::Configuration &conf, diff --git a/src/bufr/BufrParser/Query/QuerySet.cpp b/src/bufr/BufrParser/Query/QuerySet.cpp index b53b08ba0..76ba3e9b6 100644 --- a/src/bufr/BufrParser/Query/QuerySet.cpp +++ b/src/bufr/BufrParser/Query/QuerySet.cpp @@ -7,6 +7,8 @@ #include "QuerySet.h" +#include +#include namespace Ingester { namespace bufr { @@ -39,6 +41,36 @@ namespace bufr { return includesSubset; } + void QuerySet::limitSubsets(std::vector subsets) + { + auto subsetsSet = std::set(subsets.begin(), subsets.end()); + if (includesAllSubsets_) + { + includedSubsets_ = subsetsSet; + + } + else + { + for(auto& s : includedSubsets_) std::cout << s << " "; + std::cout << std::endl; + + std::vector newSubsets; + std::set_intersection(subsetsSet.begin(), + subsetsSet.end(), + includedSubsets_.begin(), + includedSubsets_.end(), + std::back_inserter(newSubsets)); + + includedSubsets_ = std::set(newSubsets.begin(), + newSubsets.end()); + + for(auto& s : includedSubsets_) std::cout << s << " "; + std::cout << std::endl; + } + + includesAllSubsets_ = false; + } + std::vector QuerySet::names() const { std::vector names; diff --git a/src/bufr/BufrParser/Query/QuerySet.h b/src/bufr/BufrParser/Query/QuerySet.h index b7bfb8565..56e1782d0 100644 --- a/src/bufr/BufrParser/Query/QuerySet.h +++ b/src/bufr/BufrParser/Query/QuerySet.h @@ -40,6 +40,10 @@ namespace bufr /// \return A vector of the names of all the queries. bool includesSubset(const std::string& subset) const; + /// \brief Limit the subsets to the ones presented. + /// \param[in] subsets A vector of subsets. 
+ void limitSubsets(std::vector subsets); + std::vector queriesFor(const std::string& name) const { return queryMap_.at(name); } private: From 8b37ae5cdaa90cd824bc7afbbbea9183544e716b Mon Sep 17 00:00:00 2001 From: Ron McLaren Date: Wed, 21 Sep 2022 10:45:11 -0400 Subject: [PATCH 04/10] Improved the implemention a little. --- src/bufr/BufrParser/BufrParser.cpp | 8 +-- src/bufr/BufrParser/Query/QuerySet.cpp | 74 +++++++++++++------------- src/bufr/BufrParser/Query/QuerySet.h | 11 ++-- 3 files changed, 44 insertions(+), 49 deletions(-) diff --git a/src/bufr/BufrParser/BufrParser.cpp b/src/bufr/BufrParser/BufrParser.cpp index be1f4d996..00160e0d7 100644 --- a/src/bufr/BufrParser/BufrParser.cpp +++ b/src/bufr/BufrParser/BufrParser.cpp @@ -48,7 +48,7 @@ namespace Ingester { { auto startTime = std::chrono::steady_clock::now(); - auto querySet = bufr::QuerySet(); + auto querySet = bufr::QuerySet(description_.getExport().getSubsets()); for (const auto &var : description_.getExport().getVariables()) { @@ -58,12 +58,6 @@ namespace Ingester { } } - auto subsets = description_.getExport().getSubsets(); - if (!subsets.empty()) - { - querySet.limitSubsets(subsets); - } - oops::Log::info() << "Executing Queries" << std::endl; const auto resultSet = file_.execute(querySet, maxMsgsToParse); diff --git a/src/bufr/BufrParser/Query/QuerySet.cpp b/src/bufr/BufrParser/Query/QuerySet.cpp index 76ba3e9b6..11a045ee6 100644 --- a/src/bufr/BufrParser/Query/QuerySet.cpp +++ b/src/bufr/BufrParser/Query/QuerySet.cpp @@ -13,17 +13,50 @@ namespace Ingester { namespace bufr { + QuerySet::QuerySet(const std::vector& subsets) : + includesAllSubsets_(false), + limitSubsets_(std::set(subsets.begin(), + subsets.end())), + presentSubsets_({}) + { + } + void QuerySet::add(const std::string& name, const std::string& queryStr) { std::vector queries; - for (const auto& query : QueryParser::parse(queryStr)) + for (const auto &query: QueryParser::parse(queryStr)) { - if (query.subset == "*") + if 
(limitSubsets_.empty()) + { + if (query.subset == "*") + { + includesAllSubsets_ = true; + } + + presentSubsets_.insert(query.subset); + } + else { - includesAllSubsets_ = true; + if (query.subset == "*") + { + presentSubsets_ = limitSubsets_; + } + else + { + presentSubsets_.insert(query.subset); + + std::vector newSubsets; + std::set_intersection(limitSubsets_.begin(), + limitSubsets_.end(), + presentSubsets_.begin(), + presentSubsets_.end(), + std::back_inserter(newSubsets)); + + presentSubsets_ = std::set(newSubsets.begin(), + newSubsets.end()); + } } - includedSubsets_.emplace(query.subset); queries.emplace_back(query); } @@ -35,42 +68,12 @@ namespace bufr { bool includesSubset = true; if (!includesAllSubsets_) { - includesSubset = (includedSubsets_.find(subset) != includedSubsets_.end()); + includesSubset = (presentSubsets_.find(subset) != presentSubsets_.end()); } return includesSubset; } - void QuerySet::limitSubsets(std::vector subsets) - { - auto subsetsSet = std::set(subsets.begin(), subsets.end()); - if (includesAllSubsets_) - { - includedSubsets_ = subsetsSet; - - } - else - { - for(auto& s : includedSubsets_) std::cout << s << " "; - std::cout << std::endl; - - std::vector newSubsets; - std::set_intersection(subsetsSet.begin(), - subsetsSet.end(), - includedSubsets_.begin(), - includedSubsets_.end(), - std::back_inserter(newSubsets)); - - includedSubsets_ = std::set(newSubsets.begin(), - newSubsets.end()); - - for(auto& s : includedSubsets_) std::cout << s << " "; - std::cout << std::endl; - } - - includesAllSubsets_ = false; - } - std::vector QuerySet::names() const { std::vector names; @@ -81,6 +84,5 @@ namespace bufr { return names; } - } // namespace bufr } // namespace Ingester diff --git a/src/bufr/BufrParser/Query/QuerySet.h b/src/bufr/BufrParser/Query/QuerySet.h index 56e1782d0..54864418f 100644 --- a/src/bufr/BufrParser/Query/QuerySet.h +++ b/src/bufr/BufrParser/Query/QuerySet.h @@ -17,11 +17,13 @@ namespace Ingester { namespace bufr { + 
typedef std::set Subsets; + /// \brief Manages a collection of queries. class QuerySet { public: - QuerySet() = default; + explicit QuerySet(const std::vector& subsets); ~QuerySet() = default; /// \brief Add a new query to the collection. @@ -40,16 +42,13 @@ namespace bufr /// \return A vector of the names of all the queries. bool includesSubset(const std::string& subset) const; - /// \brief Limit the subsets to the ones presented. - /// \param[in] subsets A vector of subsets. - void limitSubsets(std::vector subsets); - std::vector queriesFor(const std::string& name) const { return queryMap_.at(name); } private: std::unordered_map> queryMap_; bool includesAllSubsets_; - std::set includedSubsets_; + Subsets limitSubsets_; + Subsets presentSubsets_; }; } // namespace bufr } // namespace Ingester From 202676d48ae402adb34979a42fe9f9c0c097c1e7 Mon Sep 17 00:00:00 2001 From: Ron McLaren Date: Wed, 21 Sep 2022 10:55:36 -0400 Subject: [PATCH 05/10] Fixed coding norm issues --- src/bufr/BufrParser/Exports/Export.h | 2 +- src/bufr/BufrParser/Query/QueryRunner.cpp | 10 +++++++--- src/bufr/BufrParser/Query/QueryRunner.h | 4 +++- src/bufr/BufrParser/Query/QuerySet.cpp | 2 +- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/bufr/BufrParser/Exports/Export.h b/src/bufr/BufrParser/Exports/Export.h index f1d940e64..baed395d5 100644 --- a/src/bufr/BufrParser/Exports/Export.h +++ b/src/bufr/BufrParser/Exports/Export.h @@ -35,7 +35,7 @@ namespace Ingester inline Splits getSplits() const { return splits_; } inline Variables getVariables() const { return variables_; } inline Filters getFilters() const { return filters_; } - inline std::vector getSubsets() const { return subsets_; }; + inline std::vector getSubsets() const { return subsets_; } private: Splits splits_; diff --git a/src/bufr/BufrParser/Query/QueryRunner.cpp b/src/bufr/BufrParser/Query/QueryRunner.cpp index c07ae84de..67372eb9c 100644 --- a/src/bufr/BufrParser/Query/QueryRunner.cpp +++ 
b/src/bufr/BufrParser/Query/QueryRunner.cpp @@ -31,7 +31,8 @@ namespace bufr { { } - void QueryRunner::query() { + void QueryRunner::query() + { Targets targets; std::shared_ptr<__details::ProcessingMasks> masks; @@ -39,9 +40,12 @@ namespace bufr { collectData(targets, masks, resultSet_); } - void QueryRunner::findTargets(Targets &targets, std::shared_ptr<__details::ProcessingMasks> &masks) { + void QueryRunner::findTargets(Targets &targets, + std::shared_ptr<__details::ProcessingMasks> &masks) + { // Check if the target list for this subset is cached - if (targetCache_.find(dataProvider_.getSubset()) != targetCache_.end()) { + if (targetCache_.find(dataProvider_.getSubset()) != targetCache_.end()) + { targets = targetCache_.at(dataProvider_.getSubset()); masks = maskCache_.at(dataProvider_.getSubset()); return; diff --git a/src/bufr/BufrParser/Query/QueryRunner.h b/src/bufr/BufrParser/Query/QueryRunner.h index ba0c4bf04..687e09627 100644 --- a/src/bufr/BufrParser/Query/QueryRunner.h +++ b/src/bufr/BufrParser/Query/QueryRunner.h @@ -58,7 +58,9 @@ namespace bufr { /// \param[in] querySet The set of queries to execute against the BUFR file. /// \param[in, out] resultSet The object used to store the accumulated collected data. /// \param[in] dataProvider The BUFR data provider to use. 
- QueryRunner(const QuerySet& querySet, ResultSet& resultSet, const DataProvider& dataProvider); + QueryRunner(const QuerySet& querySet, + ResultSet& resultSet, + const DataProvider& dataProvider); void query(); Targets getTargets() diff --git a/src/bufr/BufrParser/Query/QuerySet.cpp b/src/bufr/BufrParser/Query/QuerySet.cpp index 11a045ee6..5587136de 100644 --- a/src/bufr/BufrParser/Query/QuerySet.cpp +++ b/src/bufr/BufrParser/Query/QuerySet.cpp @@ -24,7 +24,7 @@ namespace bufr { void QuerySet::add(const std::string& name, const std::string& queryStr) { std::vector queries; - for (const auto &query: QueryParser::parse(queryStr)) + for (const auto &query : QueryParser::parse(queryStr)) { if (limitSubsets_.empty()) { From 09d4b8b50819798823839e0117ba86b7a2b4b8fe Mon Sep 17 00:00:00 2001 From: Ron McLaren Date: Wed, 21 Sep 2022 11:04:36 -0400 Subject: [PATCH 06/10] Renamed query function to accumulate. --- src/bufr/BufrParser/Query/File.cpp | 6 +++--- src/bufr/BufrParser/Query/QueryRunner.cpp | 2 +- src/bufr/BufrParser/Query/QueryRunner.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/bufr/BufrParser/Query/File.cpp b/src/bufr/BufrParser/Query/File.cpp index fae7447bf..d05aa582d 100644 --- a/src/bufr/BufrParser/Query/File.cpp +++ b/src/bufr/BufrParser/Query/File.cpp @@ -72,7 +72,7 @@ namespace bufr { auto dataProvider = DataProvider(fileUnit_); auto resultSet = ResultSet(querySet.names()); - auto query = QueryRunner(querySet, resultSet, dataProvider); + auto queryRunner = QueryRunner(querySet, resultSet, dataProvider); while (ireadmg_f(fileUnit_, subsetChars, &iddate, SubsetLen) == 0) { @@ -85,14 +85,14 @@ namespace bufr { { status_f(fileUnit_, &bufrLoc, &il, &im); dataProvider.updateData(bufrLoc); - query.query(); + queryRunner.accumulate(); } if (next > 0 && ++messageNum >= next) break; } } - resultSet.setTargets(query.getTargets()); + resultSet.setTargets(queryRunner.getTargets()); dataProvider.deleteData(); diff --git 
a/src/bufr/BufrParser/Query/QueryRunner.cpp b/src/bufr/BufrParser/Query/QueryRunner.cpp index 67372eb9c..fae955f90 100644 --- a/src/bufr/BufrParser/Query/QueryRunner.cpp +++ b/src/bufr/BufrParser/Query/QueryRunner.cpp @@ -31,7 +31,7 @@ namespace bufr { { } - void QueryRunner::query() + void QueryRunner::accumulate() { Targets targets; std::shared_ptr<__details::ProcessingMasks> masks; diff --git a/src/bufr/BufrParser/Query/QueryRunner.h b/src/bufr/BufrParser/Query/QueryRunner.h index 687e09627..8e9d0d731 100644 --- a/src/bufr/BufrParser/Query/QueryRunner.h +++ b/src/bufr/BufrParser/Query/QueryRunner.h @@ -61,7 +61,7 @@ namespace bufr { QueryRunner(const QuerySet& querySet, ResultSet& resultSet, const DataProvider& dataProvider); - void query(); + void accumulate(); Targets getTargets() { From 33170060a41bf3ecb64212868124a5f7d015c7c4 Mon Sep 17 00:00:00 2001 From: Ron McLaren Date: Wed, 21 Sep 2022 11:05:30 -0400 Subject: [PATCH 07/10] Added some yaml files for testing purposes (still need to make the test data). --- .../bufr_specific_subsets_by_query.yaml | 48 ++++++++++++++++ test/testinput/bufr_specifying_subsets.yaml | 57 +++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 test/testinput/bufr_specific_subsets_by_query.yaml create mode 100644 test/testinput/bufr_specifying_subsets.yaml diff --git a/test/testinput/bufr_specific_subsets_by_query.yaml b/test/testinput/bufr_specific_subsets_by_query.yaml new file mode 100644 index 000000000..7e538b875 --- /dev/null +++ b/test/testinput/bufr_specific_subsets_by_query.yaml @@ -0,0 +1,48 @@ +# (C) Copyright 2020 NOAA/NWS/NCEP/EMC +# # # +# # # This software is licensed under the terms of the Apache Licence Version 2.0 +# # # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+# +observations: + - obs space: + name: bufr + + obsdatain: "./testinput/gdas.t12z.aircft.tm00.bufr_d" + + exports: + #MetaData + variables: + timestamp: + datetime: + year: "[NC004001/YEAR, NC004002/YEAR, NC004003/YEAR, NC004006/YEAR, NC004009/YEAR, NC004010/YEAR, NC004011/YEAR]" + month: "[NC004001/MNTH, NC004002/MNTH, NC004003/MNTH, NC004006/MNTH, NC004009/MNTH, NC004010/MNTH, NC004011/MNTH]" + day: "[NC004001/DAYS, NC004002/DAYS, NC004003/DAYS, NC004006/DAYS, NC004009/DAYS, NC004010/DAYS, NC004011/DAYS]" + hour: "[NC004001/HOUR, NC004002/HOUR, NC004003/HOUR, NC004006/HOUR, NC004009/HOUR, NC004010/HOUR, NC004011/HOUR]" + minute: "[NC004001/MINU, NC004002/MINU, NC004003/MINU, NC004006/MINU, NC004009/MINU, NC004010/MINU, NC004011/MINU]" + latitude: + query: "[NC004001/CLAT, NC004002/CLAT, NC004003/CLAT, NC004006/CLATH, NC004009/CLATH, NC004010/CLATH, NC004011/CLATH]" + longitude: + query: "[NC004001/CLON, NC004002/CLON, NC004003/CLON, NC004006/CLONH, NC004009/CLONH, NC004010/CLONH, NC004011/CLONH]" + + ioda: + backend: netcdf + obsdataout: "./testrun/bufr_specific_subsets_by_query.nc" + + #MetaData + variables: + - name: "MetaData/dateTime" + source: variables/timestamp + longName: "Datetime" + units: "seconds since 1970-01-01T00:00:00Z" + + - name: "MetaData/latitude" + source: variables/latitude + longName: "Latitude" + units: "degree_north" + range: [-90, 90] + + - name: "MetaData/longitude" + source: variables/longitude + longName: "Longitude" + units: "degree_east" + range: [-180, 180] diff --git a/test/testinput/bufr_specifying_subsets.yaml b/test/testinput/bufr_specifying_subsets.yaml new file mode 100644 index 000000000..3e45a1acc --- /dev/null +++ b/test/testinput/bufr_specifying_subsets.yaml @@ -0,0 +1,57 @@ +# (C) Copyright 2020 NOAA/NWS/NCEP/EMC +# # # +# # # This software is licensed under the terms of the Apache Licence Version 2.0 +# # # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+# +observations: + - obs space: + name: bufr + + obsdatain: "./testinput/gdas.t12z.aircft.tm00.bufr_d" + + exports: + subsets: + - NC004001 + - NC004002 + - NC004003 + - NC004006 + - NC004009 + - NC004010 + - NC004011 + + #MetaData + variables: + timestamp: + datetime: + year: "*/YEAR" + month: "*/MNTH" + day: "*/DAYS" + hour: "*/HOUR" + minute: "*/MINU" + latitude: + query: "[*/CLATH, */CLAT]" + longitude: + query: "[*/CLONH, */CLON]" + + ioda: + backend: netcdf + obsdataout: "./testrun/bufr_specifying_subsets.nc" + + #MetaData + variables: + - name: "MetaData/dateTime" + source: variables/timestamp + longName: "Datetime" + units: "seconds since 1970-01-01T00:00:00Z" + + - name: "MetaData/latitude" + source: variables/latitude + longName: "Latitude" + units: "degree_north" + range: [-90, 90] + + - name: "MetaData/longitude" + source: variables/longitude + longName: "Longitude" + units: "degree_east" + range: [-180, 180] From 0966327d8a1794ff823ac5968633321b16dc209d Mon Sep 17 00:00:00 2001 From: Ron McLaren Date: Thu, 22 Sep 2022 13:56:54 -0400 Subject: [PATCH 08/10] Added unit tests --- test/CMakeLists.txt | 22 +++++++++++++++++++ .../bufr_specific_subsets_by_query.yaml | 2 +- test/testinput/gdas.t12z.aircft.tm00.bufr_d | 3 +++ test/testoutput/bufr_specifying_subsets.nc | 3 +++ 4 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 test/testinput/gdas.t12z.aircft.tm00.bufr_d create mode 100644 test/testoutput/bufr_specifying_subsets.nc diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d5347d5a4..60ee61b58 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -216,6 +216,9 @@ if( iodaconv_bufr_ENABLED ) testinput/satwind_Insat_wmo.bufr testinput/vadwinds_wmoBUFR2ioda.yaml testinput/vadwinds_wmo_multi.bufr + testinput/gdas.t12z.aircft.tm00.bufr_d + testinput/bufr_specific_subsets_by_query.yaml + testinput/bufr_specifying_subsets.yaml ) list( APPEND test_output @@ -259,6 +262,7 @@ if( iodaconv_bufr_ENABLED ) 
testoutput/satwind_Himawari.nc testoutput/satwind_Insat.nc testoutput/vadwinds_wmo_multi.nc + testoutput/bufr_specifying_subsets.nc ) endif() @@ -1216,6 +1220,24 @@ if(iodaconv_bufr_ENABLED) gdas.t00z.sevcsr.tm00.nc ${IODA_CONV_COMP_TOL_ZERO} DEPENDS bufr2ioda.x ) + ecbuild_add_test( TARGET test_iodaconv_bufr_specific_subsets_by_query + TYPE SCRIPT + COMMAND bash + ARGS ${CMAKE_BINARY_DIR}/bin/iodaconv_comp.sh + netcdf + "${CMAKE_BINARY_DIR}/bin/bufr2ioda.x testinput/bufr_specific_subsets_by_query.yaml" + bufr_specifying_subsets.nc ${IODA_CONV_COMP_TOL_ZERO} + DEPENDS bufr2ioda.x ) + + ecbuild_add_test( TARGET test_iodaconv_bufr_specifying_subsets + TYPE SCRIPT + COMMAND bash + ARGS ${CMAKE_BINARY_DIR}/bin/iodaconv_comp.sh + netcdf + "${CMAKE_BINARY_DIR}/bin/bufr2ioda.x testinput/bufr_specifying_subsets.yaml" + bufr_specifying_subsets.nc ${IODA_CONV_COMP_TOL_ZERO} + DEPENDS bufr2ioda.x ) + # FIXME: Greg Thompson # ecbuild_add_test( TARGET test_iodaconv_bufr_aircar # TYPE SCRIPT diff --git a/test/testinput/bufr_specific_subsets_by_query.yaml b/test/testinput/bufr_specific_subsets_by_query.yaml index 7e538b875..a2023f3d1 100644 --- a/test/testinput/bufr_specific_subsets_by_query.yaml +++ b/test/testinput/bufr_specific_subsets_by_query.yaml @@ -26,7 +26,7 @@ observations: ioda: backend: netcdf - obsdataout: "./testrun/bufr_specific_subsets_by_query.nc" + obsdataout: "./testrun/bufr_specifying_subsets.nc" #MetaData variables: diff --git a/test/testinput/gdas.t12z.aircft.tm00.bufr_d b/test/testinput/gdas.t12z.aircft.tm00.bufr_d new file mode 100644 index 000000000..704f6fa51 --- /dev/null +++ b/test/testinput/gdas.t12z.aircft.tm00.bufr_d @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41b1ee8552c02f8015a32fe3eea1708fa3fd39c8bf02bd5cd4a7a42d50c2adf9 +size 123088 diff --git a/test/testoutput/bufr_specifying_subsets.nc b/test/testoutput/bufr_specifying_subsets.nc new file mode 100644 index 000000000..ecb61585f --- /dev/null +++ 
b/test/testoutput/bufr_specifying_subsets.nc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f3b86786b014bf39de2abaa815688ee982a32aadebe50149ea016acadf5646f +size 24062 From 2818ec7001b2d320b70b08ef8e426ccaa9484d2e Mon Sep 17 00:00:00 2001 From: Ron McLaren Date: Thu, 22 Sep 2022 14:47:01 -0400 Subject: [PATCH 09/10] updated readme --- src/bufr/README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/bufr/README.md b/src/bufr/README.md index 7a0f9e3cc..55f49e8ed 100644 --- a/src/bufr/README.md +++ b/src/bufr/README.md @@ -55,6 +55,10 @@ Defines how to read data from the input BUFR file. Its sections are as follows: ```yaml exports: group_by_variable: longitude # Optional + subsets: + - NC004001 + - NC004002 + - NC004003 variables: timestamp: datetime: @@ -99,7 +103,8 @@ ioda encoder. It has the following sections: * `group_by_variable` _(optional)_ String value that defines the name of the variable to group observations by. If this field is missing then observations will not be grouped. - +* `subsets` _(optional)_ List of subsets that you want to process. If the field is not present then + all subsets will be processed in accordance with the query definitions. * `variables` * **keys** are arbitrary strings (anything you want). They can be referenced in the ioda section. * **values** (One of these types): From ea8126e4f0bafd8266773eb2a6260fcb3aa80e02 Mon Sep 17 00:00:00 2001 From: Ron McLaren Date: Mon, 26 Sep 2022 11:17:56 -0400 Subject: [PATCH 10/10] added getAsFloat back. --- src/bufr/DataObject.h | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/bufr/DataObject.h b/src/bufr/DataObject.h index 75a090d91..9ca4972e6 100644 --- a/src/bufr/DataObject.h +++ b/src/bufr/DataObject.h @@ -111,9 +111,13 @@ namespace Ingester virtual float getAsFloat(const Location& loc) const = 0; /// \brief Get the data at the index as an int. - /// \return Float data. 
+ /// \return Int data. virtual int getAsInt(size_t idx) const = 0; + /// \brief Get the data at the index as a float. + /// \return Float data. + virtual float getAsFloat(size_t idx) const = 0; + /// \brief Get the data at the Location as an string. /// \return String data. virtual std::string getAsString(const Location& loc) const = 0; @@ -330,6 +334,7 @@ namespace Ingester /// \return String data. std::string getAsString(const Location& loc) const final { return _getAsString(loc); } + /// \brief Get the data at the index into the internal 1d array as a int. This function /// gives you direct access to the internal data and doesn't account for dimensional /// information (its up to the user). Note: getAsInt(const Location&) is safer. @@ -337,6 +342,15 @@ namespace Ingester /// \return Int data. int getAsInt(size_t idx) const final { return _getAsInt(idx); } + + /// \brief Get the data at the index into the internal 1d array as a float. This + /// function gives you direct access to the internal data and doesn't account for + /// dimensional information (it's up to the user). Note: getAsFloat(const Location&) + /// is safer. + /// \param idx The idx into the internal 1d array. + /// \return Float data. + float getAsFloat(const size_t idx) const final { return _getAsFloat(idx); } + /// \brief Slice the dta object according to a list of indices. /// \param rows The indices to slice the data object by. /// \return Sliced DataObject. @@ -495,6 +509,25 @@ namespace Ingester throw std::runtime_error("The stored value is not a number"); } + /// \brief Get the data at the index as a float for numeric data. + /// \return Float data. + template + float _getAsFloat(size_t idx, + typename std::enable_if::value, U>::type* = nullptr) const + { + return static_cast(data_[idx]); + } + + /// \brief Get the data at the index as a float for non-numeric data. + /// \return Float data.
+ template + float _getAsFloat(size_t idx, + typename std::enable_if::value, U>::type* = nullptr) const + { + throw std::runtime_error("The stored value was is not a number"); + return 0.0f; + } + /// \brief Set the data associated with this data object (numeric DataObject). /// \param data - double vector of raw data /// \param dataMissingValue - The number that represents missing values within the raw data